Speech to Text

# Submit a speech-to-text request: POST the JSON payload below with a bearer token.
# Replace <token> and the "<string>" / 123 placeholder values before running.
curl --request POST \
  --url https://apis.finevoice.ai/v1/audio/stt \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "url": "https://example.com/audio.mp3",
  "language": "en",
  "title": "<string>",
  "format": "json",
  "engine": "whisper",
  "useAsync": true,
  "word_level_timestamp_alignment": false,
  "speaker_diarization": false,
  "min_speakers": 123,
  "max_speakers": 123,
  "batch_size": 123,
  "script_target": "<string>"
}
'

import requests

# Speech-to-text endpoint of the FineVoice API.
url = "https://apis.finevoice.ai/v1/audio/stt"

# JSON request body; replace the "<string>" and 123 placeholders with real values.
payload = {
    "url": "https://example.com/audio.mp3",
    "language": "en",
    "title": "<string>",
    "format": "json",
    "engine": "whisper",
    "useAsync": True,
    "word_level_timestamp_alignment": False,
    "speaker_diarization": False,
    "min_speakers": 123,
    "max_speakers": 123,
    "batch_size": 123,
    "script_target": "<string>"
}
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout by default, so a stalled connection would block
# forever; give up after 30 seconds instead.
# NOTE(review): a longer limit may be needed if useAsync is set to False — confirm.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body as text.
print(response.text)

// JSON body for the speech-to-text request; replace the placeholder values.
const payload = {
  url: 'https://example.com/audio.mp3',
  language: 'en',
  title: '<string>',
  format: 'json',
  engine: 'whisper',
  useAsync: true,
  word_level_timestamp_alignment: false,
  speaker_diarization: false,
  min_speakers: 123,
  max_speakers: 123,
  batch_size: 123,
  script_target: '<string>'
};

// POST with bearer authentication and a JSON content type.
const options = {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <token>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify(payload)
};

// Send the request, parse the JSON reply, and log it (or log any failure).
fetch('https://apis.finevoice.ai/v1/audio/stt', options)
  .then(response => response.json())
  .then(data => console.log(data))
  .catch(error => console.error(error));

<?php

// JSON-encoded request body; replace the placeholder values before sending.
$payload = json_encode([
  'url' => 'https://example.com/audio.mp3',
  'language' => 'en',
  'title' => '<string>',
  'format' => 'json',
  'engine' => 'whisper',
  'useAsync' => true,
  'word_level_timestamp_alignment' => false,
  'speaker_diarization' => false,
  'min_speakers' => 123,
  'max_speakers' => 123,
  'batch_size' => 123,
  'script_target' => '<string>'
]);

$curl = curl_init();

// Configure a POST that returns the response as a string instead of printing it.
curl_setopt_array($curl, [
  CURLOPT_URL => "https://apis.finevoice.ai/v1/audio/stt",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $payload,
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the transport error if there was one, otherwise the raw response body.
echo $err ? "cURL Error #:" . $err : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main submits a speech-to-text request to the FineVoice API and prints the
// raw response body to standard output. Any failure while building, sending,
// or reading the request is printed and the program returns without panicking.
func main() {
	url := "https://apis.finevoice.ai/v1/audio/stt"

	// JSON request body; replace the placeholder values before running.
	payload := strings.NewReader("{\n  \"url\": \"https://example.com/audio.mp3\",\n  \"language\": \"en\",\n  \"title\": \"<string>\",\n  \"format\": \"json\",\n  \"engine\": \"whisper\",\n  \"useAsync\": true,\n  \"word_level_timestamp_alignment\": false,\n  \"speaker_diarization\": false,\n  \"min_speakers\": 123,\n  \"max_speakers\": 123,\n  \"batch_size\": 123,\n  \"script_target\": \"<string>\"\n}")

	req, err := http.NewRequest(http.MethodPost, url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so the body must not be touched here.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	fmt.Println(string(body))
}

// POST the JSON payload with bearer authentication and read the reply body as a String.
// Replace <token> and the "<string>" / 123 placeholder values before running.
HttpResponse<String> response = Unirest.post("https://apis.finevoice.ai/v1/audio/stt")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"url\": \"https://example.com/audio.mp3\",\n  \"language\": \"en\",\n  \"title\": \"<string>\",\n  \"format\": \"json\",\n  \"engine\": \"whisper\",\n  \"useAsync\": true,\n  \"word_level_timestamp_alignment\": false,\n  \"speaker_diarization\": false,\n  \"min_speakers\": 123,\n  \"max_speakers\": 123,\n  \"batch_size\": 123,\n  \"script_target\": \"<string>\"\n}")
  .asString();

require 'uri'
require 'net/http'

# Speech-to-text endpoint of the FineVoice API.
endpoint = URI("https://apis.finevoice.ai/v1/audio/stt")

# The endpoint is HTTPS, so TLS is switched on for the connection.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# POST with bearer authentication and a JSON body (replace the placeholders).
post = Net::HTTP::Post.new(endpoint)
post["Authorization"] = 'Bearer <token>'
post["Content-Type"] = 'application/json'
post.body = "{\n  \"url\": \"https://example.com/audio.mp3\",\n  \"language\": \"en\",\n  \"title\": \"<string>\",\n  \"format\": \"json\",\n  \"engine\": \"whisper\",\n  \"useAsync\": true,\n  \"word_level_timestamp_alignment\": false,\n  \"speaker_diarization\": false,\n  \"min_speakers\": 123,\n  \"max_speakers\": 123,\n  \"batch_size\": 123,\n  \"script_target\": \"<string>\"\n}"

# Send the request and print the raw response body.
reply = client.request(post)
puts reply.read_body

{
  "status": 123,
  "url": "<string>",
  "taskId": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
  "error": {
    "code": "<string>",
    "message": "<string>"
  },
  "urls": [
    "<string>"
  ],
  "service": "<string>",
  "port": "<string>",
  "timestamp": "<string>"
}

POST

audio

stt

Speech to Text

# Submit a speech-to-text request: POST the JSON payload below with a bearer token.
# Replace <token> and the "<string>" / 123 placeholder values before running.
curl --request POST \
  --url https://apis.finevoice.ai/v1/audio/stt \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "url": "https://example.com/audio.mp3",
  "language": "en",
  "title": "<string>",
  "format": "json",
  "engine": "whisper",
  "useAsync": true,
  "word_level_timestamp_alignment": false,
  "speaker_diarization": false,
  "min_speakers": 123,
  "max_speakers": 123,
  "batch_size": 123,
  "script_target": "<string>"
}
'

import requests

# Speech-to-text endpoint of the FineVoice API.
url = "https://apis.finevoice.ai/v1/audio/stt"

# JSON request body; replace the "<string>" and 123 placeholders with real values.
payload = {
    "url": "https://example.com/audio.mp3",
    "language": "en",
    "title": "<string>",
    "format": "json",
    "engine": "whisper",
    "useAsync": True,
    "word_level_timestamp_alignment": False,
    "speaker_diarization": False,
    "min_speakers": 123,
    "max_speakers": 123,
    "batch_size": 123,
    "script_target": "<string>"
}
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout by default, so a stalled connection would block
# forever; give up after 30 seconds instead.
# NOTE(review): a longer limit may be needed if useAsync is set to False — confirm.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw response body as text.
print(response.text)

// JSON body for the speech-to-text request; replace the placeholder values.
const payload = {
  url: 'https://example.com/audio.mp3',
  language: 'en',
  title: '<string>',
  format: 'json',
  engine: 'whisper',
  useAsync: true,
  word_level_timestamp_alignment: false,
  speaker_diarization: false,
  min_speakers: 123,
  max_speakers: 123,
  batch_size: 123,
  script_target: '<string>'
};

// POST with bearer authentication and a JSON content type.
const options = {
  method: 'POST',
  headers: {
    Authorization: 'Bearer <token>',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify(payload)
};

// Send the request, parse the JSON reply, and log it (or log any failure).
fetch('https://apis.finevoice.ai/v1/audio/stt', options)
  .then(response => response.json())
  .then(data => console.log(data))
  .catch(error => console.error(error));

<?php

// JSON-encoded request body; replace the placeholder values before sending.
$payload = json_encode([
  'url' => 'https://example.com/audio.mp3',
  'language' => 'en',
  'title' => '<string>',
  'format' => 'json',
  'engine' => 'whisper',
  'useAsync' => true,
  'word_level_timestamp_alignment' => false,
  'speaker_diarization' => false,
  'min_speakers' => 123,
  'max_speakers' => 123,
  'batch_size' => 123,
  'script_target' => '<string>'
]);

$curl = curl_init();

// Configure a POST that returns the response as a string instead of printing it.
curl_setopt_array($curl, [
  CURLOPT_URL => "https://apis.finevoice.ai/v1/audio/stt",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => $payload,
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the transport error if there was one, otherwise the raw response body.
echo $err ? "cURL Error #:" . $err : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main submits a speech-to-text request to the FineVoice API and prints the
// raw response body to standard output. Any failure while building, sending,
// or reading the request is printed and the program returns without panicking.
func main() {
	url := "https://apis.finevoice.ai/v1/audio/stt"

	// JSON request body; replace the placeholder values before running.
	payload := strings.NewReader("{\n  \"url\": \"https://example.com/audio.mp3\",\n  \"language\": \"en\",\n  \"title\": \"<string>\",\n  \"format\": \"json\",\n  \"engine\": \"whisper\",\n  \"useAsync\": true,\n  \"word_level_timestamp_alignment\": false,\n  \"speaker_diarization\": false,\n  \"min_speakers\": 123,\n  \"max_speakers\": 123,\n  \"batch_size\": 123,\n  \"script_target\": \"<string>\"\n}")

	req, err := http.NewRequest(http.MethodPost, url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so the body must not be touched here.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	fmt.Println(string(body))
}

// POST the JSON payload with bearer authentication and read the reply body as a String.
// Replace <token> and the "<string>" / 123 placeholder values before running.
HttpResponse<String> response = Unirest.post("https://apis.finevoice.ai/v1/audio/stt")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"url\": \"https://example.com/audio.mp3\",\n  \"language\": \"en\",\n  \"title\": \"<string>\",\n  \"format\": \"json\",\n  \"engine\": \"whisper\",\n  \"useAsync\": true,\n  \"word_level_timestamp_alignment\": false,\n  \"speaker_diarization\": false,\n  \"min_speakers\": 123,\n  \"max_speakers\": 123,\n  \"batch_size\": 123,\n  \"script_target\": \"<string>\"\n}")
  .asString();

require 'uri'
require 'net/http'

# Speech-to-text endpoint of the FineVoice API.
endpoint = URI("https://apis.finevoice.ai/v1/audio/stt")

# The endpoint is HTTPS, so TLS is switched on for the connection.
client = Net::HTTP.new(endpoint.host, endpoint.port)
client.use_ssl = true

# POST with bearer authentication and a JSON body (replace the placeholders).
post = Net::HTTP::Post.new(endpoint)
post["Authorization"] = 'Bearer <token>'
post["Content-Type"] = 'application/json'
post.body = "{\n  \"url\": \"https://example.com/audio.mp3\",\n  \"language\": \"en\",\n  \"title\": \"<string>\",\n  \"format\": \"json\",\n  \"engine\": \"whisper\",\n  \"useAsync\": true,\n  \"word_level_timestamp_alignment\": false,\n  \"speaker_diarization\": false,\n  \"min_speakers\": 123,\n  \"max_speakers\": 123,\n  \"batch_size\": 123,\n  \"script_target\": \"<string>\"\n}"

# Send the request and print the raw response body.
reply = client.request(post)
puts reply.read_body

{
  "status": 123,
  "url": "<string>",
  "taskId": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
  "error": {
    "code": "<string>",
    "message": "<string>"
  },
  "urls": [
    "<string>"
  ],
  "service": "<string>",
  "port": "<string>",
  "timestamp": "<string>"
}

Authorizations

Authorization

string

header

required

Bearer token (API key). Format: Bearer {your_api_key}

Body

application/json

The speech-to-text request payload.

url

string

The source audio or video URL.

Example:

"https://example.com/audio.mp3"

language

string

The source language code (e.g. en, zh, ja).

Example:

"en"

title

string

An optional task title for reference.

format

string

Expected transcript output format: srt, vtt, json, txt.

Example:

"json"

engine

string

The transcription engine to use. Supported: whisper, funasr.

Example:

"whisper"

useAsync

boolean

Set to true to process asynchronously.

Example:

true

word_level_timestamp_alignment

boolean

Whether to include word-level timestamp alignment.

Example:

false

speaker_diarization

boolean

Whether to enable speaker diarization (identify multiple speakers).

Example:

false

min_speakers

integer<int32>

Minimum number of speakers for diarization.

max_speakers

integer<int32>

Maximum number of speakers for diarization.

batch_size

integer<int32>

The transcription batch size.

script_target

string

The target output script style or format.

Response

Task accepted. Returns a taskId for async polling or the result URL directly.

Standard response for audio processing tasks.

status

integer<int32>

HTTP-style status code (200 for success, 202 for in-progress).

url

string

Download URL of the generated audio file (available when completed).

taskId

string

Task identifier for async polling. Use with GET /v1/task/{task_id}.

Example:

"a1b2c3d4-e5f6-7890-abcd-ef1234567890"

error

object

Show child attributes

urls

string[]

Multiple output URLs (e.g. for separation stems).

service

string

port

string

timestamp

string

Voice Conversion Sound Effect Generation

API documentation

Text to Speech

Voice Conversion

Speech to Text

Sound Effects

Audio Separation

Podcast Generation

Task Status

AI Voices

Voice Design

AI Voice Cloning

Music Generation

Audio Enhancement

Speech to Text

Authorizations

Body

Response