Create text-to-speech audio stream over the WebSocket
January 7, 2025
Use POST audio/create-stream to obtain token and payload.
To see the code provided below in action, use Try It.
wss://api.useapi.net/v1/minimax/audio/wss?token=<token>
Query Parameters
token is required. Use POST audio/create-stream to obtain the token.
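For orientation, here is a compact sketch of the connection flow covered in full in the Examples section below. It is a sketch only: api_token is a placeholder for your API token, the text and voice_id values are placeholders you supply, and error handling is omitted.
// 1. POST audio/create-stream to obtain the one-time token and payload
const res = await fetch('https://api.useapi.net/v1/minimax/audio/create-stream', {
  method: 'POST',
  headers: { 'Authorization': `Bearer ${api_token}`, 'Content-Type': 'application/json' },
  body: JSON.stringify({ text: 'your text goes here', voice_id: 'desired voice' })
});
const { token, payload } = await res.json();
// 2. Connect to the WebSocket endpoint, passing the token as a query parameter
const socket = new WebSocket(`wss://api.useapi.net/v1/minimax/audio/wss?token=${token}`);
// 3. Send the payload once open, then handle streamed messages (see the Model section below)
socket.addEventListener('open', () => socket.send(JSON.stringify({ payload })));
socket.addEventListener('message', event => console.log(JSON.parse(event.data)));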
Responses
- { "error": "<Error message>" }
Examples
var player = null;
var ws = null;
const urlCreateStream = 'https://api.useapi.net/v1/minimax/audio/create-stream';
const wssCreateStream = 'wss://api.useapi.net/v1/minimax/audio/wss';
const urlAudio = 'https://api.useapi.net/v1/minimax/audio';
class DynamicAudioPlayer {
  constructor() {
    this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
    this.audioQueue = [];
    this.isPlaying = false;
    this.currentSource = null;
    this.onAudioFinishedCallback = null;
  }
  // Queue a hex-encoded audio chunk and start playback if idle
  async loadAudioData(hexChunk, finishCallback) {
    try {
      // Prepend an MP3 frame header so the chunk decodes as standalone audio
      const byteArray = this.hexStringToByteArray('fffbe8c4' + hexChunk);
      this.audioQueue.push(byteArray);
      this.onAudioFinishedCallback = finishCallback;
      if (!this.isPlaying) {
        this.isPlaying = true;
        await this.playNextChunk();
      }
    } catch (e) {
      console.error("Error decoding audio data:", e);
    }
  }
  // Decode and play the next queued chunk, or signal completion when the queue is empty
  async playNextChunk() {
    if (this.audioQueue.length > 0) {
      const byteArray = this.audioQueue.shift();
      const audioBuffer = await this.audioContext.decodeAudioData(byteArray.buffer);
      this.scheduleAudioBuffer(audioBuffer);
    } else {
      this.isPlaying = false;
      if (this.onAudioFinishedCallback) {
        this.onAudioFinishedCallback();
        this.onAudioFinishedCallback = null;
      }
    }
  }
  // Play a decoded buffer and chain the next chunk when it ends
  scheduleAudioBuffer(audioBuffer) {
    const source = this.audioContext.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(this.audioContext.destination);
    source.start();
    this.currentSource = source;
    source.onended = () => {
      this.playNextChunk();
    };
  }
  // Stop playback and discard any queued chunks
  stop() {
    this.audioQueue = [];
    if (this.currentSource) {
      this.currentSource.stop();
      this.currentSource = null;
    }
    this.isPlaying = false;
  }
  // Convert a hex string into a Uint8Array
  hexStringToByteArray(hexString) {
    const bytes = new Uint8Array(hexString.length / 2);
    for (let i = 0; i < hexString.length; i += 2) {
      bytes[i / 2] = parseInt(hexString.substring(i, i + 2), 16);
    }
    return bytes;
  }
}
async function streamAudio(data, callback, finishCallback) {
  // Handle a single WebSocket message: queue audio chunks for playback and,
  // once generation completes, resolve the generated mp3 by trace_id
  const parseData = async (wssData) => {
    try {
      const json = JSON.parse(wssData);
      let audio;
      if (json.data?.audio) {
        audio = json.data.audio;
        json.data.audio = `…omitted ${audio.length} bytes of raw audio…`;
      }
      // status 1: generation in progress, play the received chunk
      if (json.data?.status == 1 && audio)
        player.loadAudioData(audio, finishCallback);
      if (callback)
        callback({ status: 200, json });
      // status 2: generation completed, locate the mp3 via GET audio using trace_id
      if (json.data?.status == 2 && json.trace_id) {
        const { trace_id } = json;
        const { headers, body } = data;
        const { account } = JSON.parse(body);
        callback({ text: `⌛ GET ${urlAudio} ⁝ looking for generated mp3 using matched trace_id (${trace_id})…` });
        const response = await fetch(`${urlAudio}${account ? '/?account=' + account : ''}`, { headers });
        const text = await response.text();
        if (!response.ok) {
          callback({ status: response.status, text });
          return;
        }
        const { audio_list } = JSON.parse(text);
        const item = audio_list?.find(d => d.audio_title?.endsWith(`_${trace_id}`));
        const { audio_url } = item ?? {};
        callback({ status: response.status, json: item, text: '👉🏻 ' + audio_url });
      }
    } catch (error) {
      console.error(`Failed to parse JSON: ${error}`, wssData);
    }
  };
  // Reset the player and any previous WebSocket connection
  if (player)
    player.stop();
  else
    player = new DynamicAudioPlayer();
  if (ws) {
    ws.close();
    ws = null;
  }
  // Step 1: obtain the one-time WebSocket token and payload
  callback({ text: `⏳ Requesting WebSocket token and payload from ${urlCreateStream}…` });
  const response = await fetch(urlCreateStream, data);
  const text = await response.text();
  callback({ status: response.status, text });
  if (!response.ok)
    return;
  const { token, payload } = JSON.parse(text);
  // Step 2: connect to the WebSocket endpoint and send the payload
  callback({ text: `⌛ Establishing WebSocket connection to ${wssCreateStream}…` });
  ws = new WebSocket(`${wssCreateStream}/?token=${token}`);
  ws.addEventListener('open', () => {
    callback({ text: `🚀 Sending payload over WebSocket connection` });
    ws.send(JSON.stringify({ payload }));
  });
  ws.addEventListener('message', async event => {
    await parseData(event.data);
  });
  ws.addEventListener('error', event => {
    const text = `🛑 WebSocket error: ${JSON.stringify(event)}`;
    console.error(text);
    callback({ text });
  });
  ws.addEventListener('close', event => {
    console.log('WebSocket close', event);
  });
}
// Here's how you call the above functions
const data = {
  method: 'POST',
  headers: {
    'Authorization': `Bearer ${api_token_value}`,
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    text: 'your text goes here',
    voice_id: 'desired voice'
  })
};
await streamAudio(
  data,
  // optional progress callback (receives a single object)
  ({ status, json, text }) => {
    console.log(`callback`, { status, json, text });
  },
  // optional playback completed callback
  () => {
    console.log(`playback completed`);
  }
);
Model
The model below represents the WebSocket message payload object. The value of data.status can be either 1 (progress) or 2 (completed). Once generation is completed, you can locate the generated mp3 by matching the trace_id to the end of audio_list[].audio_url returned by the GET audio endpoint (a short handling sketch follows the model below).
{ // TypeScript, all fields are optional
  data: {
    audio: string
    status: number
    ced: string
  }
  extra_info?: {
    audio_length: number
    audio_sample_rate: number
    audio_size: number
    bitrate: number
    word_count: number
    invisible_character_ratio: number
    usage_characters: number
    audio_format: string
    audio_channel: number
  }
  input_sensitive: boolean
  trace_id: string
  base_resp: {
    status_code: number
    status_msg: string
  }
}
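To illustrate how these fields are typically consumed, the sketch below branches on data.status. It is a hedged sketch, not a complete handler: socket and playChunk are placeholder names standing in for the WebSocket connection and the playback helper (for example DynamicAudioPlayer.loadAudioData from the example above).
// Sketch: interpreting an incoming WebSocket message against the model above
socket.addEventListener('message', event => {
  const msg = JSON.parse(event.data);
  if (msg.data?.status === 1 && msg.data.audio) {
    // In-progress chunk: data.audio carries a hex-encoded slice of the audio stream
    playChunk(msg.data.audio);
  } else if (msg.data?.status === 2) {
    // Completed: use trace_id to locate the finished mp3 via the GET audio endpoint
    console.log('generation completed, trace_id:', msg.trace_id, msg.extra_info);
  }
});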
Try It
See the above code in action at Try It POST audio/create-stream.