Create text-to-speech audio stream over the WebSocket

Table of contents

January 7, 2025 (updated August 21, 2025)

This version of MiniMax audio has been decommissioned. Consider switching to Mureka API

  1. Query Parameters
  2. Responses
  3. Examples
  4. Model
  5. Try It

Use POST audio/create-stream to obtain the token and payload.
To see the code provided below in action, use Try It.

wss://api.useapi.net/v1/minimax/audio/wss?token=token

Query Parameters
Responses
Examples
var player = null;
var ws = null;

const urlCreateStream = 'https://api.useapi.net/v1/minimax/audio/create-stream';
const wssCreateStream = 'wss://api.useapi.net/v1/minimax/audio/wss';
const urlAudio = 'https://api.useapi.net/v1/minimax/audio';

class DynamicAudioPlayer {
    constructor() {
        this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
        this.audioQueue = [];
        this.isPlaying = false;
        this.currentSource = null;
        this.onAudioFinishedCallback = null;
    }

    async loadAudioData(base64Chunk, finishCallback) {
        try {
            const byteArray = this.hexStringToByteArray('fffbe8c4' + base64Chunk);
            this.audioQueue.push(byteArray);
            this.onAudioFinishedCallback = finishCallback;

            if (!this.isPlaying) {
                this.isPlaying = true;
                await this.playNextChunk();
            }
        } catch (e) {
            console.error("Error decoding audio data:", e);
        }
    }

    async playNextChunk() {
        if (this.audioQueue.length > 0) {
            const byteArray = this.audioQueue.shift();
            const audioBuffer = await this.audioContext.decodeAudioData(byteArray.buffer);
            this.scheduleAudioBuffer(audioBuffer);
        } else {
            this.isPlaying = false;
            if (this.onAudioFinishedCallback) {
                this.onAudioFinishedCallback();
                this.onAudioFinishedCallback = null;
            }
        }
    }

    scheduleAudioBuffer(audioBuffer) {
        const source = this.audioContext.createBufferSource();
        source.buffer = audioBuffer;
        source.connect(this.audioContext.destination);
        source.start();
        this.currentSource = source;

        source.onended = () => {
            this.playNextChunk();
        };
    }

    stop() {
        this.audioQueue = [];
        if (this.currentSource) {
            this.currentSource.stop();
            this.currentSource = null;
        }
        this.isPlaying = false;
    }

    hexStringToByteArray(hexString) {
        const bytes = new Uint8Array(hexString.length / 2);
        for (let i = 0; i < hexString.length; i += 2) {
            bytes[i / 2] = parseInt(hexString.substring(i, i + 2), 16);
        }
        return bytes;
    }
}

async function streamAudio(data, callback, finishCallback) {
    const parseData = async (wssData) => {
        try {
            const json = JSON.parse(wssData);

            // Added March 17, 2025
            // Error occurred.
            if(json.data?.status === undefined && json.statusInfo?.code !== 0) {
                callback({ status: json.statusInfo.code, json, text: 'πŸ›‘ ' + json.statusInfo?.message });
                ws.close();
                ws = null;
                finishCallback();
                return;
            }

            let audio;

            if (json.data?.audio) {
                audio = json.data.audio;
                json.data.audio = `…omitted ${audio.length} bytes of raw audio…`;
            }

            if (json.data?.status == 1 && audio)
                player.loadAudioData(audio, finishCallback);

            if (callback)
                callback({ status: 200, json });

            if (json.data?.status == 2) {
                const { headers, body } = data;
                const { account } = JSON.parse(body);

                callback({ text: `βŒ› GET ${urlAudio} ⁝ looking for generated mp3…` });

                const response = await fetch(`${urlAudio}${account ? '/?account=' + account : ''}`, { headers });

                const text = await response.text();

                if (!response.ok) {
                    callback({ status: response.status, text });
                    return;
                }

                const { audio_list } = JSON.parse(text);

                const item = audio_list?.at(0);

                const { audio_url } = item ?? {};

                callback({ status: response.status, json: item, text: 'πŸ‘‰πŸ» ' + audio_url });
            }
        } catch (error) {
            console.error(`Failed to parse JSON: ${error}`, wssData);
        }
    };

    if (player)
        player.stop();
    else
        player = new DynamicAudioPlayer();

    if (ws) {
        ws.close();
        ws = null;
    }

    callback({ text: `⏳ Requesting WebSocket token and payload from ${urlCreateStream}…` });

    const response = await fetch(urlCreateStream, data);

    const text = await response.text();

    callback({ status: response.status, text });

    if (!response.ok)
        return;

    const { token, payload } = JSON.parse(text);

    callback({ text: `βŒ› Establishing WebSocket connection to ${wssCreateStream}…` });

    ws = new WebSocket(`${wssCreateStream}/?token=${token}`);

    ws.addEventListener('open', () => {
        callback({ text: `πŸš€ Sending payload over WebSocket connection` });
        // Updated March 17, 2025
        ws.send(JSON.stringify(payload));
    });

    ws.addEventListener('message', async event => {
        await parseData(event.data);
    });

    ws.addEventListener('error', event => {
        const text = `πŸ›‘ WebSocket error: ${JSON.stringify(event)}`;
        console.error(text);
        callback({ text });
    });

    ws.addEventListener('close', event => {
        console.log('WebSocket close', event);
    });
}

// Here's how you call above functions

const data = {
    method: 'POST',
    headers: {
        'Authorization': `Bearer ${api_token_value}`,
        'Content-Type': 'application/json'
    },
    body: JSON.stringify({
        text: 'your text goes here',
        voice_id: 'desired voice'
    })
};

await streamAudio(
    data,
    // optional progress callback
    (status, json, text) => {
        console.log(`callback`, { status, json, text });
    },
    // optional playback completed callback
    () => {
        console.log(`playback completed`);
    }
);    
Model

The model below represents the WebSocket message payload object.
The value of data.status can be either 1 (progress) or 2 (completed). Once generation is completed, you can locate the generated mp3 file in audio_list[] returned by the GET audio endpoint. It will be the last returned item or alternatively, you can match on text.

{ // TypeScript, all fields are optional
  data: {
    audio: string
    status: number
    ced: string
  }
  extra_info?: {
    audio_length: number
    audio_sample_rate: number
    audio_size: number
    bitrate: number
    word_count: number
    invisible_character_ratio: number
    usage_characters: number
    audio_format: string
    audio_channel: number
  }
  input_sensitive: boolean
  trace_id: string
  base_resp: {
    status_code: number
    status_msg: string
  }
}
Try It

See the above code in action at Try It POST audio/create-stream.