// Source page metadata (not code): generated from tailored/router-template · 395 lines · 13 KiB · TypeScript
// https://www.volcengine.com/docs/6561/1329505#%E7%A4%BA%E4%BE%8Bsamples
|
|
|
|
import { WebSocket } from 'ws';
|
|
|
|
import fs from 'fs/promises';
|
|
import { nanoid } from 'nanoid';
|
|
// Alias for nanoid: `uuidv4()` call sites in this file produce nanoid IDs, not RFC 4122 UUIDs.
const uuidv4 = nanoid;
|
|
// Frame header fields. Header.asBytes() packs each pair of these into one byte
// (high nibble | low nibble), so every value must fit in 4 bits.
const PROTOCOL_VERSION = 0b0001;
const DEFAULT_HEADER_SIZE = 0b0001;

// Message type (high nibble of header byte 1).
const FULL_CLIENT_REQUEST = 0b0001;
const AUDIO_ONLY_RESPONSE = 0b1011;
const FULL_SERVER_RESPONSE = 0b1001;
const ERROR_INFORMATION = 0b1111;

// Message-type-specific flags (low nibble of header byte 1).
const MsgTypeFlagNoSeq = 0b0000; // Non-terminal packet with no sequence
const MsgTypeFlagPositiveSeq = 0b1; // Non-terminal packet with sequence > 0
const MsgTypeFlagLastNoSeq = 0b10; // Last packet with no sequence
const MsgTypeFlagNegativeSeq = 0b11; // NOTE(review): presumably the last packet, carrying a negative sequence — confirm against the protocol docs
const MsgTypeFlagWithEvent = 0b100; // Frame carries an event number (int32) right after the header

// Message serialization (high nibble of header byte 2).
const NO_SERIALIZATION = 0b0000;
const JSON_TYPE = 0b0001;

// Message compression (low nibble of header byte 2).
const COMPRESSION_NO = 0b0000;
const COMPRESSION_GZIP = 0b0001;

// Connection-level events.
const EVENT_NONE = 0;
const EVENT_Start_Connection = 1;
const EVENT_FinishConnection = 2;
const EVENT_ConnectionStarted = 50; // Connection established successfully
const EVENT_ConnectionFailed = 51; // Connection failed (possibly rejected by authentication/authorization)
const EVENT_ConnectionFinished = 52; // Connection finished

// Upstream (client -> server) session events.
const EVENT_StartSession = 100;
const EVENT_FinishSession = 102;

// Downstream (server -> client) session events.
const EVENT_SessionStarted = 150;
const EVENT_SessionFinished = 152;
const EVENT_SessionFailed = 153;

// Upstream general events.
const EVENT_TaskRequest = 200;

// Downstream TTS events.
const EVENT_TTSSentenceStart = 350;
const EVENT_TTSSentenceEnd = 351;
const EVENT_TTSResponse = 352;
|
|
|
|
/**
 * Fixed 4-byte frame header.
 *
 * Wire layout produced by {@link Header.asBytes}:
 *   byte 0: protocolVersion (high nibble) | headerSize (low nibble)
 *   byte 1: messageType (high nibble)     | messageTypeSpecificFlags (low nibble)
 *   byte 2: serialMethod (high nibble)    | compressionType (low nibble)
 *   byte 3: reservedData
 */
class Header {
  headerSize: number;
  protocolVersion: number;
  messageType: number;
  messageTypeSpecificFlags: number;
  serialMethod: number;
  compressionType: number;
  reservedData: number;

  /**
   * @param protocolVersion 4-bit protocol version.
   * @param headerSize 4-bit header size field.
   * @param messageType 4-bit message type.
   * @param messageTypeSpecificFlags 4-bit flags qualifying the message type.
   * @param serialMethod 4-bit payload serialization method.
   * @param compressionType 4-bit payload compression method.
   * @param reservedData Value of the reserved fourth byte.
   */
  constructor(
    protocolVersion: number = PROTOCOL_VERSION,
    headerSize: number = DEFAULT_HEADER_SIZE,
    messageType: number = 0,
    messageTypeSpecificFlags: number = 0,
    serialMethod: number = NO_SERIALIZATION,
    compressionType: number = COMPRESSION_NO,
    reservedData: number = 0,
  ) {
    this.headerSize = headerSize;
    this.protocolVersion = protocolVersion;
    this.messageType = messageType;
    this.messageTypeSpecificFlags = messageTypeSpecificFlags;
    this.serialMethod = serialMethod;
    this.compressionType = compressionType;
    this.reservedData = reservedData;
  }

  /**
   * Serialize the header to its 4-byte wire form.
   *
   * Every field is masked to its own width so an out-of-range value cannot
   * spill into the neighbouring nibble.
   *
   * @returns A new 4-byte Buffer.
   */
  asBytes(): Buffer {
    const packNibbles = (high: number, low: number): number => ((high & 0x0f) << 4) | (low & 0x0f);

    return Buffer.from([
      packNibbles(this.protocolVersion, this.headerSize),
      packNibbles(this.messageType, this.messageTypeSpecificFlags),
      packNibbles(this.serialMethod, this.compressionType),
      this.reservedData & 0xff,
    ]);
  }
}
|
|
|
|
/**
 * Optional fields that follow the 4-byte header: event number, session id,
 * sequence, plus fields that are only filled in when parsing server frames
 * (errorCode, connectionId, responseMetaJson).
 */
class Optional {
  event: number;
  sessionId: string | null;
  errorCode: number;
  connectionId: string | null;
  responseMetaJson: string | null;
  sequence: number | null;

  /**
   * @param event Event number; EVENT_NONE means "no event field".
   * @param sessionId Session id, or null to omit it from the frame.
   * @param sequence Sequence number, or null to omit it from the frame.
   */
  constructor(event: number = EVENT_NONE, sessionId: string | null = null, sequence: number | null = null) {
    this.event = event;
    this.sessionId = sessionId;
    this.errorCode = 0;
    this.connectionId = null;
    this.responseMetaJson = null;
    this.sequence = sequence;
  }

  /**
   * Serialize the fields that are set, in this order:
   *   event (int32 BE), sessionId (int32 BE byte length + UTF-8 bytes), sequence (int32 BE).
   * Fields that are unset (EVENT_NONE / null) contribute no bytes.
   *
   * @returns The concatenated bytes; an empty Buffer when nothing is set.
   */
  asBytes(): Buffer {
    const int32 = (value: number): Buffer => {
      const bytes = Buffer.alloc(4);
      bytes.writeInt32BE(value);
      return bytes;
    };

    const parts: Buffer[] = [];

    if (this.event !== EVENT_NONE) {
      parts.push(int32(this.event));
    }

    if (this.sessionId !== null) {
      const sessionIdBytes = Buffer.from(this.sessionId, 'utf8');
      // The length prefix counts bytes, not characters.
      parts.push(int32(sessionIdBytes.length), sessionIdBytes);
    }

    if (this.sequence !== null) {
      parts.push(int32(this.sequence));
    }

    return Buffer.concat(parts);
  }
}
|
|
|
|
/**
 * A parsed server frame: header, optional fields and (when present) the raw payload.
 */
class Response {
  optional: Optional;
  header: Header;
  payload: Buffer | null;

  /**
   * @param header Header object to be filled in by the parser.
   * @param optional Optional-fields object to be filled in by the parser.
   */
  constructor(header: Header, optional: Optional) {
    this.optional = optional;
    this.header = header;
    this.payload = null;
  }

  /**
   * @returns The payload decoded as a string, or '' when there is no payload.
   */
  toString(): string {
    return this.payload?.toString() ?? '';
  }
}
|
|
|
|
/**
 * Send one binary frame over the WebSocket.
 *
 * Frame layout: header bytes, then the optional-field bytes (if any), then —
 * when a payload is given — a 4-byte big-endian payload length followed by
 * the payload itself.
 *
 * @param ws Socket to send on.
 * @param header Serialized frame header.
 * @param optional Serialized optional fields, or null to omit them.
 * @param payload Payload bytes, or null to omit both the length prefix and the payload.
 * @returns A promise that resolves once `ws.send` reports success and rejects with its error otherwise.
 */
async function sendEvent(ws: WebSocket, header: Buffer, optional: Buffer | null = null, payload: Buffer | null = null): Promise<void> {
  const parts: Buffer[] = [header];

  if (optional !== null) {
    parts.push(optional);
  }

  if (payload !== null) {
    const payloadSize = Buffer.alloc(4);
    payloadSize.writeInt32BE(payload.length);
    parts.push(payloadSize, payload);
  }

  const frame = Buffer.concat(parts);

  // ws.send is callback-based; adapt it to a promise.
  await new Promise<void>((resolve, reject) => {
    ws.send(frame, (err?: Error) => {
      if (err) {
        reject(err);
      } else {
        resolve();
      }
    });
  });
}
|
|
|
|
/**
 * Read one length-prefixed string from a response buffer.
 *
 * The field is a 4-byte big-endian signed length followed by that many bytes,
 * decoded with Buffer's default (UTF-8) encoding.
 *
 * @param res Raw response bytes.
 * @param offset Position of the length prefix.
 * @returns The decoded string and the offset of the first byte after it.
 * @throws RangeError if the prefix cannot be read, is negative, or points past the end of `res`.
 */
function readResContent(res: Buffer, offset: number): [string, number] {
  const contentSize = res.readInt32BE(offset);
  const start = offset + 4;
  const end = start + contentSize;

  // Without this check a corrupt length would silently truncate the content
  // and hand a bogus offset to the next read.
  if (contentSize < 0 || end > res.length) {
    throw new RangeError(`Invalid content length ${contentSize} at offset ${offset} (buffer is ${res.length} bytes)`);
  }

  return [res.subarray(start, end).toString(), end];
}
|
|
|
|
/**
 * Read one length-prefixed binary payload from a response buffer.
 *
 * The field is a 4-byte big-endian signed length followed by that many bytes.
 * The returned Buffer is a view onto `res`, not a copy.
 *
 * @param res Raw response bytes.
 * @param offset Position of the length prefix.
 * @returns The payload bytes and the offset of the first byte after them.
 * @throws RangeError if the prefix cannot be read, is negative, or points past the end of `res`.
 */
function readResPayload(res: Buffer, offset: number): [Buffer, number] {
  const payloadSize = res.readInt32BE(offset);
  const start = offset + 4;
  const end = start + payloadSize;

  // Reject corrupt lengths instead of returning a silently truncated payload.
  if (payloadSize < 0 || end > res.length) {
    throw new RangeError(`Invalid payload length ${payloadSize} at offset ${offset} (buffer is ${res.length} bytes)`);
  }

  return [res.subarray(start, end), end];
}
|
|
|
|
/**
 * Parse one raw WebSocket message into a Response.
 *
 * Layout handled here:
 *   - bytes 0..3: header (see the nibble unpacking below);
 *   - ERROR_INFORMATION frames: int32 error code, then a length-prefixed payload;
 *   - FULL_SERVER_RESPONSE / AUDIO_ONLY_RESPONSE frames whose flags equal
 *     MsgTypeFlagWithEvent: int32 event number, then event-specific fields.
 * Any other frame yields a Response with only the header filled in.
 *
 * NOTE(review): the body is assumed to start at byte 4 regardless of the
 * header-size field, and compressionType is not acted upon — confirm against
 * the protocol docs.
 *
 * @param res Raw message; a string is treated as an error message from the transport.
 * @returns The parsed response.
 * @throws Error if `res` is a string (its text becomes the error message).
 * @throws RangeError if the frame is shorter than the 4-byte header or a field cannot be read.
 */
function parserResponse(res: Buffer | string): Response {
  if (typeof res === 'string') {
    throw new Error(res);
  }

  // A truncated header would otherwise be read as zeros/undefined and
  // produce a plausible-looking but meaningless Response.
  if (res.length < 4) {
    throw new RangeError(`Frame too short: expected a 4-byte header, got ${res.length} byte(s)`);
  }

  const response = new Response(new Header(), new Optional());
  const { header, optional } = response;

  const highNibble = (byte: number): number => (byte >> 4) & 0x0f;
  const lowNibble = (byte: number): number => byte & 0x0f;

  const versionAndSize = res.readUInt8(0);
  const typeAndFlags = res.readUInt8(1);
  const serialAndCompression = res.readUInt8(2);

  header.protocolVersion = highNibble(versionAndSize);
  header.headerSize = lowNibble(versionAndSize);
  header.messageType = highNibble(typeAndFlags);
  header.messageTypeSpecificFlags = lowNibble(typeAndFlags);
  header.serialMethod = highNibble(serialAndCompression);
  header.compressionType = lowNibble(serialAndCompression);
  header.reservedData = res.readUInt8(3);

  let offset = 4;

  if (header.messageType === ERROR_INFORMATION) {
    optional.errorCode = res.readInt32BE(offset);
    offset += 4;
    [response.payload, offset] = readResPayload(res, offset);
    return response;
  }

  const isServerResponse = header.messageType === FULL_SERVER_RESPONSE || header.messageType === AUDIO_ONLY_RESPONSE;
  if (!isServerResponse || header.messageTypeSpecificFlags !== MsgTypeFlagWithEvent) {
    return response;
  }

  optional.event = res.readInt32BE(offset);
  offset += 4;

  switch (optional.event) {
    case EVENT_NONE:
      // No event-specific fields follow.
      break;
    case EVENT_ConnectionStarted:
      [optional.connectionId, offset] = readResContent(res, offset);
      break;
    case EVENT_ConnectionFailed:
      [optional.responseMetaJson, offset] = readResContent(res, offset);
      break;
    case EVENT_SessionStarted:
    case EVENT_SessionFailed:
    case EVENT_SessionFinished:
      [optional.sessionId, offset] = readResContent(res, offset);
      [optional.responseMetaJson, offset] = readResContent(res, offset);
      break;
    default:
      // Every other event carries a session id followed by a payload.
      [optional.sessionId, offset] = readResContent(res, offset);
      [response.payload, offset] = readResPayload(res, offset);
      break;
  }

  return response;
}
|
|
|
|
/**
 * Debug hook for tracing parsed frames. Currently a no-op: the logging
 * statements are left commented out.
 *
 * @param res Parsed response to trace.
 * @param tag Label to prefix the log lines with.
 */
function printResponse(res: Response, tag: string): void {
  // Uncomment to trace frames while debugging:
  // console.log(`===>${tag} header:`, res.header, res.optional.event);
  // console.log(`===>${tag} optional:`, res.optional);
}
|
|
|
|
/**
 * Build the JSON request payload and return it as bytes.
 *
 * The JSON has the shape
 * `{ user: { uid }, event, namespace: 'BidirectionalTTS',
 *    req_params: { text, speaker, audio_params: { format, sample_rate } } }`.
 *
 * @param uid Value for `user.uid`.
 * @param event Event number placed in the `event` field.
 * @param text Value for `req_params.text`.
 * @param speaker Value for `req_params.speaker`.
 * @param audioFormat Value for `req_params.audio_params.format`.
 * @param audioSampleRate Value for `req_params.audio_params.sample_rate`.
 * @returns The JSON document encoded with Buffer's default (UTF-8) encoding.
 */
function getPayloadBytes(
  uid: string = '1234',
  event: number = EVENT_NONE,
  text: string = '',
  speaker: string = '',
  audioFormat: string = 'mp3',
  audioSampleRate: number = 24000,
): Buffer {
  const request = {
    user: { uid },
    event,
    namespace: 'BidirectionalTTS',
    req_params: {
      text,
      speaker,
      audio_params: {
        format: audioFormat,
        sample_rate: audioSampleRate,
      },
    },
  };

  return Buffer.from(JSON.stringify(request));
}
|
|
|
|
/**
 * Send the StartConnection event with an empty JSON object (`{}`) as payload.
 *
 * NOTE(review): unlike the other request helpers, the header here leaves the
 * serialization field at its default (NO_SERIALIZATION) even though the
 * payload is JSON text — confirm this is what the server expects.
 *
 * @param websocket Open socket to send on.
 * @returns A promise that settles when the frame has been handed to the socket.
 */
async function startConnection(websocket: WebSocket): Promise<void> {
  const header = new Header(PROTOCOL_VERSION, DEFAULT_HEADER_SIZE, FULL_CLIENT_REQUEST, MsgTypeFlagWithEvent).asBytes();
  const optional = new Optional(EVENT_Start_Connection).asBytes();
  const payload = Buffer.from('{}');

  await sendEvent(websocket, header, optional, payload);
}
|
|
|
|
/**
 * Send the StartSession event for `sessionId`.
 *
 * The JSON payload carries the speaker and an empty text; the user id is the
 * fixed placeholder '1234'.
 *
 * @param websocket Open socket to send on.
 * @param speaker Speaker identifier placed in the request parameters.
 * @param sessionId Session id written into the frame's optional fields.
 * @returns A promise that settles when the frame has been handed to the socket.
 */
async function startSession(websocket: WebSocket, speaker: string, sessionId: string): Promise<void> {
  const header = new Header(PROTOCOL_VERSION, DEFAULT_HEADER_SIZE, FULL_CLIENT_REQUEST, MsgTypeFlagWithEvent, JSON_TYPE).asBytes();
  const optional = new Optional(EVENT_StartSession, sessionId).asBytes();
  const payload = getPayloadBytes('1234', EVENT_StartSession, '', speaker);

  await sendEvent(websocket, header, optional, payload);
}
|
|
|
|
/**
 * Send a TaskRequest event carrying the text to synthesize.
 *
 * The user id in the JSON payload is the fixed placeholder '1234'.
 *
 * @param ws Open socket to send on.
 * @param speaker Speaker identifier placed in the request parameters.
 * @param text Text placed in the request parameters.
 * @param sessionId Session id written into the frame's optional fields.
 * @returns A promise that settles when the frame has been handed to the socket.
 */
async function sendText(ws: WebSocket, speaker: string, text: string, sessionId: string): Promise<void> {
  const header = new Header(PROTOCOL_VERSION, DEFAULT_HEADER_SIZE, FULL_CLIENT_REQUEST, MsgTypeFlagWithEvent, JSON_TYPE).asBytes();
  const optional = new Optional(EVENT_TaskRequest, sessionId).asBytes();
  const payload = getPayloadBytes('1234', EVENT_TaskRequest, text, speaker);

  await sendEvent(ws, header, optional, payload);
}
|
|
|
|
/**
 * Send the FinishSession event for `sessionId` with an empty JSON object (`{}`) as payload.
 *
 * @param ws Open socket to send on.
 * @param sessionId Session id written into the frame's optional fields.
 * @returns A promise that settles when the frame has been handed to the socket.
 */
async function finishSession(ws: WebSocket, sessionId: string): Promise<void> {
  const header = new Header(PROTOCOL_VERSION, DEFAULT_HEADER_SIZE, FULL_CLIENT_REQUEST, MsgTypeFlagWithEvent, JSON_TYPE).asBytes();
  const optional = new Optional(EVENT_FinishSession, sessionId).asBytes();
  const payload = Buffer.from('{}');

  await sendEvent(ws, header, optional, payload);
}
|
|
|
|
/**
 * Send the FinishConnection event with an empty JSON object (`{}`) as payload.
 *
 * @param ws Open socket to send on.
 * @returns A promise that settles when the frame has been handed to the socket.
 */
async function finishConnection(ws: WebSocket): Promise<void> {
  const header = new Header(PROTOCOL_VERSION, DEFAULT_HEADER_SIZE, FULL_CLIENT_REQUEST, MsgTypeFlagWithEvent, JSON_TYPE).asBytes();
  const optional = new Optional(EVENT_FinishConnection).asBytes();
  const payload = Buffer.from('{}');

  await sendEvent(ws, header, optional, payload);
}
|
|
|
|
/**
 * Synthesize `text` with the bidirectional TTS WebSocket API and write the
 * received audio to `outputPath`.
 *
 * Exchange driven by this function:
 *   open -> StartConnection -> (ConnectionStarted) -> StartSession ->
 *   (SessionStarted) -> TaskRequest + FinishSession -> audio frames ->
 *   any other frame (e.g. SessionFinished) -> FinishConnection ->
 *   (ConnectionFinished) -> resolve.
 *
 * Incoming messages are processed strictly one at a time so that file writes
 * never overlap and the output file is open before the first audio frame is
 * handled.
 *
 * @param appId Value sent in the `X-Api-App-Key` header.
 * @param token Value sent in the `X-Api-Access-Key` header.
 * @param speaker Speaker identifier passed to the session and text requests.
 * @param text Text to synthesize.
 * @param outputPath File that receives the audio bytes (opened with flag 'w').
 * @returns A promise that resolves after ConnectionFinished is received and the file is closed.
 *   It rejects on socket errors, on a socket close before ConnectionFinished, on
 *   ConnectionFailed / SessionFailed events, on server error frames, and on any
 *   parse, send or file error.
 */
export function runDemo(appId: string, token: string, speaker: string, text: string, outputPath: string): Promise<void> {
  return new Promise((resolve, reject) => {
    const wsHeader = {
      'X-Api-App-Key': appId,
      'X-Api-Access-Key': token,
      'X-Api-Resource-Id': 'volc.service_type.10029',
      'X-Api-Connect-Id': uuidv4(),
    };

    const url = 'wss://openspeech.bytedance.com/api/v3/tts/bidirection';
    const ws = new WebSocket(url, { headers: wsHeader });

    let fileHandle: fs.FileHandle | null = null;
    let sessionId = '';
    let sessionStarted = false;
    let finishRequested = false;
    let settled = false;
    // Tail of the message-processing chain; each message waits for the previous one.
    let pending: Promise<void> = Promise.resolve();

    // Close the output file at most once.
    const closeFile = async (): Promise<void> => {
      const handle = fileHandle;
      fileHandle = null;
      if (handle !== null) {
        await handle.close();
      }
    };

    // Settle successfully: close the socket and the file, then resolve.
    const succeed = (): void => {
      if (settled) return;
      settled = true;
      ws.close();
      closeFile().then(resolve, reject);
    };

    // Settle with `err`: close the socket and the file, then reject with the original cause.
    const fail = (err: unknown): void => {
      if (settled) return;
      settled = true;
      ws.close();
      const rejectWithCause = (): void => reject(err);
      closeFile().then(rejectWithCause, rejectWithCause);
    };

    // Handle one server message; throwing here rejects the whole run.
    const handleMessage = async (data: Buffer): Promise<void> => {
      const res = parserResponse(data);
      printResponse(res, 'message res:');

      if (res.header.messageType === ERROR_INFORMATION) {
        throw new Error(`TTS server returned error ${res.optional.errorCode}: ${res.toString()}`);
      }

      switch (res.optional.event) {
        case EVENT_ConnectionStarted:
          sessionId = uuidv4().replace(/-/g, '');
          await startSession(ws, speaker, sessionId);
          return;

        case EVENT_ConnectionFailed:
          throw new Error(`TTS connection failed: ${res.optional.responseMetaJson ?? ''}`);

        case EVENT_ConnectionFinished:
          succeed();
          return;

        case EVENT_SessionFailed:
          throw new Error(`TTS session failed: ${res.optional.responseMetaJson ?? ''}`);

        case EVENT_SessionStarted: {
          if (sessionStarted) return;
          sessionStarted = true;

          // Open the file before requesting audio so no audio frame can be
          // handled while the handle is still missing.
          const handle = await fs.open(outputPath, 'w');
          if (settled) {
            // The run failed while the file was being opened; do not leak the handle.
            await handle.close();
            return;
          }
          fileHandle = handle;

          await sendText(ws, speaker, text, sessionId);
          await finishSession(ws, sessionId);
          return;
        }

        case EVENT_TTSSentenceStart:
        case EVENT_TTSSentenceEnd:
          return;

        case EVENT_TTSResponse:
          if (res.header.messageType === AUDIO_ONLY_RESPONSE && res.payload && fileHandle) {
            await fileHandle.write(res.payload);
            return;
          }
          break;

        default:
          break;
      }

      // Any other frame after the session has started (e.g. SessionFinished)
      // ends the exchange: flush the file and ask the server to finish.
      if (sessionStarted && !finishRequested) {
        finishRequested = true;
        await closeFile();
        await finishConnection(ws);
      }
    };

    // Registered before anything is sent so no server message can be missed.
    ws.on('message', (data) => {
      pending = pending
        .then(async () => {
          if (settled) return;
          await handleMessage(data as Buffer);
        })
        .catch(fail);
    });

    ws.on('open', () => {
      startConnection(ws).catch(fail);
    });

    ws.on('error', fail);

    // A close that arrives before ConnectionFinished would otherwise leave the promise pending forever.
    ws.on('close', () => {
      fail(new Error('WebSocket closed before the TTS connection finished'));
    });
  });
}
|
|
|
|
/**
 * Thin wrapper that stores the API credentials and forwards synthesis
 * requests to `runDemo`.
 */
export class TtsMix {
  appId: string;
  token: string;

  /**
   * @param appId Application id passed to `runDemo`.
   * @param token Access token passed to `runDemo`.
   */
  constructor(appId: string, token: string) {
    this.appId = appId;
    this.token = token;
  }

  /**
   * Synthesize speech for `text` and write it to `outputPath`.
   *
   * @param speaker Speaker identifier.
   * @param text Text to synthesize.
   * @param outputPath Path of the output file.
   * @returns The promise returned by `runDemo` for this request.
   */
  async getVoiceDemo(speaker: string, text: string, outputPath: string): Promise<void> {
    return runDemo(this.appId, this.token, speaker, text, outputPath);
  }
}
|