temp: add test

This commit is contained in:
2025-04-18 18:28:34 +08:00
parent d92b93c6f9
commit fdc3985b93
22 changed files with 669 additions and 2082 deletions

View File

@@ -1,16 +1,16 @@
// import WebSocket from 'ws';
import { initWs } from '../../ws/index.ts';
import { initWs } from '../../ws-adapter/index.ts';
type VideoWSOptions = {
export type VideoWSOptions = {
url?: string;
ws?: WebSocket;
itn?: boolean;
mode?: string;
mode?: VideoWsMode;
isFile?: boolean;
onConnect?: () => void;
};
export const VideoWsMode = ['2pass', 'online', 'offline'];
type VideoWsMode = (typeof VideoWsMode)[number];
export const videoWsMode = ['2pass', 'online', 'offline'] as const;
type VideoWsMode = (typeof videoWsMode)[number];
export type VideoWsResult = {
isFinal: boolean;

View File

View File

@@ -1,8 +1,4 @@
import * as fs from 'fs/promises';
import * as path from 'path';
import * as zlib from 'zlib';
import * as util from 'util';
import { Readable } from 'stream';
import { promisify } from 'util';
import { nanoid } from 'nanoid';
import { VolcEngineBase, uuid } from './base.ts';
@@ -254,7 +250,6 @@ interface AudioItem {
* ASR WebSocket Client
*/
export class AsrWsClient extends VolcEngineBase {
private audioPath: string;
private successCode: number = 1000;
private segDuration: number;
private format: string;
@@ -262,16 +257,14 @@ export class AsrWsClient extends VolcEngineBase {
private bits: number;
private channel: number;
private codec: string;
private authMethod: string;
private hotWords: string[] | null;
private streaming: boolean;
private mp3SegSize: number;
private reqEvent: number = 1;
private uid: string;
private seq: number = 1;
private hasSendFullClientRequest: boolean = false;
constructor(audioPath: string, options: AsrClientOptions = {}) {
constructor(options: AsrClientOptions = {}) {
super({
url: options.wsUrl || 'wss://openspeech.bytedance.com/api/v3/sauc/bigmodel',
onConnect: () => this.onWsConnect(),
@@ -285,22 +278,19 @@ export class AsrWsClient extends VolcEngineBase {
},
});
this.audioPath = audioPath;
this.segDuration = options.segDuration || 100;
this.segDuration = options.segDuration || 200;
this.uid = options.uid || 'test';
this.format = options.format || 'wav';
this.rate = options.rate || 16000;
this.bits = options.bits || 16;
this.channel = options.channel || 1;
this.codec = options.codec || 'raw';
this.authMethod = options.authMethod || 'none';
this.hotWords = options.hotWords || null;
this.streaming = options.streaming !== undefined ? options.streaming : true;
this.mp3SegSize = options.mp3SegSize || 1000;
}
private onWsConnect() {
console.log('ASR WebSocket connected');
console.log('ASR Big Model WebSocket connected');
}
/**
@@ -321,6 +311,7 @@ export class AsrWsClient extends VolcEngineBase {
request: {
model_name: 'bigmodel',
enable_punc: true,
result_type: 'single', // all, single
},
};
}
@@ -357,90 +348,59 @@ export class AsrWsClient extends VolcEngineBase {
private async segmentDataProcessor(audioData: Buffer, segmentSize: number): Promise<any> {
await this.sendFullClientRequest();
const that = this;
// Wait for response
const result = await new Promise<any>((resolve, reject) => {
const onMessage = async (event: MessageEvent) => {
try {
const response = parseResponse(Buffer.from(event.data as ArrayBuffer));
console.log('Initial response:', response);
// Process audio chunks
for (const [chunk, last] of sliceData(audioData, segmentSize)) {
that.seq += 1;
if (last) {
that.seq = -that.seq;
}
const seq = that.seq;
const start = Date.now();
const compressedChunk = await gzipPromise(chunk);
const messageType = AUDIO_ONLY_REQUEST;
const flags = last ? NEG_WITH_SEQUENCE : POS_SEQUENCE;
const audioRequest = Buffer.concat([generateHeader(messageType, flags), generateBeforePayload(seq), Buffer.alloc(4), compressedChunk]);
// Set payload size
audioRequest.writeUInt32BE(compressedChunk.length, 8);
// Send audio chunk
(this as any).ws.send(audioRequest);
// Wait for each response
const chunkResponse = await new Promise<any>((resolveChunk) => {
const onChunkMessage = (chunkEvent: MessageEvent) => {
(this as any).ws.removeEventListener('message', onChunkMessage);
const parsed = parseResponse(Buffer.from(chunkEvent.data as ArrayBuffer));
console.log(`Seq ${seq} response:`, parsed);
resolveChunk(parsed);
};
(this as any).ws.addEventListener('message', onChunkMessage, { once: true });
});
// If streaming, add delay to simulate real-time
if (this.streaming) {
const elapsed = Date.now() - start;
const sleepTime = Math.max(0, this.segDuration - elapsed);
await new Promise((r) => setTimeout(r, sleepTime));
}
// If this is the last chunk, resolve with final result
if (last) {
resolve(chunkResponse);
break;
}
}
(this as any).ws.removeEventListener('message', onMessage);
} catch (error) {
console.error('Error processing response:', error);
reject(error);
const sendVoice = async (audioData: Buffer, segmentSize: number) => {
that.setCanSend(false);
for (const [chunk, last] of sliceData(audioData, segmentSize)) {
that.seq += 1;
const isEnd = that.isEnd && last; // 结束了,而且是语音的最后一包
if (isEnd) {
that.seq = -that.seq;
}
};
const seq = that.seq;
const compressedChunk = await gzipPromise(chunk);
(this as any).ws.addEventListener('message', onMessage, { once: true });
const messageType = AUDIO_ONLY_REQUEST;
const flags = isEnd ? NEG_WITH_SEQUENCE : POS_SEQUENCE;
(this as any).ws.addEventListener(
'error',
(error) => {
console.error('WebSocket error:', error);
reject(error);
},
{ once: true },
);
});
const audioRequest = Buffer.concat([generateHeader(messageType, flags), generateBeforePayload(seq), Buffer.alloc(4), compressedChunk]);
return result;
// Set payload size
audioRequest.writeUInt32BE(compressedChunk.length, 8);
// Send audio chunk
this.ws.send(audioRequest);
// 待测试, 是否需要等待
// const chunkResponse = await new Promise<any>((resolveChunk) => {
// const onChunkMessage = (chunkEvent: MessageEvent) => {
// (this as any).ws.removeEventListener('message', onChunkMessage);
// const parsed = parseResponse(Buffer.from(chunkEvent.data as ArrayBuffer));
// resolveChunk(parsed);
// };
// (this as any).ws.addEventListener('message', onChunkMessage, { once: true });
// });
// if (last) {
// console.log('last', JSON.stringify(chunkResponse));
// break;
// }
}
that.setCanSend(true);
};
// Wait for response
await sendVoice(audioData, segmentSize);
}
/**
* Execute ASR on the audio file
*/
public async execute(): Promise<any> {
async onMessage(event: MessageEvent) {
try {
const parsed = parseResponse(Buffer.from(event.data as ArrayBuffer));
console.log(`Seq ${parsed.payloadSequence} response:`, parsed);
// console.log('parsed', parsed.payloadSequence, parsed.payloadMsg.result.text);
if (parsed.isLastPackage) {
this.emitter.emit('end', parsed);
}
} catch (error) {
console.error('Error processing response:', error);
}
}
public async sendVoiceFile(data: Buffer) {
try {
const data = await fs.readFile(this.audioPath);
if (this.format === 'mp3') {
const segmentSize = this.mp3SegSize;
return await this.segmentDataProcessor(data, segmentSize);
@@ -465,65 +425,9 @@ export class AsrWsClient extends VolcEngineBase {
throw error;
}
}
/**
* Send OPUS data for processing
*/
public async sendOpusData(audioData: Buffer): Promise<any> {
const segmentSize = Math.floor((this.rate * 2 * this.channel * this.segDuration) / 500);
return await this.segmentDataProcessor(audioData, segmentSize);
public async sendVoiceStream(data: Buffer) {
const segmentSize = Buffer.byteLength(data);
console.log('segmentSize', segmentSize);
return await this.segmentDataProcessor(data, segmentSize);
}
}
/**
* Execute ASR on a single audio file
*/
export async function executeOne(audioItem: AudioItem, options: AsrClientOptions = {}): Promise<any> {
if (!audioItem.id || !audioItem.path) {
throw new Error('Audio item must have id and path properties');
}
const audioId = audioItem.id;
const audioPath = path.resolve(process.cwd(), audioItem.path);
const asrClient = new AsrWsClient(audioPath, options);
await new Promise((resolve) => setTimeout(resolve, 2000));
return asrClient.execute().then((result) => {
return {
id: audioId,
path: audioPath,
result: result,
};
});
}
/**
* Test stream processing
*/
export const testStream = async () => {
console.log('测试流式');
const audioPath = 'videos/asr_example.wav';
const res = await executeOne({
id: 1,
path: audioPath,
})
.then((result) => {
console.log('====end test=====');
console.log(result);
return result;
})
.catch((error) => {
console.error('Test error:', error);
return '';
});
};
/**
* Handle audio data directly
*/
export async function handleAudioData(audioData: Buffer, options: AsrClientOptions = {}): Promise<any> {
const asrClient = new AsrWsClient('', options);
return await asrClient.sendOpusData(audioData);
}

View File

@@ -265,7 +265,6 @@ export class AsrWsClient extends VolcEngineBase {
super({
url: options.wsUrl || 'wss://openspeech.bytedance.com/api/v2/asr',
onConnect: () => this.onWsConnect(),
enabled: false,
wsOptions: {
headers: {
Authorization: `Bearer; ${options.token}`,
@@ -329,13 +328,6 @@ export class AsrWsClient extends VolcEngineBase {
},
};
}
/**
* Generate headers for authentication
*/
private tokenAuth(): Record<string, string> {
return { Authorization: `Bearer; ${this.token}` };
}
/**
* Process audio data in segments
*/

View File

@@ -1,5 +1,5 @@
import { initWs } from '../../ws/index.ts';
import { WSServer } from '../ws-server.ts';
import { initWs } from '../../ws-adapter/index.ts';
import { WSServer } from '../../provider/ws-server.ts';
import { nanoid } from 'nanoid';
export const uuid = () => nanoid(16);
@@ -7,7 +7,6 @@ export const uuid = () => nanoid(16);
type VolcEngineBaseOptions = {
url?: string;
ws?: WebSocket;
enabled?: boolean;
onConnect?: () => void;
wsOptions?: {
headers?: {
@@ -20,17 +19,38 @@ type VolcEngineBaseOptions = {
};
};
export class VolcEngineBase extends WSServer {
canSend = false;
isEnd: boolean = false;
constructor(opts: VolcEngineBaseOptions) {
super({
url: opts.url,
ws: opts.ws,
onConnect: opts.onConnect,
wsOptions: opts.wsOptions,
enabled: opts.enabled,
});
}
async onOpen() {
console.log('VolcEngineBase onOpen');
// 发送认证信息
}
async isCanSend() {
if (this.canSend) {
return true;
}
return new Promise((resolve) => {
this.emitter.once('canSend', () => {
resolve(true);
});
});
}
async setCanSend(canSend: boolean) {
this.canSend = canSend;
if (canSend) {
this.emitter.emit('canSend', canSend);
}
}
async setIsEnd(isEnd: boolean) {
this.isEnd = isEnd;
}
}

View File

@@ -1,28 +1,21 @@
import { AsrWsClient, testStream } from '../asr-ws-big-model-client.ts';
import { audioPath, config } from './common.ts';
import { AsrWsClient } from '../asr-ws-big-model-client.ts';
import { audioPath, audioPath2, blankAudioPath, config } from './common.ts';
import fs from 'fs';
// const asr = new AsrWsClient('videos/asr_example.wav');
// tsx src/asr/provider/volcengine/test/asr-bigmodel.ts
const main = async () => {
const audioId = '123';
const asrClient = new AsrWsClient(audioPath, {
const asrClient = new AsrWsClient({
appid: config.APP_ID,
token: config.TOKEN,
streaming: false,
});
await new Promise((resolve) => setTimeout(resolve, 2000));
return asrClient.execute().then((result) => {
return {
id: audioId,
path: audioPath,
result: result,
};
});
};
const main2 = async () => {
testStream();
const data = fs.readFileSync(audioPath);
await asrClient.sendVoiceFile(data);
await asrClient.sendVoiceFile(fs.readFileSync(blankAudioPath));
asrClient.setIsEnd(true);
await asrClient.sendVoiceFile(fs.readFileSync(audioPath2));
};
main();

View File

@@ -6,5 +6,7 @@ export const config = dotenv.config({
}).parsed;
export const audioPath = path.join(process.cwd(), 'videos/asr_example.wav');
export const audioPath2 = path.join(process.cwd(), 'videos/asr_example2.wav');
export const blankAudioPath = path.join(process.cwd(), 'videos/blank.wav');
export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

View File

@@ -0,0 +1,39 @@
import { AsrWsClient } from '../asr-ws-big-model-client.ts';
import { audioPath, config, sleep } from '../test/common.ts';
import net from 'net';
import { Recording } from '../../../../recorder/index.ts';
import Stream from 'stream';
const recorder = new Recording();
const asrClient = new AsrWsClient({
appid: config.APP_ID,
token: config.TOKEN,
});
// tsx src/asr/provider/volcengine/test/recorder.ts
const main = async () => {
// await asrClient.sendVoiceFile(fs.readFileSync(audioPath));
const send = (data: Buffer) => {
asrClient.sendVoiceStream(data);
};
let chunks: Buffer = Buffer.alloc(0);
var chunk_size = 960; // for asr chunk_size [5, 10, 5]
let totalsend = 0;
recorder.stream().on('data', (chunk) => {
chunks = Buffer.concat([chunks, chunk]);
const chunkSize = Buffer.byteLength(chunks);
if (chunkSize > chunk_size) {
send(chunks);
totalsend += chunks.length;
chunks = Buffer.alloc(0);
}
});
// setTimeout(() => {
// recorder.stop();
// }, 10000);
};
main();

View File

@@ -1,41 +1,77 @@
import { initWs } from '../ws/index.ts';
import { EventEmitter } from 'eventemitter3';
import { initWs } from '../ws-adapter/index.ts';
import type { ClientOptions } from 'ws';
type WSSOptions = {
url: string;
ws?: WebSocket;
onConnect?: () => void;
wsOptions?: ClientOptions;
enabled?: boolean;
emitter?: EventEmitter;
};
export class WSServer {
ws: WebSocket;
onConnect?: () => void;
connected: boolean;
emitter: EventEmitter;
constructor(opts: WSSOptions) {
this.initWs(opts);
}
async initWs(opts: WSSOptions) {
const enabled = opts.enabled || true;
if (opts.ws) {
this.ws = opts.ws;
} else if (enabled) {
this.ws = await initWs(opts.url, opts.wsOptions);
}
this.emitter = opts.emitter || new EventEmitter();
this.ws = await initWs(opts.url, opts.wsOptions);
this.onConnect = opts?.onConnect || (() => {});
this.ws.onopen = this.onOpen.bind(this);
this.ws.onmessage = this.onMessage.bind(this);
this.ws.onerror = this.onError.bind(this);
this.ws.onclose = this.onClose.bind(this);
}
/**
* 连接成功 ws 事件
*/
async onOpen() {
this.connected = true;
this.onConnect();
this.emitter.emit('open');
}
/**
* 检查是否连接
* @returns
*/
async isConnected() {
if (this.connected) {
return true;
}
return new Promise((resolve) => {
this.emitter.once('open', () => {
resolve(true);
});
});
}
/**
* 收到消息 ws 事件
* @param event
*/
async onMessage(event: MessageEvent) {
// console.log('WSS onMessage', event);
this.emitter.emit('message', event);
}
/**
* ws 错误事件
* @param event
*/
async onError(event: Event) {
console.error('WSS onError');
this.emitter.emit('error', event);
}
/**
* ws 关闭事件
* @param event
*/
async onClose(event: CloseEvent) {
console.error('WSS onClose');
this.emitter.emit('close', event);
}
}

View File

@@ -1,4 +1,5 @@
const isBrowser = process?.env?.BROWSER === 'true';
import { EventEmitter } from 'events';
type WebSocketOptions = {
/**
@@ -23,3 +24,18 @@ export const initWs = async (url: string, options?: WebSocketOptions) => {
}
return ws;
};
interface EventEmitterOptions {
/**
* Enables automatic capturing of promise rejection.
*/
captureRejections?: boolean | undefined;
}
/**
*
* @param opts
* @returns
*/
export const initEmitter = (opts?: EventEmitterOptions) => {
const emitter = new EventEmitter(opts);
return emitter;
};