temp: add test

2025-04-18 18:28:34 +08:00
parent d92b93c6f9
commit fdc3985b93
22 changed files with 669 additions and 2082 deletions
--- a/src/asr/provider/volcengine/asr-ws-big-model-client.ts
+++ b/src/asr/provider/volcengine/asr-ws-big-model-client.ts
@@ -1,8 +1,4 @@
-import * as fs from 'fs/promises';
-import * as path from 'path';
 import * as zlib from 'zlib';
-import * as util from 'util';
-import { Readable } from 'stream';
 import { promisify } from 'util';
 import { nanoid } from 'nanoid';
 import { VolcEngineBase, uuid } from './base.ts';
@@ -254,7 +250,6 @@ interface AudioItem {
 * ASR WebSocket Client
 */
 export class AsrWsClient extends VolcEngineBase {
-  private audioPath: string;
  private successCode: number = 1000;
  private segDuration: number;
  private format: string;
@@ -262,16 +257,14 @@ export class AsrWsClient extends VolcEngineBase {
  private bits: number;
  private channel: number;
  private codec: string;
-  private authMethod: string;
  private hotWords: string[] | null;
-  private streaming: boolean;
  private mp3SegSize: number;
  private reqEvent: number = 1;
  private uid: string;
  private seq: number = 1;
  private hasSendFullClientRequest: boolean = false;

-  constructor(audioPath: string, options: AsrClientOptions = {}) {
+  constructor(options: AsrClientOptions = {}) {
    super({
      url: options.wsUrl || 'wss://openspeech.bytedance.com/api/v3/sauc/bigmodel',
      onConnect: () => this.onWsConnect(),
@@ -285,22 +278,19 @@ export class AsrWsClient extends VolcEngineBase {
      },
    });

-    this.audioPath = audioPath;
-    this.segDuration = options.segDuration || 100;
+    this.segDuration = options.segDuration || 200;
    this.uid = options.uid || 'test';
    this.format = options.format || 'wav';
    this.rate = options.rate || 16000;
    this.bits = options.bits || 16;
    this.channel = options.channel || 1;
    this.codec = options.codec || 'raw';
-    this.authMethod = options.authMethod || 'none';
    this.hotWords = options.hotWords || null;
-    this.streaming = options.streaming !== undefined ? options.streaming : true;
    this.mp3SegSize = options.mp3SegSize || 1000;
  }

  private onWsConnect() {
-    console.log('ASR WebSocket connected');
+    console.log('ASR Big Model WebSocket connected');
  }

  /**
@@ -321,6 +311,7 @@ export class AsrWsClient extends VolcEngineBase {
      request: {
        model_name: 'bigmodel',
        enable_punc: true,
+        result_type: 'single', // all, single
      },
    };
  }
@@ -357,90 +348,59 @@ export class AsrWsClient extends VolcEngineBase {
  private async segmentDataProcessor(audioData: Buffer, segmentSize: number): Promise<any> {
    await this.sendFullClientRequest();
    const that = this;
-    // Wait for response
-    const result = await new Promise<any>((resolve, reject) => {
-      const onMessage = async (event: MessageEvent) => {
-        try {
-          const response = parseResponse(Buffer.from(event.data as ArrayBuffer));
-          console.log('Initial response:', response);
-
-          // Process audio chunks
-          for (const [chunk, last] of sliceData(audioData, segmentSize)) {
-            that.seq += 1;
-            if (last) {
-              that.seq = -that.seq;
-            }
-            const seq = that.seq;
-
-            const start = Date.now();
-            const compressedChunk = await gzipPromise(chunk);
-
-            const messageType = AUDIO_ONLY_REQUEST;
-            const flags = last ? NEG_WITH_SEQUENCE : POS_SEQUENCE;
-
-            const audioRequest = Buffer.concat([generateHeader(messageType, flags), generateBeforePayload(seq), Buffer.alloc(4), compressedChunk]);
-
-            // Set payload size
-            audioRequest.writeUInt32BE(compressedChunk.length, 8);
-
-            // Send audio chunk
-            (this as any).ws.send(audioRequest);
-
-            // Wait for each response
-            const chunkResponse = await new Promise<any>((resolveChunk) => {
-              const onChunkMessage = (chunkEvent: MessageEvent) => {
-                (this as any).ws.removeEventListener('message', onChunkMessage);
-                const parsed = parseResponse(Buffer.from(chunkEvent.data as ArrayBuffer));
-                console.log(`Seq ${seq} response:`, parsed);
-                resolveChunk(parsed);
-              };
-
-              (this as any).ws.addEventListener('message', onChunkMessage, { once: true });
-            });
-
-            // If streaming, add delay to simulate real-time
-            if (this.streaming) {
-              const elapsed = Date.now() - start;
-              const sleepTime = Math.max(0, this.segDuration - elapsed);
-              await new Promise((r) => setTimeout(r, sleepTime));
-            }
-
-            // If this is the last chunk, resolve with final result
-            if (last) {
-              resolve(chunkResponse);
-              break;
-            }
-          }
-
-          (this as any).ws.removeEventListener('message', onMessage);
-        } catch (error) {
-          console.error('Error processing response:', error);
-          reject(error);
+    const sendVoice = async (audioData: Buffer, segmentSize: number) => {
+      that.setCanSend(false);
+      for (const [chunk, last] of sliceData(audioData, segmentSize)) {
+        that.seq += 1;
+        const isEnd = that.isEnd && last; // the session has ended AND this is the last packet of the audio
+        if (isEnd) {
+          that.seq = -that.seq;
        }
-      };
+        const seq = that.seq;
+        const compressedChunk = await gzipPromise(chunk);

-      (this as any).ws.addEventListener('message', onMessage, { once: true });
+        const messageType = AUDIO_ONLY_REQUEST;
+        const flags = isEnd ? NEG_WITH_SEQUENCE : POS_SEQUENCE;

-      (this as any).ws.addEventListener(
-        'error',
-        (error) => {
-          console.error('WebSocket error:', error);
-          reject(error);
-        },
-        { once: true },
-      );
-    });
+        const audioRequest = Buffer.concat([generateHeader(messageType, flags), generateBeforePayload(seq), Buffer.alloc(4), compressedChunk]);

-    return result;
+        // Set payload size
+        audioRequest.writeUInt32BE(compressedChunk.length, 8);
+        // Send audio chunk
+        this.ws.send(audioRequest);
+        // To be tested: whether we need to wait here
+        // const chunkResponse = await new Promise<any>((resolveChunk) => {
+        //   const onChunkMessage = (chunkEvent: MessageEvent) => {
+        //     (this as any).ws.removeEventListener('message', onChunkMessage);
+        //     const parsed = parseResponse(Buffer.from(chunkEvent.data as ArrayBuffer));
+        //     resolveChunk(parsed);
+        //   };
+        //   (this as any).ws.addEventListener('message', onChunkMessage, { once: true });
+        // });
+        // if (last) {
+        //   console.log('last', JSON.stringify(chunkResponse));
+        //   break;
+        // }
+      }
+      that.setCanSend(true);
+    };
+    // Wait for response
+    await sendVoice(audioData, segmentSize);
  }
-
-  /**
-   * Execute ASR on the audio file
-   */
-  public async execute(): Promise<any> {
+  async onMessage(event: MessageEvent) {
+    try {
+      const parsed = parseResponse(Buffer.from(event.data as ArrayBuffer));
+      console.log(`Seq ${parsed.payloadSequence} response:`, parsed);
+      // console.log('parsed', parsed.payloadSequence, parsed.payloadMsg.result.text);
+      if (parsed.isLastPackage) {
+        this.emitter.emit('end', parsed);
+      }
+    } catch (error) {
+      console.error('Error processing response:', error);
+    }
+  }
+  public async sendVoiceFile(data: Buffer) {
    try {
-      const data = await fs.readFile(this.audioPath);
-
      if (this.format === 'mp3') {
        const segmentSize = this.mp3SegSize;
        return await this.segmentDataProcessor(data, segmentSize);
@@ -465,65 +425,9 @@ export class AsrWsClient extends VolcEngineBase {
      throw error;
    }
  }
-
-  /**
-   * Send OPUS data for processing
-   */
-  public async sendOpusData(audioData: Buffer): Promise<any> {
-    const segmentSize = Math.floor((this.rate * 2 * this.channel * this.segDuration) / 500);
-    return await this.segmentDataProcessor(audioData, segmentSize);
+  public async sendVoiceStream(data: Buffer) {
+    const segmentSize = Buffer.byteLength(data);
+    console.log('segmentSize', segmentSize);
+    return await this.segmentDataProcessor(data, segmentSize);
  }
 }
-
-/**
- * Execute ASR on a single audio file
- */
-export async function executeOne(audioItem: AudioItem, options: AsrClientOptions = {}): Promise<any> {
-  if (!audioItem.id || !audioItem.path) {
-    throw new Error('Audio item must have id and path properties');
-  }
-
-  const audioId = audioItem.id;
-  const audioPath = path.resolve(process.cwd(), audioItem.path);
-
-  const asrClient = new AsrWsClient(audioPath, options);
-  await new Promise((resolve) => setTimeout(resolve, 2000));
-
-  return asrClient.execute().then((result) => {
-    return {
-      id: audioId,
-      path: audioPath,
-      result: result,
-    };
-  });
-}
-
-/**
- * Test stream processing
- */
-export const testStream = async () => {
-  console.log('测试流式');
-  const audioPath = 'videos/asr_example.wav';
-
-  const res = await executeOne({
-    id: 1,
-    path: audioPath,
-  })
-    .then((result) => {
-      console.log('====end test=====');
-      console.log(result);
-      return result;
-    })
-    .catch((error) => {
-      console.error('Test error:', error);
-      return '';
-    });
-};
-
-/**
- * Handle audio data directly
- */
-export async function handleAudioData(audioData: Buffer, options: AsrClientOptions = {}): Promise<any> {
-  const asrClient = new AsrWsClient('', options);
-  return await asrClient.sendOpusData(audioData);
-}