"feat: 更新ASR服务连接配置,优化录音流处理及模型路径"

This commit is contained in:
熊潇 2025-06-02 12:38:53 +08:00
parent e638d7907a
commit 232d799575
8 changed files with 242 additions and 26 deletions

View File

@ -5,22 +5,32 @@ import Stream from 'stream';
const recorder = new Recording();
const writeStream = new Stream.Writable();
const url = 'wss://funasr.xiongxiao.me';
const ws = new VideoWS({
url: 'wss://192.168.31.220:10095',
// url: 'wss://192.168.31.220:10095',
url: url,
isFile: false,
mode: '2pass',
wsOptions: {
rejectUnauthorized: false,
},
onConnect: async () => {
console.log('onConnect');
recorder.start();
let chunks: Buffer = Buffer.alloc(0);
var chunk_size = 960; // for asr chunk_size [5, 10, 5]
let totalsend = 0;
let len = 0;
recorder.stream().on('data', (chunk) => {
chunks = Buffer.concat([chunks, chunk]);
if (chunks.length > chunk_size) {
ws.send(chunks);
totalsend += chunks.length;
chunks = Buffer.alloc(0);
}
// chunks = Buffer.concat([chunks, chunk]);
// if (chunks.length > chunk_size) {
// ws.send(chunks);
// console.log('chunk', chunk.length);
// totalsend += chunks.length;
// chunks = Buffer.alloc(0);
// }
ws.send(chunk);
});
ws.start();
setTimeout(() => {
@ -29,7 +39,11 @@ const ws = new VideoWS({
process.exit(0);
}, 1000);
console.log('len', len);
}, 20000);
}, 10 * 30 * 1000);
// }, 5 * 1000);
ws.emitter.on('message', (event) => {
// console.log('message', event.data);
});
},
});
@ -38,4 +52,4 @@ const server = net.createServer((socket) => {
console.log('data', data);
});
});
server.listen(10096);
server.listen(10097);

View File

@ -1,7 +1,7 @@
// import WebSocket from 'ws';
import { initWs } from '../../../ws-adapter/index.ts';
import { logger } from '@/logger/index.ts';
import { WSServer } from '../../provider/ws-server.ts';
import { WSServer, WSSOptions } from '../../provider/ws-server.ts';
export type VideoWSOptions = {
url?: string;
@ -11,19 +11,30 @@ export type VideoWSOptions = {
isFile?: boolean;
onConnect?: () => void;
wav_format?: string;
} & {
wsOptions?: WSSOptions['wsOptions'];
};
export const videoWsMode = ['2pass', 'online', 'offline'] as const;
type VideoWsMode = (typeof videoWsMode)[number];
type OpenRequest = {
// 语音分片大小(单位: 毫秒):
chunk_size: number[];
// 音频文件名:
wav_name: string;
// 是否正在说话:
is_speaking: boolean;
// 分片间隔(单位: 毫秒):
chunk_interval: number;
// 逆文本标准化(ITN):
itn: boolean;
// 模式:
// '2pass' - 双通道模式, 'online' - 在线模式, 'offline' - 离线模式
mode: VideoWsMode;
// 音频格式:
wav_format?: string;
// 音频采样率(单位: Hz):
audio_fs?: number;
// 热词列表:
hotwords?: string;
};
export type VideoWsResult = {
@ -40,7 +51,7 @@ export class VideoWS extends WSServer {
mode?: VideoWsMode;
wav_format?: string;
constructor(options?: VideoWSOptions) {
super({ url: options?.url, ws: options?.ws, onConnect: options?.onConnect });
super({ url: options?.url, ws: options?.ws, onConnect: options?.onConnect, wsOptions: options?.wsOptions });
this.itn = options?.itn || false;
this.itn = options?.itn || false;
this.mode = options?.mode || 'online';

View File

@ -1,7 +1,7 @@
import { EventEmitter } from 'eventemitter3';
import { initWs } from '../../ws-adapter/index.ts';
import type { ClientOptions } from 'ws';
type WSSOptions = {
export type WSSOptions = {
url: string;
ws?: WebSocket;
onConnect?: () => void;

View File

@ -3,7 +3,6 @@ import { logger } from '../logger/index.ts';
import { ChildProcessWithoutNullStreams, spawn } from 'child_process';
import recorders from '../recorder/recorders/index.ts';
import Stream from 'stream';
const logDebug = logger.debug;
export type RecordingOptions = {
/* 采样率默认为16000 */
sampleRate?: number;
@ -64,9 +63,9 @@ export class Recording {
this.args = args;
this.cmdOptions = Object.assign({ encoding: 'binary', stdio: 'pipe' }, spawnOptions);
logDebug(`Started recording`);
logDebug('options', this.options);
logDebug(` ${this.cmd} ${this.args.join(' ')}`);
logger.debug(`Started recording`);
logger.debug('options', this.options);
logger.debug(` ${this.cmd} ${this.args.join(' ')}`);
return this.start();
}
@ -92,15 +91,15 @@ Enable debugging with the environment variable DEBUG=record.`,
});
err.on('data', (chunk) => {
logDebug(`STDERR: ${chunk}`);
logger.debug(`STDERR: ${chunk}`);
});
rec.on('data', (chunk) => {
logDebug(`Recording ${chunk.length} bytes`);
logger.debug(`Recording ${chunk.length} bytes`);
});
rec.on('end', () => {
logDebug('Recording ended');
logger.debug('Recording ended');
});
return this;
@ -117,7 +116,7 @@ Enable debugging with the environment variable DEBUG=record.`,
this.process.kill('SIGSTOP');
this._stream.pause();
logDebug('Paused recording');
logger.debug('Paused recording');
}
resume() {
@ -125,7 +124,7 @@ Enable debugging with the environment variable DEBUG=record.`,
this.process.kill('SIGCONT');
this._stream.resume();
logDebug('Resumed recording');
logger.debug('Resumed recording');
}
isPaused() {

View File

@ -2,7 +2,7 @@ import vosk from 'vosk';
import { Recording } from '../../recorder/index.ts';
import fs from 'fs';
import path from 'path';
import { audioPath, sleep } from './common.ts';
import { audioPath, sleep, mySpeechText } from './common.ts';
import { encodeWav, decodeWav } from '../../utils/convert.ts';
// 需要先下载Vosk模型
// const MODEL_PATH = 'vosk-model-small-en-us-0.15';
@ -21,8 +21,12 @@ async function detectWithVosk(audioFilePath) {
const wakeWords = ['欢迎']; // 自定义唤醒词列表
const audioBuffer = fs.readFileSync(audioFilePath);
const pcmBuffer = decodeWav(audioBuffer);
const result = await rec.acceptWaveformAsync(pcmBuffer);
const result = rec.acceptWaveform(pcmBuffer);
console.log('result', result, rec.result());
// const result = await rec.acceptWaveformAsync(pcmBuffer);
// console.log('result', result, rec.result());
// return new Promise((resolve) => {
// const pcmBufferLength = Buffer.byteLength(pcmBuffer);
// console.log('pcmBufferLength', pcmBufferLength);
@ -44,6 +48,10 @@ async function detectWithVosk(audioFilePath) {
// });
}
detectWithVosk(audioPath).then((result) => {
// detectWithVosk(audioPath).then((result) => {
// console.log('result', result);
// });
detectWithVosk(mySpeechText).then((result) => {
console.log('result', result);
});

View File

@ -6,7 +6,13 @@ export const config = dotenv.config({
}).parsed;
export const audioPath = path.join(process.cwd(), 'videos/asr_example.wav');
export const mySpeechText = path.join(process.cwd(), 'videos/my_speech_text.wav');
export const audioPath2 = path.join(process.cwd(), 'videos/asr_example2.wav');
export const blankAudioPath = path.join(process.cwd(), 'videos/blank.wav');
export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
const model_all = 'models/vosk-model-cn-0.22';
const model_small = 'models/vosk-model-small-cn-0.22';
export const MODEL_PATH = path.join(process.cwd(), model_small);
// export const MODEL_PATH = path.join(process.cwd(), model_all);

178
src/wake/test/stream.ts Normal file
View File

@ -0,0 +1,178 @@
import vosk from 'vosk';
import { Recording } from '../../recorder/index.ts';
import fs, { WriteStream } from 'fs';
import path from 'path';
import { audioPath, sleep, mySpeechText, MODEL_PATH } from './common.ts';
import { encodeWav, decodeWav } from '../../utils/convert.ts';
/**
 * Runs a WAV file through Vosk in fixed-size chunks to simulate streaming
 * recognition, logging partial and final results along the way.
 *
 * @param audioFilePath - Path to a WAV file on disk.
 * @returns false when the Vosk model directory is missing, true otherwise.
 */
const streamText = async (audioFilePath: string) => {
  if (!fs.existsSync(MODEL_PATH)) {
    console.error('请先下载Vosk模型');
    return false;
  }
  const model = new vosk.Model(MODEL_PATH);
  const rec = new vosk.Recognizer({ model: model, sampleRate: 16000 });
  try {
    const audioBuffer = fs.readFileSync(audioFilePath);
    const pcmBuffer = decodeWav(audioBuffer);
    // Feed 1024-byte slices to mimic a live audio stream.
    for (let i = 0; i < pcmBuffer.length; i += 1024) {
      const chunk = pcmBuffer.subarray(i, i + 1024);
      if (rec.acceptWaveform(chunk)) {
        const result = rec.result();
        console.log('Streamed Result:', result);
      } else {
        const partialResult = rec.partialResult();
        console.log('Partial Result:', partialResult);
      }
      // await sleep(100); // simulate network/capture latency
    }
    return true;
  } finally {
    // Fix: release native recognizer/model memory — the original leaked both
    // (the sibling record() frees them; this path did not).
    rec.free();
    model.free();
  }
};
// 测试流式处理
// streamText(mySpeechText)
// .then((result) => {
// console.log('Final Result:', result);
// })
// .catch((error) => {
// console.error('Error during streaming:', error);
// });
// Starts microphone capture and runs streaming wake-word recognition over the
// live PCM stream with Vosk. Returns a handle exposing stop().
// NOTE(review): the decodeWav call is commented out, so the recorder is
// assumed to emit raw 16 kHz mono PCM directly — confirm against Recording.
const record = async () => {
  const recording = new Recording({
    sampleRate: 16000,
    channels: 1,
  });
  recording.start();
  const stream = recording.stream();
  console.log('Recording started...', stream);
  const model = new vosk.Model(MODEL_PATH);
  const rec = new vosk.Recognizer({
    model: model,
    sampleRate: 16000,
    grammar: ['你', '好', '小', '嗨', '秀'], // wake-word grammar: restrict recognition to these characters
  });
  console.log('Vosk Recognizer initialized...');
  // Accumulation buffer: batch the recorder's small chunks into larger blocks
  // before handing them to the recognizer.
  let accumulatedBuffer = Buffer.alloc(0);
  const PROCESS_SIZE = 4 * 8192; // merge roughly four 8192-byte chunks (tune as needed)
  stream.on('data', (data: Buffer) => {
    // const pcmBuffer = decodeWav(data); // 8192 bytes per chunk
    const pcmBuffer = data; // data is assumed to already be raw PCM
    // Append the new chunk to the accumulation buffer.
    accumulatedBuffer = Buffer.concat([accumulatedBuffer, pcmBuffer]);
    // Process once enough audio has accumulated.
    if (accumulatedBuffer.length >= PROCESS_SIZE) {
      if (rec.acceptWaveform(accumulatedBuffer)) {
        const result = rec.result();
        console.log('Recorded Result:', result);
        // Check whether the recognized text contains a wake word.
        if (result.text) {
          const detect = detectWakeWord(result.text);
          if (detect.detected) {
            console.log(`检测到唤醒词: "${detect.word}",置信度: ${detect.confidence}`);
          }
          // Post-wake actions would go here.
        }
      } else {
        const partialResult = rec.partialResult();
        console.log('Partial Result:', partialResult);
      }
      // Reset the accumulation buffer for the next batch.
      accumulatedBuffer = Buffer.alloc(0);
    }
  });
  // Handle the end of the recording stream.
  stream.on('end', () => {
    // Flush any audio still sitting in the buffer.
    if (accumulatedBuffer.length > 0) {
      if (rec.acceptWaveform(accumulatedBuffer)) {
        const result = rec.result();
        console.log('Final Recorded Result:', result);
      }
    }
    // Fetch the final recognition result.
    const finalResult = rec.finalResult();
    console.log('Final Complete Result:', finalResult);
    // Release native Vosk resources.
    rec.free();
    model.free();
  });
  // Return a handle that lets the caller stop recording.
  return {
    stop: () => {
      recording.stop();
    },
  };
};
// Wake-word detection configuration.
const wakeConfig = {
  words: ['你好小小', '嗨小小', '小小', '秀秀'],
  threshold: 0.75, // minimum similarity score to count as a match
  minWordCount: 2, // minimum recognized-text length worth checking
};
// 优化唤醒词检测
function detectWakeWord(text: string): { detected: boolean; confidence: number; word: string } {
if (!text || text.length < wakeConfig.minWordCount) return { detected: false, confidence: 0, word: '' };
let bestMatch = { detected: false, confidence: 0, word: '' };
for (const wakeWord of wakeConfig.words) {
// 计算文本与唤醒词的相似度
const confidence = calculateSimilarity(text.toLowerCase(), wakeWord.toLowerCase());
console.log(`检测到唤醒词 "${wakeWord}" 的相似度: ${confidence}`);
if (confidence > wakeConfig.threshold && confidence > bestMatch.confidence) {
bestMatch = { detected: true, confidence, word: wakeWord };
}
}
return bestMatch;
}
// 简单的字符串相似度计算函数
function calculateSimilarity(str1: string, str2: string): number {
if (str1.includes(str2)) return 1.0;
// 计算莱文斯坦距离的简化版本
const longer = str1.length > str2.length ? str1 : str2;
const shorter = str1.length > str2.length ? str2 : str1;
// 如果短字符串为空相似度为0
if (shorter.length === 0) return 0;
// 简单的相似度计算 - 可以替换为更复杂的算法
let matchCount = 0;
for (let i = 0; i <= longer.length - shorter.length; i++) {
const segment = longer.substring(i, i + shorter.length);
let localMatches = 0;
for (let j = 0; j < shorter.length; j++) {
if (segment[j] === shorter[j]) localMatches++;
}
matchCount = Math.max(matchCount, localMatches);
}
return matchCount / shorter.length;
}
// Bootstrap: start recording, then automatically stop after
// 10 * 30 * 1000 ms (5 minutes — the original "30 seconds" comment was wrong).
(async () => {
  const recorder = await record();
  const AUTO_STOP_MS = 10 * 30 * 1000;
  setTimeout(() => {
    console.log('Stopping recording...');
    recorder.stop();
  }, AUTO_STOP_MS);
})();

View File

@ -1,4 +1,4 @@
const isBrowser = process?.env?.BROWSER === 'true';
const isBrowser = process?.env?.BROWSER === 'true' || typeof window !== 'undefined' && typeof window.document !== 'undefined';
import { EventEmitter } from 'events';
type WebSocketOptions = {
@ -17,7 +17,7 @@ export const initWs = async (url: string, options?: WebSocketOptions) => {
const WebSocket = await import('ws').then((module) => module.default);
const { rejectUnauthorized, headers, ...rest } = options || {};
ws = new WebSocket(url, {
rejectUnauthorized: rejectUnauthorized || true,
rejectUnauthorized: rejectUnauthorized ?? true,
headers: headers,
...rest,
}) as any;