// video-tools/src/wake/test/stream.ts

import vosk from 'vosk';
import { Recording } from '../../recorder/index.ts';
import fs, { WriteStream } from 'fs';
import path from 'path';
import { audioPath, sleep, mySpeechText, MODEL_PATH } from './common.ts';
import { encodeWav, decodeWav } from '../../utils/convert.ts';

/**
 * Runs Vosk recognition over a WAV file in small chunks, logging every
 * partial and completed result to the console.
 *
 * @param audioFilePath Path of the WAV file to transcribe. The recognizer is
 *   configured for 16 kHz input; NOTE(review): the rate of the decoded audio
 *   is not checked here — confirm `decodeWav` yields 16 kHz PCM.
 * @returns `false` when the Vosk model directory is missing, `true` once the
 *   whole file has been fed to the recognizer.
 * @throws Whatever `fs.readFileSync`, `decodeWav` or the Vosk bindings throw.
 */
const streamText = async (audioFilePath: string) => {
  if (!fs.existsSync(MODEL_PATH)) {
    console.error('请先下载Vosk模型');
    return false;
  }

  // Bytes handed to the recognizer per call.
  const CHUNK_SIZE = 1024;

  // Read and decode first so an unreadable file fails before any native
  // Vosk object has been allocated.
  const pcmBuffer = decodeWav(fs.readFileSync(audioFilePath));

  const model = new vosk.Model(MODEL_PATH);
  const rec = new vosk.Recognizer({ model: model, sampleRate: 16000 });

  try {
    for (let offset = 0; offset < pcmBuffer.length; offset += CHUNK_SIZE) {
      const chunk = pcmBuffer.subarray(offset, offset + CHUNK_SIZE);
      if (rec.acceptWaveform(chunk)) {
        // The recognizer reported a completed utterance.
        console.log('Streamed Result:', rec.result());
      } else {
        console.log('Partial Result:', rec.partialResult());
      }
      // await sleep(100); // simulate real-time latency
    }

    // Flush whatever the recognizer still holds; without this the tail of
    // the file is never reported as a completed result.
    console.log('Final Streamed Result:', rec.finalResult());
  } finally {
    // Model and recognizer wrap native handles and must be freed explicitly.
    rec.free();
    model.free();
  }

  return true;
};

// 测试流式处理
// streamText(mySpeechText)
//   .then((result) => {
//     console.log('Final Result:', result);
//   })
//   .catch((error) => {
//     console.error('Error during streaming:', error);
//   });

/**
 * Starts audio capture and feeds it to a grammar-restricted Vosk recognizer,
 * logging partial/completed results and any detected wake word.
 *
 * Incoming chunks are batched until at least `PROCESS_SIZE` bytes are pending
 * before being handed to the recognizer. When the stream ends, the remaining
 * audio is flushed, the final result is logged and the native Vosk objects
 * are freed.
 *
 * @returns A handle whose `stop()` stops the underlying recording.
 * @throws Error when the Vosk model directory does not exist, plus anything
 *   thrown by the Vosk bindings or the recorder while starting up.
 */
const record = async () => {
  // Validate and load the model BEFORE starting the capture, so a missing or
  // broken model cannot leave a recording running with nobody to stop it.
  if (!fs.existsSync(MODEL_PATH)) {
    throw new Error('请先下载Vosk模型');
  }

  const model = new vosk.Model(MODEL_PATH);
  const rec = new vosk.Recognizer({
    model: model,
    sampleRate: 16000,
    grammar: ['你', '好', '小', '嗨', '秀'], // characters the wake words are built from
  });
  console.log('Vosk Recognizer initialized...');

  const recording = new Recording({
    sampleRate: 16000,
    channels: 1,
  });

  recording.start();
  const stream = recording.stream();
  console.log('Recording started...', stream);

  // Pending audio is kept as a chunk list and merged once per batch, instead
  // of re-copying the whole backlog every time a chunk arrives.
  const pendingChunks: Buffer[] = [];
  let pendingBytes = 0;
  const PROCESS_SIZE = 4 * 8192; // roughly four 8192-byte chunks per batch (tunable)

  // Merges the pending chunks into one buffer and resets the backlog.
  const drainPending = (): Buffer => {
    const merged = Buffer.concat(pendingChunks, pendingBytes);
    pendingChunks.length = 0;
    pendingBytes = 0;
    return merged;
  };

  // The recognizer and model wrap native handles: free them exactly once and
  // never touch them afterwards.
  let released = false;
  const release = () => {
    if (released) return;
    released = true;
    rec.free();
    model.free();
  };

  stream.on('data', (data: Buffer) => {
    if (released) return;

    // The incoming data is assumed to already be raw PCM.
    pendingChunks.push(data);
    pendingBytes += data.length;
    if (pendingBytes < PROCESS_SIZE) return;

    if (rec.acceptWaveform(drainPending())) {
      const result = rec.result();
      console.log('Recorded Result:', result);
      // Check the completed utterance for a wake word.
      if (result.text) {
        const detect = detectWakeWord(result.text);
        if (detect.detected) {
          console.log(`检测到唤醒词: "${detect.word}"，置信度: ${detect.confidence}`);
          // Post-wake actions would go here.
        }
      }
    } else {
      const partialResult = rec.partialResult();
      console.log('Partial Result:', partialResult);
    }
  });

  // Flush and clean up when the recording stops.
  stream.on('end', () => {
    if (released) return;
    try {
      // Feed whatever is left below the batch threshold.
      if (pendingBytes > 0 && rec.acceptWaveform(drainPending())) {
        console.log('Final Recorded Result:', rec.result());
      }

      const finalResult = rec.finalResult();
      console.log('Final Complete Result:', finalResult);
    } finally {
      release();
    }
  });

  // Without a listener a stream 'error' event is thrown as an uncaught
  // exception and the native objects are never freed.
  stream.on('error', (error: Error) => {
    console.error('Recording stream error:', error);
    release();
  });

  // Hand back a way to stop the recording.
  return {
    stop: () => {
      recording.stop();
    },
  };
};
// Wake-word detection settings read by detectWakeWord.
const wakeConfig = {
  words: ['你好小小', '嗨小小', '小小', '秀秀'],
  threshold: 0.75, // a candidate counts as detected only when its similarity is strictly greater than this
  minWordCount: 2, // minimum input size; detectWakeWord compares it against the text's character count
};
/**
 * Looks for the configured wake word that best matches a recognized text.
 *
 * Whitespace is removed before matching: the recognizer reports its text as
 * space-separated tokens (e.g. "你 好 小 小"), which would otherwise never
 * line up with the unspaced entries in `wakeConfig.words`.
 *
 * @param text Recognized text, typically the `text` field of a Vosk result.
 * @returns `detected: true` with the best wake word and its similarity when a
 *   score strictly exceeds `wakeConfig.threshold`; otherwise
 *   `{ detected: false, confidence: 0, word: '' }`. On equal scores the
 *   earlier entry in `wakeConfig.words` wins.
 */
function detectWakeWord(text: string): { detected: boolean; confidence: number; word: string } {
  const normalized = text ? text.replace(/\s+/g, '').toLowerCase() : '';
  // Texts that are too short are rejected outright (length is in characters).
  if (!normalized || normalized.length < wakeConfig.minWordCount) {
    return { detected: false, confidence: 0, word: '' };
  }

  let bestMatch = { detected: false, confidence: 0, word: '' };

  for (const wakeWord of wakeConfig.words) {
    // Score the recognized text against this candidate wake word.
    const confidence = calculateSimilarity(normalized, wakeWord.toLowerCase());
    console.log(`检测到唤醒词 "${wakeWord}" 的相似度: ${confidence}`);
    if (confidence > wakeConfig.threshold && confidence > bestMatch.confidence) {
      bestMatch = { detected: true, confidence, word: wakeWord };
    }
  }

  return bestMatch;
}

/**
 * Simple string similarity in the range [0, 1].
 *
 * Returns 1 when `str1` contains `str2`. Otherwise the shorter string is slid
 * across the longer one and the best count of position-wise equal characters
 * is divided by the shorter string's length. This is a sliding-window
 * positional match, not an edit distance.
 *
 * Because the score is normalised by the SHORTER string, a text that is only
 * a fragment of the other string (e.g. "ab" against "abcd") also scores 1.
 *
 * @param str1 Text to search in.
 * @param str2 Text to look for.
 * @returns Similarity score; 0 when either string is empty.
 */
function calculateSimilarity(str1: string, str2: string): number {
  // Must come before the containment check: every string "includes" the
  // empty string, which would otherwise report a perfect match.
  if (str1.length === 0 || str2.length === 0) return 0;

  if (str1.includes(str2)) return 1.0;

  const [longer, shorter] = str1.length > str2.length ? [str1, str2] : [str2, str1];

  // Best number of aligned equal characters over every window position.
  let bestMatches = 0;
  for (let offset = 0; offset <= longer.length - shorter.length; offset++) {
    let matches = 0;
    for (let j = 0; j < shorter.length; j++) {
      if (longer[offset + j] === shorter[j]) matches++;
    }
    bestMatches = Math.max(bestMatches, matches);
  }

  return bestMatches / shorter.length;
}
// Start recording and stop it automatically after a fixed delay.
(async () => {
  const recorder = await record();

  // Auto-stop after 10 * 30 s = 5 minutes.
  setTimeout(() => {
    console.log('Stopping recording...');
    recorder.stop();
  }, 10 * 30 * 1000);
})().catch((error: unknown) => {
  // Startup failures (e.g. the model cannot be loaded) would otherwise
  // surface as an unhandled promise rejection.
  console.error('Failed to start recording:', error);
  process.exitCode = 1;
});