diff --git a/src/asr/provider/funasr/test/recorder.ts b/src/asr/provider/funasr/test/recorder.ts index 18182a6..089064f 100644 --- a/src/asr/provider/funasr/test/recorder.ts +++ b/src/asr/provider/funasr/test/recorder.ts @@ -5,22 +5,32 @@ import Stream from 'stream'; const recorder = new Recording(); const writeStream = new Stream.Writable(); +const url = 'wss://funasr.xiongxiao.me'; const ws = new VideoWS({ - url: 'wss://192.168.31.220:10095', + // url: 'wss://192.168.31.220:10095', + url: url, isFile: false, + mode: '2pass', + wsOptions: { + rejectUnauthorized: false, + }, onConnect: async () => { console.log('onConnect'); + recorder.start(); let chunks: Buffer = Buffer.alloc(0); var chunk_size = 960; // for asr chunk_size [5, 10, 5] let totalsend = 0; let len = 0; recorder.stream().on('data', (chunk) => { - chunks = Buffer.concat([chunks, chunk]); - if (chunks.length > chunk_size) { - ws.send(chunks); - totalsend += chunks.length; - chunks = Buffer.alloc(0); - } + // chunks = Buffer.concat([chunks, chunk]); + // if (chunks.length > chunk_size) { + // ws.send(chunks); + // console.log('chunk', chunk.length); + + // totalsend += chunks.length; + // chunks = Buffer.alloc(0); + // } + ws.send(chunk); }); ws.start(); setTimeout(() => { @@ -29,7 +39,11 @@ const ws = new VideoWS({ process.exit(0); }, 1000); console.log('len', len); - }, 20000); + }, 10 * 30 * 1000); + // }, 5 * 1000); + ws.emitter.on('message', (event) => { + // console.log('message', event.data); + }); }, }); @@ -38,4 +52,4 @@ const server = net.createServer((socket) => { console.log('data', data); }); }); -server.listen(10096); +server.listen(10097); diff --git a/src/asr/provider/funasr/ws.ts b/src/asr/provider/funasr/ws.ts index 656c2a5..0f4346f 100644 --- a/src/asr/provider/funasr/ws.ts +++ b/src/asr/provider/funasr/ws.ts @@ -1,7 +1,7 @@ // import WebSocket from 'ws'; import { initWs } from '../../../ws-adapter/index.ts'; import { logger } from '@/logger/index.ts'; -import { WSServer } from '../../provider/ws-server.ts'; +import { WSServer, WSSOptions } from '../../provider/ws-server.ts'; export type VideoWSOptions = { url?: string; @@ -11,19 +11,30 @@ export type VideoWSOptions = { isFile?: boolean; onConnect?: () => void; wav_format?: string; +} & { + wsOptions?: WSSOptions['wsOptions']; }; export const videoWsMode = ['2pass', 'online', 'offline'] as const; type VideoWsMode = (typeof videoWsMode)[number]; type OpenRequest = { + // 语音分片大小(单位: 毫秒): chunk_size: number[]; + // 音频文件名: wav_name: string; + // 是否正在说话: is_speaking: boolean; + // 分片间隔(单位: 毫秒): chunk_interval: number; // 逆文本标准化(ITN): itn: boolean; + // 模式: + // '2pass' - 双通道模式, 'online' - 在线模式, 'offline' - 离线模式 mode: VideoWsMode; + // 音频格式: wav_format?: string; + // 音频采样率(单位: Hz): audio_fs?: number; + // 热词列表: hotwords?: string; }; export type VideoWsResult = { @@ -40,7 +51,7 @@ export class VideoWS extends WSServer { mode?: VideoWsMode; wav_format?: string; constructor(options?: VideoWSOptions) { - super({ url: options?.url, ws: options?.ws, onConnect: options?.onConnect }); + super({ url: options?.url, ws: options?.ws, onConnect: options?.onConnect, wsOptions: options?.wsOptions }); this.itn = options?.itn || false; this.itn = options?.itn || false; this.mode = options?.mode || 'online'; diff --git a/src/asr/provider/ws-server.ts b/src/asr/provider/ws-server.ts index f3fd385..0508d35 100644 --- a/src/asr/provider/ws-server.ts +++ b/src/asr/provider/ws-server.ts @@ -1,7 +1,7 @@ import { EventEmitter } from 'eventemitter3'; import { initWs } from '../../ws-adapter/index.ts'; import type { ClientOptions } from 'ws'; -type WSSOptions = { +export type WSSOptions = { url: string; ws?: WebSocket; onConnect?: () => void; diff --git a/src/recorder/index.ts b/src/recorder/index.ts index 7136949..bc6bd34 100644 --- a/src/recorder/index.ts +++ b/src/recorder/index.ts @@ -3,7 +3,6 @@ import { logger } from '../logger/index.ts'; import { ChildProcessWithoutNullStreams, spawn } from 'child_process'; import recorders from '../recorder/recorders/index.ts'; import Stream from 'stream'; -const logDebug = logger.debug; export type RecordingOptions = { /* 采样率,默认为16000 */ sampleRate?: number; @@ -64,9 +63,9 @@ export class Recording { this.args = args; this.cmdOptions = Object.assign({ encoding: 'binary', stdio: 'pipe' }, spawnOptions); - logDebug(`Started recording`); - logDebug('options', this.options); - logDebug(` ${this.cmd} ${this.args.join(' ')}`); + logger.debug(`Started recording`); + logger.debug('options', this.options); + logger.debug(` ${this.cmd} ${this.args.join(' ')}`); return this.start(); } @@ -92,15 +91,15 @@ Enable debugging with the environment variable DEBUG=record.`, }); err.on('data', (chunk) => { - logDebug(`STDERR: ${chunk}`); + logger.debug(`STDERR: ${chunk}`); }); rec.on('data', (chunk) => { - logDebug(`Recording ${chunk.length} bytes`); + logger.debug(`Recording ${chunk.length} bytes`); }); rec.on('end', () => { - logDebug('Recording ended'); + logger.debug('Recording ended'); }); return this; @@ -117,7 +116,7 @@ Enable debugging with the environment variable DEBUG=record.`, this.process.kill('SIGSTOP'); this._stream.pause(); - logDebug('Paused recording'); + logger.debug('Paused recording'); } resume() { @@ -125,7 +124,7 @@ Enable debugging with the environment variable DEBUG=record.`, this.process.kill('SIGCONT'); this._stream.resume(); - logDebug('Resumed recording'); + logger.debug('Resumed recording'); } isPaused() { diff --git a/src/wake/test/build.ts b/src/wake/test/build.ts index 5fcdc79..1221236 100644 --- a/src/wake/test/build.ts +++ b/src/wake/test/build.ts @@ -2,7 +2,7 @@ import vosk from 'vosk'; import { Recording } from '../../recorder/index.ts'; import fs from 'fs'; import path from 'path'; -import { audioPath, sleep } from './common.ts'; +import { audioPath, sleep, mySpeechText } from './common.ts'; import { encodeWav, decodeWav } from '../../utils/convert.ts'; // 需要先下载Vosk模型 // const MODEL_PATH = 'vosk-model-small-en-us-0.15'; @@ -21,8 +21,12 @@ async function detectWithVosk(audioFilePath) { const wakeWords = ['欢迎']; // 自定义唤醒词列表 const audioBuffer = fs.readFileSync(audioFilePath); const pcmBuffer = decodeWav(audioBuffer); - const result = await rec.acceptWaveformAsync(pcmBuffer); + const result = rec.acceptWaveform(pcmBuffer); console.log('result', result, rec.result()); + + // const result = await rec.acceptWaveformAsync(pcmBuffer); + // console.log('result', result, rec.result()); + // return new Promise((resolve) => { // const pcmBufferLength = Buffer.byteLength(pcmBuffer); // console.log('pcmBufferLength', pcmBufferLength); @@ -44,6 +48,10 @@ async function detectWithVosk(audioFilePath) { // }); } -detectWithVosk(audioPath).then((result) => { +// detectWithVosk(audioPath).then((result) => { +// console.log('result', result); +// }); + +detectWithVosk(mySpeechText).then((result) => { console.log('result', result); }); diff --git a/src/wake/test/common.ts b/src/wake/test/common.ts index 9e04322..692a4cc 100644 --- a/src/wake/test/common.ts +++ b/src/wake/test/common.ts @@ -6,7 +6,13 @@ export const config = dotenv.config({ }).parsed; export const audioPath = path.join(process.cwd(), 'videos/asr_example.wav'); +export const mySpeechText = path.join(process.cwd(), 'videos/my_speech_text.wav'); export const audioPath2 = path.join(process.cwd(), 'videos/asr_example2.wav'); export const blankAudioPath = path.join(process.cwd(), 'videos/blank.wav'); export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); + +const model_all = 'models/vosk-model-cn-0.22'; +const model_small = 'models/vosk-model-small-cn-0.22'; +export const MODEL_PATH = path.join(process.cwd(), model_small); +// export const MODEL_PATH = path.join(process.cwd(), model_all); diff --git a/src/wake/test/stream.ts b/src/wake/test/stream.ts new file mode 100644 index 0000000..e8f5f7e --- /dev/null +++ b/src/wake/test/stream.ts @@ -0,0 +1,178 @@ +import vosk from 'vosk'; +import { Recording } from '../../recorder/index.ts'; +import fs, { WriteStream } from 'fs'; +import path from 'path'; +import { audioPath, sleep, mySpeechText, MODEL_PATH } from './common.ts'; +import { encodeWav, decodeWav } from '../../utils/convert.ts'; + +const streamText = async (audioFilePath: string) => { + if (!fs.existsSync(MODEL_PATH)) { + console.error('请先下载Vosk模型'); + return false; + } + + const model = new vosk.Model(MODEL_PATH); + const rec = new vosk.Recognizer({ model: model, sampleRate: 16000 }); + + const audioBuffer = fs.readFileSync(audioFilePath); + const pcmBuffer = decodeWav(audioBuffer); + + for (let i = 0; i < pcmBuffer.length; i += 1024) { + const chunk = pcmBuffer.subarray(i, i + 1024); + if (rec.acceptWaveform(chunk)) { + const result = rec.result(); + console.log('Streamed Result:', result); + } else { + const partialResult = rec.partialResult(); + console.log('Partial Result:', partialResult); + } + // await sleep(100); // 模拟延时 + } + + return true; +}; + +// 测试流式处理 +// streamText(mySpeechText) +// .then((result) => { +// console.log('Final Result:', result); +// }) +// .catch((error) => { +// console.error('Error during streaming:', error); +// }); + +const record = async () => { + const recording = new Recording({ + sampleRate: 16000, + channels: 1, + }); + + recording.start(); + const stream = recording.stream(); + console.log('Recording started...', stream); + const model = new vosk.Model(MODEL_PATH); + const rec = new vosk.Recognizer({ + model: model, + sampleRate: 16000, + grammar: ['你', '好', '小', '嗨', '秀'], // 添加唤醒词 + }); + console.log('Vosk Recognizer initialized...'); + + // 创建累积缓冲区 + let accumulatedBuffer = Buffer.alloc(0); + const PROCESS_SIZE = 4 * 8192; // 合并大约4个8192字节的块 (可根据需要调整) + + stream.on('data', (data: Buffer) => { + // const pcmBuffer = decodeWav(data); // 8192 bytes per chunk + const pcmBuffer = data; // 假设数据已经是PCM格式 + + // 将新数据追加到累积缓冲区 + accumulatedBuffer = Buffer.concat([accumulatedBuffer, pcmBuffer]); + + // 当积累的数据足够大时处理它 + if (accumulatedBuffer.length >= PROCESS_SIZE) { + if (rec.acceptWaveform(accumulatedBuffer)) { + const result = rec.result(); + console.log('Recorded Result:', result); + // 检查是否包含唤醒词 + if (result.text) { + const detect = detectWakeWord(result.text); + if (detect.detected) { + console.log(`检测到唤醒词: "${detect.word}",置信度: ${detect.confidence}`); + } + // 执行唤醒后的操作 + } + } else { + const partialResult = rec.partialResult(); + console.log('Partial Result:', partialResult); + } + + // 清空累积缓冲区 + accumulatedBuffer = Buffer.alloc(0); + } + }); + + // 添加停止录音的处理 + stream.on('end', () => { + // 处理剩余的缓冲区数据 + if (accumulatedBuffer.length > 0) { + if (rec.acceptWaveform(accumulatedBuffer)) { + const result = rec.result(); + console.log('Final Recorded Result:', result); + } + } + + // 获取最终结果 + const finalResult = rec.finalResult(); + console.log('Final Complete Result:', finalResult); + + // 释放资源 + rec.free(); + model.free(); + }); + + // 返回一个用于停止录音的函数 + return { + stop: () => { + recording.stop(); + }, + }; +}; +// 添加唤醒配置 +const wakeConfig = { + words: ['你好小小', '嗨小小', '小小', '秀秀'], + threshold: 0.75, // 匹配置信度阈值 + minWordCount: 2, // 最小词数 +}; +// 优化唤醒词检测 +function detectWakeWord(text: string): { detected: boolean; confidence: number; word: string } { + if (!text || text.length < wakeConfig.minWordCount) return { detected: false, confidence: 0, word: '' }; + + let bestMatch = { detected: false, confidence: 0, word: '' }; + + for (const wakeWord of wakeConfig.words) { + // 计算文本与唤醒词的相似度 + const confidence = calculateSimilarity(text.toLowerCase(), wakeWord.toLowerCase()); + console.log(`检测到唤醒词 "${wakeWord}" 的相似度: ${confidence}`); + if (confidence > wakeConfig.threshold && confidence > bestMatch.confidence) { + bestMatch = { detected: true, confidence, word: wakeWord }; + } + } + + return bestMatch; +} + +// 简单的字符串相似度计算函数 +function calculateSimilarity(str1: string, str2: string): number { + if (str1.includes(str2)) return 1.0; + + // 计算莱文斯坦距离的简化版本 + const longer = str1.length > str2.length ? str1 : str2; + const shorter = str1.length > str2.length ? str2 : str1; + + // 如果短字符串为空,相似度为0 + if (shorter.length === 0) return 0; + + // 简单的相似度计算 - 可以替换为更复杂的算法 + let matchCount = 0; + for (let i = 0; i <= longer.length - shorter.length; i++) { + const segment = longer.substring(i, i + shorter.length); + let localMatches = 0; + for (let j = 0; j < shorter.length; j++) { + if (segment[j] === shorter[j]) localMatches++; + } + matchCount = Math.max(matchCount, localMatches); + } + + return matchCount / shorter.length; +} +// 启动录音并在适当的时候停止 +(async () => { + const recorder = await record(); + + // 可选:30秒后自动停止录音 + setTimeout(() => { + console.log('Stopping recording...'); + recorder.stop(); + }, 10 * 30 * 1000); +})(); diff --git a/src/ws-adapter/index.ts b/src/ws-adapter/index.ts index 11bdded..9060475 100644 --- a/src/ws-adapter/index.ts +++ b/src/ws-adapter/index.ts @@ -1,4 +1,4 @@ -const isBrowser = process?.env?.BROWSER === 'true'; +const isBrowser = process?.env?.BROWSER === 'true' || typeof window !== 'undefined' && typeof window.document !== 'undefined'; import { EventEmitter } from 'events'; type WebSocketOptions = { @@ -17,7 +17,7 @@ export const initWs = async (url: string, options?: WebSocketOptions) => { const WebSocket = await import('ws').then((module) => module.default); const { rejectUnauthorized, headers, ...rest } = options || {}; ws = new WebSocket(url, { - rejectUnauthorized: rejectUnauthorized || true, + rejectUnauthorized: rejectUnauthorized ?? true, headers: headers, ...rest, }) as any;