"feat: 更新ASR服务连接配置,优化录音流处理及模型路径"

This commit is contained in:
熊潇 2025-06-02 12:38:53 +08:00
parent e638d7907a
commit 232d799575
8 changed files with 242 additions and 26 deletions

View File

@ -5,22 +5,32 @@ import Stream from 'stream';
const recorder = new Recording();
const writeStream = new Stream.Writable();
const url = 'wss://funasr.xiongxiao.me';
const ws = new VideoWS({
url: 'wss://192.168.31.220:10095',
// url: 'wss://192.168.31.220:10095',
url: url,
isFile: false,
mode: '2pass',
wsOptions: {
rejectUnauthorized: false,
},
onConnect: async () => {
console.log('onConnect');
recorder.start();
let chunks: Buffer = Buffer.alloc(0);
var chunk_size = 960; // for asr chunk_size [5, 10, 5]
let totalsend = 0;
let len = 0;
recorder.stream().on('data', (chunk) => {
chunks = Buffer.concat([chunks, chunk]);
if (chunks.length > chunk_size) {
ws.send(chunks);
totalsend += chunks.length;
chunks = Buffer.alloc(0);
}
// chunks = Buffer.concat([chunks, chunk]);
// if (chunks.length > chunk_size) {
// ws.send(chunks);
// console.log('chunk', chunk.length);
// totalsend += chunks.length;
// chunks = Buffer.alloc(0);
// }
ws.send(chunk);
});
ws.start();
setTimeout(() => {
@ -29,7 +39,11 @@ const ws = new VideoWS({
process.exit(0);
}, 1000);
console.log('len', len);
}, 20000);
}, 10 * 30 * 1000);
// }, 5 * 1000);
ws.emitter.on('message', (event) => {
// console.log('message', event.data);
});
},
});
@ -38,4 +52,4 @@ const server = net.createServer((socket) => {
console.log('data', data);
});
});
server.listen(10096);
server.listen(10097);

View File

@ -1,7 +1,7 @@
// import WebSocket from 'ws';
import { initWs } from '../../../ws-adapter/index.ts';
import { logger } from '@/logger/index.ts';
import { WSServer } from '../../provider/ws-server.ts';
import { WSServer, WSSOptions } from '../../provider/ws-server.ts';
export type VideoWSOptions = {
url?: string;
@ -11,19 +11,30 @@ export type VideoWSOptions = {
isFile?: boolean;
onConnect?: () => void;
wav_format?: string;
} & {
wsOptions?: WSSOptions['wsOptions'];
};
export const videoWsMode = ['2pass', 'online', 'offline'] as const;
type VideoWsMode = (typeof videoWsMode)[number];
type OpenRequest = {
// 语音分片大小(单位: 毫秒):
chunk_size: number[];
// 音频文件名:
wav_name: string;
// 是否正在说话:
is_speaking: boolean;
// 分片间隔(单位: 毫秒):
chunk_interval: number;
// 逆文本标准化(ITN):
itn: boolean;
// 模式:
// '2pass' - 双通道模式, 'online' - 在线模式, 'offline' - 离线模式
mode: VideoWsMode;
// 音频格式:
wav_format?: string;
// 音频采样率(单位: Hz):
audio_fs?: number;
// 热词列表:
hotwords?: string;
};
export type VideoWsResult = {
@ -40,7 +51,7 @@ export class VideoWS extends WSServer {
mode?: VideoWsMode;
wav_format?: string;
constructor(options?: VideoWSOptions) {
super({ url: options?.url, ws: options?.ws, onConnect: options?.onConnect });
super({ url: options?.url, ws: options?.ws, onConnect: options?.onConnect, wsOptions: options?.wsOptions });
this.itn = options?.itn || false;
this.itn = options?.itn || false;
this.mode = options?.mode || 'online';

View File

@ -1,7 +1,7 @@
import { EventEmitter } from 'eventemitter3';
import { initWs } from '../../ws-adapter/index.ts';
import type { ClientOptions } from 'ws';
type WSSOptions = {
export type WSSOptions = {
url: string;
ws?: WebSocket;
onConnect?: () => void;

View File

@ -3,7 +3,6 @@ import { logger } from '../logger/index.ts';
import { ChildProcessWithoutNullStreams, spawn } from 'child_process';
import recorders from '../recorder/recorders/index.ts';
import Stream from 'stream';
const logDebug = logger.debug;
export type RecordingOptions = {
/* 采样率默认为16000 */
sampleRate?: number;
@ -64,9 +63,9 @@ export class Recording {
this.args = args;
this.cmdOptions = Object.assign({ encoding: 'binary', stdio: 'pipe' }, spawnOptions);
logDebug(`Started recording`);
logDebug('options', this.options);
logDebug(` ${this.cmd} ${this.args.join(' ')}`);
logger.debug(`Started recording`);
logger.debug('options', this.options);
logger.debug(` ${this.cmd} ${this.args.join(' ')}`);
return this.start();
}
@ -92,15 +91,15 @@ Enable debugging with the environment variable DEBUG=record.`,
});
err.on('data', (chunk) => {
logDebug(`STDERR: ${chunk}`);
logger.debug(`STDERR: ${chunk}`);
});
rec.on('data', (chunk) => {
logDebug(`Recording ${chunk.length} bytes`);
logger.debug(`Recording ${chunk.length} bytes`);
});
rec.on('end', () => {
logDebug('Recording ended');
logger.debug('Recording ended');
});
return this;
@ -117,7 +116,7 @@ Enable debugging with the environment variable DEBUG=record.`,
this.process.kill('SIGSTOP');
this._stream.pause();
logDebug('Paused recording');
logger.debug('Paused recording');
}
resume() {
@ -125,7 +124,7 @@ Enable debugging with the environment variable DEBUG=record.`,
this.process.kill('SIGCONT');
this._stream.resume();
logDebug('Resumed recording');
logger.debug('Resumed recording');
}
isPaused() {

View File

@ -2,7 +2,7 @@ import vosk from 'vosk';
import { Recording } from '../../recorder/index.ts';
import fs from 'fs';
import path from 'path';
import { audioPath, sleep } from './common.ts';
import { audioPath, sleep, mySpeechText } from './common.ts';
import { encodeWav, decodeWav } from '../../utils/convert.ts';
// 需要先下载Vosk模型
// const MODEL_PATH = 'vosk-model-small-en-us-0.15';
@ -21,8 +21,12 @@ async function detectWithVosk(audioFilePath) {
const wakeWords = ['欢迎']; // 自定义唤醒词列表
const audioBuffer = fs.readFileSync(audioFilePath);
const pcmBuffer = decodeWav(audioBuffer);
const result = await rec.acceptWaveformAsync(pcmBuffer);
const result = rec.acceptWaveform(pcmBuffer);
console.log('result', result, rec.result());
// const result = await rec.acceptWaveformAsync(pcmBuffer);
// console.log('result', result, rec.result());
// return new Promise((resolve) => {
// const pcmBufferLength = Buffer.byteLength(pcmBuffer);
// console.log('pcmBufferLength', pcmBufferLength);
@ -44,6 +48,10 @@ async function detectWithVosk(audioFilePath) {
// });
}
detectWithVosk(audioPath).then((result) => {
// detectWithVosk(audioPath).then((result) => {
// console.log('result', result);
// });
detectWithVosk(mySpeechText).then((result) => {
console.log('result', result);
});

View File

@ -6,7 +6,13 @@ export const config = dotenv.config({
}).parsed;
export const audioPath = path.join(process.cwd(), 'videos/asr_example.wav');
export const mySpeechText = path.join(process.cwd(), 'videos/my_speech_text.wav');
export const audioPath2 = path.join(process.cwd(), 'videos/asr_example2.wav');
export const blankAudioPath = path.join(process.cwd(), 'videos/blank.wav');
export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
const model_all = 'models/vosk-model-cn-0.22';
const model_small = 'models/vosk-model-small-cn-0.22';
export const MODEL_PATH = path.join(process.cwd(), model_small);
// export const MODEL_PATH = path.join(process.cwd(), model_all);

178
src/wake/test/stream.ts Normal file
View File

@ -0,0 +1,178 @@
import vosk from 'vosk';
import { Recording } from '../../recorder/index.ts';
import fs, { WriteStream } from 'fs';
import path from 'path';
import { audioPath, sleep, mySpeechText, MODEL_PATH } from './common.ts';
import { encodeWav, decodeWav } from '../../utils/convert.ts';
/**
 * Runs a WAV file through Vosk in fixed-size chunks to simulate streaming
 * recognition, logging partial and final results along the way.
 *
 * @param audioFilePath - Path to a WAV file on disk.
 * @returns false when the Vosk model directory is missing, true otherwise.
 */
const streamText = async (audioFilePath: string) => {
  if (!fs.existsSync(MODEL_PATH)) {
    console.error('请先下载Vosk模型');
    return false;
  }
  const model = new vosk.Model(MODEL_PATH);
  const rec = new vosk.Recognizer({ model: model, sampleRate: 16000 });
  try {
    const audioBuffer = fs.readFileSync(audioFilePath);
    const pcmBuffer = decodeWav(audioBuffer);
    // Feed 1024-byte slices to mimic a live audio stream.
    for (let i = 0; i < pcmBuffer.length; i += 1024) {
      const chunk = pcmBuffer.subarray(i, i + 1024);
      if (rec.acceptWaveform(chunk)) {
        const result = rec.result();
        console.log('Streamed Result:', result);
      } else {
        const partialResult = rec.partialResult();
        console.log('Partial Result:', partialResult);
      }
      // await sleep(100); // simulate network/capture latency
    }
    return true;
  } finally {
    // Fix: release native recognizer/model memory — the original leaked both
    // (the sibling record() frees them; this path did not).
    rec.free();
    model.free();
  }
};
// 测试流式处理
// streamText(mySpeechText)
// .then((result) => {
// console.log('Final Result:', result);
// })
// .catch((error) => {
// console.error('Error during streaming:', error);
// });
// Starts microphone capture and runs streaming wake-word recognition over the
// live PCM stream with Vosk. Returns a handle exposing stop().
// NOTE(review): the decodeWav call is commented out, so the recorder is
// assumed to emit raw 16 kHz mono PCM directly — confirm against Recording.
const record = async () => {
  const recording = new Recording({
    sampleRate: 16000,
    channels: 1,
  });
  recording.start();
  const stream = recording.stream();
  console.log('Recording started...', stream);
  const model = new vosk.Model(MODEL_PATH);
  const rec = new vosk.Recognizer({
    model: model,
    sampleRate: 16000,
    grammar: ['你', '好', '小', '嗨', '秀'], // wake-word grammar: restrict recognition to these characters
  });
  console.log('Vosk Recognizer initialized...');
  // Accumulation buffer: batch the recorder's small chunks into larger blocks
  // before handing them to the recognizer.
  let accumulatedBuffer = Buffer.alloc(0);
  const PROCESS_SIZE = 4 * 8192; // merge roughly four 8192-byte chunks (tune as needed)
  stream.on('data', (data: Buffer) => {
    // const pcmBuffer = decodeWav(data); // 8192 bytes per chunk
    const pcmBuffer = data; // data is assumed to already be raw PCM
    // Append the new chunk to the accumulation buffer.
    accumulatedBuffer = Buffer.concat([accumulatedBuffer, pcmBuffer]);
    // Process once enough audio has accumulated.
    if (accumulatedBuffer.length >= PROCESS_SIZE) {
      if (rec.acceptWaveform(accumulatedBuffer)) {
        const result = rec.result();
        console.log('Recorded Result:', result);
        // Check whether the recognized text contains a wake word.
        if (result.text) {
          const detect = detectWakeWord(result.text);
          if (detect.detected) {
            console.log(`检测到唤醒词: "${detect.word}",置信度: ${detect.confidence}`);
          }
          // Post-wake actions would go here.
        }
      } else {
        const partialResult = rec.partialResult();
        console.log('Partial Result:', partialResult);
      }
      // Reset the accumulation buffer for the next batch.
      accumulatedBuffer = Buffer.alloc(0);
    }
  });
  // Handle the end of the recording stream.
  stream.on('end', () => {
    // Flush any audio still sitting in the buffer.
    if (accumulatedBuffer.length > 0) {
      if (rec.acceptWaveform(accumulatedBuffer)) {
        const result = rec.result();
        console.log('Final Recorded Result:', result);
      }
    }
    // Fetch the final recognition result.
    const finalResult = rec.finalResult();
    console.log('Final Complete Result:', finalResult);
    // Release native Vosk resources.
    rec.free();
    model.free();
  });
  // Return a handle that lets the caller stop recording.
  return {
    stop: () => {
      recording.stop();
    },
  };
};
// Wake-word detection configuration.
const wakeConfig = {
  words: ['你好小小', '嗨小小', '小小', '秀秀'],
  threshold: 0.75, // minimum similarity score to count as a match
  minWordCount: 2, // minimum recognized-text length worth checking
};
// 优化唤醒词检测
function detectWakeWord(text: string): { detected: boolean; confidence: number; word: string } {
if (!text || text.length < wakeConfig.minWordCount) return { detected: false, confidence: 0, word: '' };
let bestMatch = { detected: false, confidence: 0, word: '' };
for (const wakeWord of wakeConfig.words) {
// 计算文本与唤醒词的相似度
const confidence = calculateSimilarity(text.toLowerCase(), wakeWord.toLowerCase());
console.log(`检测到唤醒词 "${wakeWord}" 的相似度: ${confidence}`);
if (confidence > wakeConfig.threshold && confidence > bestMatch.confidence) {
bestMatch = { detected: true, confidence, word: wakeWord };
}
}
return bestMatch;
}
// 简单的字符串相似度计算函数
function calculateSimilarity(str1: string, str2: string): number {
if (str1.includes(str2)) return 1.0;
// 计算莱文斯坦距离的简化版本
const longer = str1.length > str2.length ? str1 : str2;
const shorter = str1.length > str2.length ? str2 : str1;
// 如果短字符串为空相似度为0
if (shorter.length === 0) return 0;
// 简单的相似度计算 - 可以替换为更复杂的算法
let matchCount = 0;
for (let i = 0; i <= longer.length - shorter.length; i++) {
const segment = longer.substring(i, i + shorter.length);
let localMatches = 0;
for (let j = 0; j < shorter.length; j++) {
if (segment[j] === shorter[j]) localMatches++;
}
matchCount = Math.max(matchCount, localMatches);
}
return matchCount / shorter.length;
}
// Bootstrap: start recording, then automatically stop after
// 10 * 30 * 1000 ms (5 minutes — the original "30 seconds" comment was wrong).
(async () => {
  const recorder = await record();
  const AUTO_STOP_MS = 10 * 30 * 1000;
  setTimeout(() => {
    console.log('Stopping recording...');
    recorder.stop();
  }, AUTO_STOP_MS);
})();

View File

@ -1,4 +1,4 @@
const isBrowser = process?.env?.BROWSER === 'true';
const isBrowser = process?.env?.BROWSER === 'true' || typeof window !== 'undefined' && typeof window.document !== 'undefined';
import { EventEmitter } from 'events';
type WebSocketOptions = {
@ -17,7 +17,7 @@ export const initWs = async (url: string, options?: WebSocketOptions) => {
const WebSocket = await import('ws').then((module) => module.default);
const { rejectUnauthorized, headers, ...rest } = options || {};
ws = new WebSocket(url, {
rejectUnauthorized: rejectUnauthorized || true,
rejectUnauthorized: rejectUnauthorized ?? true,
headers: headers,
...rest,
}) as any;