generated from tailored/router-template
"feat: 更新ASR服务连接配置,优化录音流处理及模型路径"
This commit is contained in:
@@ -2,7 +2,7 @@ import vosk from 'vosk';
|
||||
import { Recording } from '../../recorder/index.ts';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
import { audioPath, sleep } from './common.ts';
|
||||
import { audioPath, sleep, mySpeechText } from './common.ts';
|
||||
import { encodeWav, decodeWav } from '../../utils/convert.ts';
|
||||
// 需要先下载Vosk模型
|
||||
// const MODEL_PATH = 'vosk-model-small-en-us-0.15';
|
||||
@@ -21,8 +21,12 @@ async function detectWithVosk(audioFilePath) {
|
||||
const wakeWords = ['欢迎']; // 自定义唤醒词列表
|
||||
const audioBuffer = fs.readFileSync(audioFilePath);
|
||||
const pcmBuffer = decodeWav(audioBuffer);
|
||||
const result = await rec.acceptWaveformAsync(pcmBuffer);
|
||||
const result = rec.acceptWaveform(pcmBuffer);
|
||||
console.log('result', result, rec.result());
|
||||
|
||||
// const result = await rec.acceptWaveformAsync(pcmBuffer);
|
||||
// console.log('result', result, rec.result());
|
||||
|
||||
// return new Promise((resolve) => {
|
||||
// const pcmBufferLength = Buffer.byteLength(pcmBuffer);
|
||||
// console.log('pcmBufferLength', pcmBufferLength);
|
||||
@@ -44,6 +48,10 @@ async function detectWithVosk(audioFilePath) {
|
||||
// });
|
||||
}
|
||||
|
||||
detectWithVosk(audioPath).then((result) => {
|
||||
// detectWithVosk(audioPath).then((result) => {
|
||||
// console.log('result', result);
|
||||
// });
|
||||
|
||||
detectWithVosk(mySpeechText).then((result) => {
|
||||
console.log('result', result);
|
||||
});
|
||||
|
||||
@@ -6,7 +6,13 @@ export const config = dotenv.config({
|
||||
}).parsed;
|
||||
|
||||
/** Resolves a project-relative path against the current working directory. */
const fromCwd = (relativePath: string) => path.join(process.cwd(), relativePath);

// Sample audio fixtures used by the tests.
export const audioPath = fromCwd('videos/asr_example.wav');
export const mySpeechText = fromCwd('videos/my_speech_text.wav');
export const audioPath2 = fromCwd('videos/asr_example2.wav');
export const blankAudioPath = fromCwd('videos/blank.wav');

/** Returns a promise that resolves after `ms` milliseconds. */
export const sleep = (ms: number) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });

// Vosk model directories, relative to the working directory.
const fullModelDir = 'models/vosk-model-cn-0.22';
const smallModelDir = 'models/vosk-model-small-cn-0.22';

/** Absolute path of the Vosk model in use (currently the small Chinese model). */
export const MODEL_PATH = fromCwd(smallModelDir);
// export const MODEL_PATH = fromCwd(fullModelDir);
|
||||
|
||||
178
src/wake/test/stream.ts
Normal file
178
src/wake/test/stream.ts
Normal file
@@ -0,0 +1,178 @@
|
||||
import vosk from 'vosk';
|
||||
import { Recording } from '../../recorder/index.ts';
|
||||
import fs, { WriteStream } from 'fs';
|
||||
import path from 'path';
|
||||
import { audioPath, sleep, mySpeechText, MODEL_PATH } from './common.ts';
|
||||
import { encodeWav, decodeWav } from '../../utils/convert.ts';
|
||||
|
||||
/**
 * Runs a WAV file through Vosk in small chunks, logging each partial and
 * completed result as if the audio were arriving as a live stream.
 *
 * @param audioFilePath Path of the WAV file to recognize.
 * @returns `false` when the model directory at MODEL_PATH does not exist,
 *          `true` once the whole file has been processed.
 */
const streamText = async (audioFilePath: string) => {
  if (!fs.existsSync(MODEL_PATH)) {
    console.error('请先下载Vosk模型');
    return false;
  }

  // Bytes handed to the recognizer per call.
  const CHUNK_SIZE = 1024;

  const model = new vosk.Model(MODEL_PATH);
  try {
    const rec = new vosk.Recognizer({ model: model, sampleRate: 16000 });
    try {
      const audioBuffer = fs.readFileSync(audioFilePath);
      const pcmBuffer = decodeWav(audioBuffer);

      for (let offset = 0; offset < pcmBuffer.length; offset += CHUNK_SIZE) {
        const chunk = pcmBuffer.subarray(offset, offset + CHUNK_SIZE);
        if (rec.acceptWaveform(chunk)) {
          // acceptWaveform returned true: a completed result is available.
          const result = rec.result();
          console.log('Streamed Result:', result);
        } else {
          const partialResult = rec.partialResult();
          console.log('Partial Result:', partialResult);
        }
        // await sleep(100); // simulate real-time pacing
      }

      // Flush whatever the recognizer is still holding after the last chunk;
      // without this the tail of the audio is never reported.
      console.log('Final Streamed Result:', rec.finalResult());
    } finally {
      // Release the recognizer explicitly, as record() does.
      rec.free();
    }
  } finally {
    model.free();
  }

  return true;
};
|
||||
|
||||
// 测试流式处理
|
||||
// streamText(mySpeechText)
|
||||
// .then((result) => {
|
||||
// console.log('Final Result:', result);
|
||||
// })
|
||||
// .catch((error) => {
|
||||
// console.error('Error during streaming:', error);
|
||||
// });
|
||||
|
||||
/**
 * Starts audio capture, feeds the captured audio to a Vosk recognizer, and
 * logs recognition results and any detected wake word.
 *
 * Incoming chunks are batched until at least PROCESS_SIZE bytes are pending,
 * then handed to the recognizer in one call. When the stream ends, leftover
 * audio is flushed, the final result is logged, and the recognizer and model
 * are freed.
 *
 * @returns An object whose `stop()` stops the underlying recording.
 */
const record = async () => {
  // Build the model and recognizer BEFORE starting the recorder. Both
  // constructors run synchronously; if one throws after capture has started,
  // the recording would keep running with nothing left to stop it.
  const model = new vosk.Model(MODEL_PATH);
  const rec = new vosk.Recognizer({
    model: model,
    sampleRate: 16000,
    grammar: ['你', '好', '小', '嗨', '秀'], // restrict recognition to the wake-word characters
  });
  console.log('Vosk Recognizer initialized...');

  const recording = new Recording({
    sampleRate: 16000,
    channels: 1,
  });

  recording.start();
  const stream = recording.stream();
  console.log('Recording started...', stream);

  // Roughly four 8192-byte chunks per recognizer call (tune as needed).
  const PROCESS_SIZE = 4 * 8192;

  // Pending audio is kept as a chunk list and joined once per batch,
  // instead of re-copying the accumulated buffer on every 'data' event.
  let pendingChunks: Buffer[] = [];
  let pendingBytes = 0;

  // Joins and clears the pending chunks.
  const takePending = (): Buffer => {
    const joined = Buffer.concat(pendingChunks, pendingBytes);
    pendingChunks = [];
    pendingBytes = 0;
    return joined;
  };

  stream.on('data', (data: Buffer) => {
    // The incoming data is assumed to already be raw PCM (no WAV decoding).
    pendingChunks.push(data);
    pendingBytes += data.length;

    // Wait until enough audio has accumulated.
    if (pendingBytes < PROCESS_SIZE) return;

    if (rec.acceptWaveform(takePending())) {
      const result = rec.result();
      console.log('Recorded Result:', result);

      // Check the completed text for a wake word.
      if (result.text) {
        const detect = detectWakeWord(result.text);
        if (detect.detected) {
          console.log(`检测到唤醒词: "${detect.word}",置信度: ${detect.confidence}`);
          // Place the post-wake action here.
        }
      }
    } else {
      const partialResult = rec.partialResult();
      console.log('Partial Result:', partialResult);
    }
  });

  // Flush and clean up once the recording stream ends.
  stream.on('end', () => {
    try {
      // Process any audio still below the batch threshold.
      if (pendingBytes > 0) {
        if (rec.acceptWaveform(takePending())) {
          const result = rec.result();
          console.log('Final Recorded Result:', result);
        }
      }

      const finalResult = rec.finalResult();
      console.log('Final Complete Result:', finalResult);
    } finally {
      // Free the recognizer and model even if the flush above throws.
      rec.free();
      model.free();
    }
  });

  // Hand the caller a way to stop the recording.
  return {
    stop: () => {
      recording.stop();
    },
  };
};
|
||||
/**
 * Wake-word detection settings.
 */
const wakeConfig = {
  /** Phrases that count as a wake word. */
  words: ['你好小小', '嗨小小', '小小', '秀秀'],

  /** A similarity score must be strictly greater than this to count as a match. */
  threshold: 0.75,

  /** Minimum text length; it is compared against the character count of the text. */
  minWordCount: 2,
};
|
||||
/**
 * Finds the configured wake word that best matches the recognized text.
 *
 * Whitespace is stripped before matching. Recognizer output is expected to be
 * space-separated tokens (e.g. "你 好 小 小" with a single-character grammar —
 * TODO confirm against real output), and with the spaces left in such text
 * could never score above the threshold against "你好小小".
 *
 * @param text Recognized text to inspect.
 * @returns The best match whose score is strictly above `wakeConfig.threshold`,
 *          or `{ detected: false, confidence: 0, word: '' }` when none qualifies.
 */
function detectWakeWord(text: string): { detected: boolean; confidence: number; word: string } {
  const noMatch = { detected: false, confidence: 0, word: '' };
  if (!text) return noMatch;

  const normalized = text.replace(/\s+/g, '').toLowerCase();
  if (normalized.length < wakeConfig.minWordCount) return noMatch;

  let bestMatch = noMatch;

  for (const wakeWord of wakeConfig.words) {
    // Score the recognized text against this wake word.
    const confidence = calculateSimilarity(normalized, wakeWord.toLowerCase());
    console.log(`检测到唤醒词 "${wakeWord}" 的相似度: ${confidence}`);
    if (confidence > wakeConfig.threshold && confidence > bestMatch.confidence) {
      bestMatch = { detected: true, confidence, word: wakeWord };
    }
  }

  return bestMatch;
}
|
||||
|
||||
/**
 * Scores how closely two strings match, from 0 to 1.
 *
 * This is not an edit distance. The shorter string is slid along the longer
 * one, and the score is the largest number of same-position character matches
 * in any alignment, divided by the shorter string's length.
 *
 * @param str1 First string (typically the recognized text).
 * @param str2 Second string (typically the wake word).
 * @returns 0 when either string is empty, 1 when `str1` contains `str2`,
 *          otherwise the best-alignment match ratio.
 */
function calculateSimilarity(str1: string, str2: string): number {
  // Check for empties first: `includes('')` is always true, so an empty
  // `str2` would otherwise be reported as a perfect match.
  if (str1.length === 0 || str2.length === 0) return 0;

  if (str1.includes(str2)) return 1.0;

  // On equal lengths, str2 is treated as the longer one (no effect on the score).
  const [longer, shorter] = str1.length > str2.length ? [str1, str2] : [str2, str1];

  let bestMatches = 0;
  for (let offset = 0; offset <= longer.length - shorter.length; offset++) {
    let matches = 0;
    for (let j = 0; j < shorter.length; j++) {
      if (longer[offset + j] === shorter[j]) matches++;
    }
    bestMatches = Math.max(bestMatches, matches);
  }

  return bestMatches / shorter.length;
}
|
||||
// Start recording and stop it automatically after a fixed delay.
(async () => {
  try {
    const recorder = await record();

    // Auto-stop after 10 * 30 * 1000 ms = 300 s (5 minutes).
    setTimeout(() => {
      console.log('Stopping recording...');
      recorder.stop();
    }, 10 * 30 * 1000);
  } catch (error: unknown) {
    // Without this, a failure inside record() (for example while loading the
    // model) would surface as an unhandled promise rejection.
    console.error('Failed to start recording:', error);
  }
})();
|
||||
Reference in New Issue
Block a user