Compare commits

...

17 Commits

Author SHA1 Message Date
9e94a4d898 update 2025-10-14 23:04:59 +08:00
d4475cb2f2 更新 src/asr/provider/volcengine/auc.ts 2025-10-14 22:52:24 +08:00
5603d09e80 update 2025-10-13 22:13:19 +08:00
78cc6dcf55 update 2025-10-03 18:43:57 +08:00
8047577165 temp test 2025-08-23 22:34:36 +08:00
e4596b4fde add batch send file to get text 2025-06-23 18:34:54 +08:00
767e436eb8 fix: fix ws 2025-06-23 10:38:01 +08:00
203fa1f103 fix: 2025-06-22 15:18:44 +08:00
87769076c8 fix: add src code 2025-06-22 13:34:34 +08:00
4a9568447e remove some dependencies 2025-06-22 12:46:24 +08:00
b3b64ec59c bump version 2025-06-04 10:09:49 +08:00
232d799575 "feat: 更新ASR服务连接配置,优化录音流处理及模型路径" 2025-06-02 12:38:53 +08:00
e638d7907a test 2025-05-24 00:10:21 +08:00
38b4e58124 add txt 2025-05-20 12:17:52 +08:00
776e0800e9 tts for cosyvoice and funasr and aliyun 2025-05-20 00:39:21 +08:00
54da76bf9d 阿里云一句话识别 2025-05-19 01:44:24 +08:00
a1df51f56b fix funasr 2025-05-19 01:01:38 +08:00
30 changed files with 1617 additions and 1787 deletions

76
.gitignore vendored
View File

@@ -1,21 +1,69 @@
node_modules node_modules
dist # mac
.DS_Store
app.config.json5
apps.config.json
deploy.tar.gz
cache-file
/apps
logs
.env* .env*
!.env.example !.env*example
dist
build
logs
.turbo
pack-dist
# astro
.astro
# next
.next
# nuxt
.nuxt
# vercel
.vercel
# vuepress
.vuepress/dist
# coverage
coverage/
# typescript
*.tsbuildinfo
# debug logs
*.log
*.tmp
# vscode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
# idea
.idea
# system
Thumbs.db
ehthumbs.db
Desktop.ini
# temp files
*.tmp
*.temp
# local development
*.local
public/r
.pnpm-store
models models
videos/tts_mix.wav videos/tts_mix.mp3

View File

@@ -0,0 +1,100 @@
import { EventEmitter } from 'eventemitter3';
import { VideoWS, VideoWsResult, sleep } from '../src/asr/provider/funasr/ws.ts';
import fs from 'node:fs';
import path from 'node:path';
type BatchSendOptions = {
  vws: VideoWS;
  files: string[];
  // Reserved for similarity matching against the recognized text; not
  // read by BatchSendFiles itself (see the commented example below).
  matchText?: string;
  emitter?: EventEmitter;
};

/**
 * Sends a list of local audio files over an already-created VideoWS
 * connection and collects the recognition result for every file.
 *
 * Listen for the `send-done` event on `emitter` to receive the final
 * `{ file, text }[]` list.
 */
export class BatchSendFiles {
  files: string[];
  vws: VideoWS;
  emitter: EventEmitter;

  constructor({ vws, files, emitter }: BatchSendOptions) {
    this.files = files;
    this.vws = vws;
    // Default to the websocket's own emitter so results can be
    // observed on either object.
    this.emitter = emitter || vws.emitter;
  }

  /**
   * Checks connectivity and starts the batch transfer.
   * Bug fix: previously the batch was sent even when the connection
   * check failed; now we log the failure and abort.
   */
  async init() {
    const isConnected = await this.vws.isConnected();
    if (!isConnected) {
      console.error('链接失败:', isConnected);
      return;
    }
    this.send();
  }

  /** Resolves with the payload of the next `result` event. */
  waitOne() {
    return new Promise((resolve) => {
      this.vws.emitter.once('result', (data) => {
        resolve(data);
      });
    });
  }

  /**
   * Validates that `file` is an existing, non-empty audio file with a
   * supported extension and an acceptable size.
   *
   * @returns metadata `{ file, ext, size, isValid }`
   * @throws when the path is not a regular file, has an unsupported
   *   extension, is empty, or exceeds 100 MB. Note that `fs.statSync`
   *   itself throws (ENOENT) when the path does not exist at all.
   */
  async checkAudioFile(file: string) {
    const stats = fs.statSync(file);
    if (!stats.isFile()) {
      throw new Error(`File not found: ${file}`);
    }
    const ext = path.extname(file).toLowerCase();
    const validExtensions = ['.wav', '.mp3', '.flac', '.ogg', '.aac'];
    if (!validExtensions.includes(ext)) {
      throw new Error(`Invalid file type: ${ext}. Supported types are: ${validExtensions.join(', ')}`);
    }
    const fileSize = stats.size;
    if (fileSize === 0) {
      throw new Error(`File is empty: ${file}`);
    }
    const maxSize = 100 * 1024 * 1024; // 100 MB
    if (fileSize > maxSize) {
      throw new Error(`File size exceeds limit: ${fileSize} bytes. Maximum allowed size is ${maxSize} bytes.`);
    }
    return {
      file,
      ext,
      size: fileSize,
      isValid: true,
    };
  }

  /**
   * Sequentially sends every file and waits for its recognition
   * result. Invalid files are logged and skipped. Emits `send-done`
   * with the accumulated `{ file, text }[]` when the batch finishes.
   */
  async send() {
    const textList: { file: string; text: string }[] = [];
    for (const file of this.files) {
      let wav_format = 'wav';
      try {
        const ck = await this.checkAudioFile(file);
        if (ck.ext !== '.wav') {
          wav_format = ck.ext.replace('.', '');
        }
      } catch (error) {
        console.error('Error checking file:', error);
        continue; // skip invalid files instead of aborting the batch
      }
      const data = fs.readFileSync(file);
      // Subscribe before sending so the result event cannot be missed.
      const wait = this.waitOne();
      await this.vws.sendBuffer(data, { wav_format });
      await sleep(1000);
      console.log('File sent:', file);
      const result: VideoWsResult = (await wait) as any;
      console.log('Result:', result.text);
      textList.push({ file, text: result.text });
      console.log('----------------------');
    }
    this.emitter.emit('send-done', textList);
  }
}
// const batchSend = new BatchSendFiles({
// vws: ws,
// // files: [audioTestPath],
// files: [videoTestPath, audioTestPath],
// });
// batchSend.init();
// batchSend.emitter.on('send-done', (data) => {
// const matchText = '在一无所知中,梦里的一天结束了一个新的轮回,便会开始。';
// const textList = data as { file: string; text: string }[];
// for (const item of textList) {
// const getText = item.text || '';
// const distance = natural.JaroWinklerDistance(getText, matchText);
// console.log(`File: ${item.file}, \nText: ${item.text}\nDistance: ${distance}`);
// }
// // console.log('Batch processing done:', data);
// });

View File

@@ -1,22 +1,16 @@
{ {
"name": "@kevisual/video-tools", "name": "@kevisual/video-tools",
"version": "0.0.1", "version": "0.0.5",
"description": "", "description": "",
"main": "index.js", "main": "index.js",
"basename": "/root/video-tools", "basename": "/root/video-tools",
"app": { "app": {
"key": "video-tools", "key": "video-tools",
"entry": "dist/app.mjs", "entry": "dist/app.mjs",
"type": "system-app", "type": "system-app"
"files": [
"dist"
]
}, },
"scripts": { "scripts": {
"watch": "rollup -c rollup.config.mjs -w",
"build": "rollup -c rollup.config.mjs", "build": "rollup -c rollup.config.mjs",
"dev": "cross-env NODE_ENV=development nodemon --delay 2.5 -e js,cjs,mjs --exec node dist/app.mjs",
"dev:watch": "cross-env NODE_ENV=development concurrently -n \"Watch,Dev\" -c \"green,blue\" \"npm run watch\" \"sleep 1 && npm run dev\" ",
"dev:bun": "bun run src/dev.ts --watch", "dev:bun": "bun run src/dev.ts --watch",
"test": "tsx test/**/*.ts", "test": "tsx test/**/*.ts",
"clean": "rm -rf dist", "clean": "rm -rf dist",
@@ -30,61 +24,55 @@
"types": "types/index.d.ts", "types": "types/index.d.ts",
"files": [ "files": [
"dist", "dist",
"src" "src",
"examples"
], ],
"publishConfig": { "publishConfig": {
"access": "public" "access": "public"
}, },
"dependencies": { "dependencies": {
"@kevisual/router": "0.0.10", "@gradio/client": "^1.15.1",
"@kevisual/use-config": "^1.0.10", "@kevisual/router": "0.0.21",
"@kevisual/video": "^0.0.1", "@kevisual/use-config": "^1.0.17",
"@picovoice/porcupine-node": "^3.0.6", "@kevisual/video": "^0.0.2",
"cookie": "^1.0.2", "cookie": "^1.0.2",
"crypto-js": "^4.2.0",
"dayjs": "^1.11.13", "dayjs": "^1.11.13",
"eventemitter3": "^5.0.1", "eventemitter3": "^5.0.1",
"formidable": "^3.5.2", "formidable": "^3.5.4",
"lodash-es": "^4.17.21", "lodash-es": "^4.17.21",
"nanoid": "^5.1.5", "nanoid": "^5.1.5"
"vosk": "^0.3.39"
}, },
"devDependencies": { "devDependencies": {
"@kevisual/types": "^0.0.6", "@alicloud/pop-core": "^1.8.0",
"@kevisual/use-config": "^1.0.10", "@kevisual/logger": "^0.0.4",
"@rollup/plugin-alias": "^5.1.1", "@kevisual/types": "^0.0.10",
"@rollup/plugin-commonjs": "^28.0.3", "@kevisual/use-config": "^1.0.17",
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^16.0.1",
"@rollup/plugin-replace": "^6.0.2",
"@rollup/plugin-typescript": "^12.1.2",
"@types/crypto-js": "^4.2.2", "@types/crypto-js": "^4.2.2",
"@types/formidable": "^3.4.5", "@types/formidable": "^3.4.5",
"@types/lodash-es": "^4.17.12", "@types/lodash-es": "^4.17.12",
"@types/node": "^22.14.1", "@types/node": "^22.15.29",
"@types/vosk": "^0.3.1", "@types/vosk": "^0.3.1",
"@types/ws": "^8.18.1", "@types/ws": "^8.18.1",
"commander": "^13.1.0", "commander": "^14.0.0",
"concurrently": "^9.1.2", "concurrently": "^9.1.2",
"cross-env": "^7.0.3", "cross-env": "^7.0.3",
"dotenv": "^16.5.0", "dotenv": "^16.5.0",
"inquire": "^0.4.8", "inquire": "^0.4.8",
"ioredis": "^5.6.1", "ioredis": "^5.6.1",
"nodemon": "^3.1.9", "nodemon": "^3.1.10",
"pg": "^8.14.1", "pg": "^8.16.0",
"pino": "^9.6.0", "pm2": "^6.0.6",
"pino-pretty": "^13.0.0",
"pm2": "^6.0.5",
"rimraf": "^6.0.1", "rimraf": "^6.0.1",
"rollup": "^4.40.0",
"rollup-plugin-copy": "^3.5.0",
"rollup-plugin-dts": "^6.2.1",
"rollup-plugin-esbuild": "^6.2.1",
"sequelize": "^6.37.7", "sequelize": "^6.37.7",
"tape": "^5.9.0", "tape": "^5.9.0",
"tsup": "^8.4.0", "tsx": "^4.19.4",
"tsx": "^4.19.3",
"typescript": "^5.8.3", "typescript": "^5.8.3",
"ws": "npm:@kevisual/ws" "ws": "npm:@kevisual/ws"
}, },
"packageManager": "pnpm@10.8.1" "exports": {
"./src/*": "./src/*",
"./examples/*": "./examples/*"
},
"packageManager": "pnpm@10.11.1"
} }

1962
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,75 +0,0 @@
import resolve from '@rollup/plugin-node-resolve';
import commonjs from '@rollup/plugin-commonjs';
import json from '@rollup/plugin-json';
import path from 'path';
import esbuild from 'rollup-plugin-esbuild';
import alias from '@rollup/plugin-alias';
import replace from '@rollup/plugin-replace';
import pkgs from './package.json' with {type: 'json'};

// Entry point switches between the dev harness and the production main.
const isDev = process.env.NODE_ENV === 'development';
const input = isDev ? './src/dev.ts' : './src/main.ts';

/**
 * Bundles the server into a single ESM file (dist/app.mjs).
 * @type {import('rollup').RollupOptions}
 */
const config = {
  input,
  output: {
    dir: './dist',
    entryFileNames: 'app.mjs',
    chunkFileNames: '[name]-[hash].mjs',
    format: 'esm',
  },
  plugins: [
    replace({
      preventAssignment: true, // avoid replacing the left side of assignments
      DEV_SERVER: JSON.stringify(isDev), // compile-time flag for dev-only code paths
      APP_VERSION: JSON.stringify(pkgs.version),
    }),
    alias({
      // only esbuild needs to be configured
      entries: [
        { find: '@', replacement: path.resolve('src') }, // map '@' to the src directory
        // Rewrite bare node builtins to the 'node:' scheme so the
        // resolver never looks for them in node_modules.
        { find: 'http', replacement: 'node:http' },
        { find: 'https', replacement: 'node:https' },
        { find: 'fs', replacement: 'node:fs' },
        { find: 'path', replacement: 'node:path' },
        { find: 'crypto', replacement: 'node:crypto' },
        { find: 'zlib', replacement: 'node:zlib' },
        { find: 'stream', replacement: 'node:stream' },
        { find: 'net', replacement: 'node:net' },
        { find: 'tty', replacement: 'node:tty' },
        { find: 'tls', replacement: 'node:tls' },
        { find: 'buffer', replacement: 'node:buffer' },
        { find: 'timers', replacement: 'node:timers' },
        // { find: 'string_decoder', replacement: 'node:string_decoder' },
        { find: 'dns', replacement: 'node:dns' },
        { find: 'domain', replacement: 'node:domain' },
        { find: 'os', replacement: 'node:os' },
        { find: 'events', replacement: 'node:events' },
        { find: 'url', replacement: 'node:url' },
        { find: 'assert', replacement: 'node:assert' },
        { find: 'util', replacement: 'node:util' },
      ],
    }),
    resolve({
      preferBuiltins: true, // prefer Node built-in modules over npm shims
    }),
    commonjs(),
    esbuild({
      target: 'node22', // syntax target: Node 22
      minify: false, // keep output readable; set true to minify
      tsconfig: 'tsconfig.json',
    }),
    json(),
  ],
  // Kept external (provided by the runtime environment, not bundled):
  external: [
    /@kevisual\/router(\/.*)?/, // router
    /@kevisual\/use-config(\/.*)?/, // config loader
    'sequelize', // database ORM
    'ioredis', // redis client
    'pg', // postgres driver
  ],
};
export default config;

View File

@@ -0,0 +1,131 @@
type AliAsrServerOptions = {
  baseUrl?: string;
  appkey: string;
  token: string;
  format?: string; // e.g. 'pcm' | 'wav' | 'mp3'; omit to let the service infer
  sampleRate?: string; // e.g. '16000'; omit to use the service default
  enablePunctuationPrediction?: boolean;
  enableInverseTextNormalization?: boolean;
  enableVoiceDetection?: boolean;
};

/**
 * Minimal client for the Aliyun NLS one-sentence recognition RESTful
 * API: posts a raw audio buffer and returns the parsed response body,
 * or null on any failure.
 */
export class AliAsrServer {
  private baseUrl: string;
  private appkey: string;
  private token: string;
  private format: string;
  private sampleRate: string;
  private enablePunctuationPrediction: boolean;
  private enableInverseTextNormalization: boolean;
  private enableVoiceDetection: boolean;

  constructor(opts?: AliAsrServerOptions) {
    const {
      baseUrl = 'https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/asr',
      appkey = '',
      token = '',
      // Bug fix: these two previously defaulted to undefined while the
      // fields are declared as `string` (strictNullChecks violation).
      // '' is falsy, so buildRequestUrl() still skips them when unset.
      format = '',
      sampleRate = '',
      enablePunctuationPrediction = true,
      enableInverseTextNormalization = true,
      enableVoiceDetection = false,
    } = opts || {};
    this.baseUrl = baseUrl;
    this.appkey = appkey;
    this.token = token;
    this.format = format;
    this.sampleRate = sampleRate;
    this.enablePunctuationPrediction = enablePunctuationPrediction;
    this.enableInverseTextNormalization = enableInverseTextNormalization;
    this.enableVoiceDetection = enableVoiceDetection;
  }

  /**
   * Builds the recognition request URL; optional parameters are only
   * appended when set.
   */
  buildRequestUrl(): string {
    const params = new URLSearchParams();
    params.append('appkey', this.appkey);
    if (this.format) {
      params.append('format', this.format);
    }
    if (this.sampleRate) {
      params.append('sample_rate', this.sampleRate);
    }
    if (this.enablePunctuationPrediction) {
      params.append('enable_punctuation_prediction', 'true');
    }
    if (this.enableInverseTextNormalization) {
      params.append('enable_inverse_text_normalization', 'true');
    }
    if (this.enableVoiceDetection) {
      params.append('enable_voice_detection', 'true');
    }
    return `${this.baseUrl}?${params.toString()}`;
  }

  /**
   * Sends the audio to the recognition endpoint.
   * @param audioContent raw audio bytes (format must match `format`)
   * @returns the parsed response body on success (status 20000000),
   *   otherwise null; never throws.
   */
  async processAudio(audioContent: Buffer): Promise<any> {
    try {
      // Token-based auth header + raw octet-stream body.
      const headers = {
        'X-NLS-Token': this.token,
        'Content-Type': 'application/octet-stream',
      };
      const requestUrl = this.buildRequestUrl();
      const response = await fetch(requestUrl, {
        method: 'POST',
        headers,
        body: audioContent,
      });
      if (!response.ok) {
        console.log(`The audio file recognized failed, http code: ${response.status}`);
        const v = await response.text();
        console.log('The audio file recognized response:', v);
        return null;
      }
      const body = await response.json();
      // 20000000 is the NLS success status code.
      if (body.status === 20000000) {
        console.log('The audio file recognized result:');
        console.log(body);
        console.log('result: ' + body.result);
        console.log('The audio file recognized succeed!');
        return body;
      } else {
        console.log('The audio file recognized failed!');
        console.log(body);
        return null;
      }
    } catch (error) {
      // Bug fix: narrow `unknown` before touching `.code` instead of
      // assuming an Error shape.
      if ((error as { code?: string })?.code === 'ENOENT') {
        console.log('The audio file does not exist!');
      } else {
        console.log('Error during audio processing:', error);
      }
      return null;
    }
  }
}
// // 使用示例
// async function main() {
// const asrServer = new AliAsrServer({
// appkey: '填入appkey',
// token: '填入服务鉴权Token',
// format: 'pcm',
// sampleRate: '16000',
// enablePunctuationPrediction: true,
// enableInverseTextNormalization: true,
// enableVoiceDetection: false,
// });
// const audioFile = '/path/to/nls-sample-16k.wav';
// await asrServer.processAudio(audioFile);
// }
// // 执行主函数
// main().catch(console.error);

View File

@@ -0,0 +1,42 @@
import RPCClient from '@alicloud/pop-core';
interface TokenResponse {
  Token: {
    Id: string;
    ExpireTime: number;
  };
}

type AliCommonOptions = {
  accessKeyId: string;
  accessKeySecret: string;
};

/**
 * Shared Aliyun NLS helper: obtains a service token through the
 * pop-core `CreateToken` API and caches it until it expires.
 */
export class AliCommon {
  private accessKeyId: string;
  private accessKeySecret: string;
  private endpoint: string;
  private apiVersion: string;
  // Cached token and its expiry timestamp (ms since epoch).
  token = '';
  expireTime = 0;

  constructor(opts?: AliCommonOptions) {
    // Explicit options win; otherwise fall back to the environment.
    const envId = process.env.ALIYUN_AK_ID || '';
    const envSecret = process.env.ALIYUN_AK_SECRET || '';
    this.accessKeyId = opts?.accessKeyId || envId;
    this.accessKeySecret = opts?.accessKeySecret || envSecret;
    this.endpoint = 'http://nls-meta.cn-shanghai.aliyuncs.com';
    this.apiVersion = '2019-02-28';
  }

  /** Returns the cached token while valid, otherwise fetches a fresh one. */
  async getToken() {
    const cacheIsFresh = Boolean(this.token) && this.expireTime > Date.now();
    if (cacheIsFresh) {
      return this.token;
    }
    const client = new RPCClient({
      accessKeyId: this.accessKeyId,
      accessKeySecret: this.accessKeySecret,
      endpoint: this.endpoint,
      apiVersion: this.apiVersion,
    });
    const { Token } = await client.request<TokenResponse>('CreateToken', {});
    this.token = Token.Id;
    // ExpireTime arrives in seconds; store ms for Date.now() comparison.
    this.expireTime = Token.ExpireTime * 1000;
    return Token.Id;
  }
}

View File

@@ -0,0 +1,25 @@
import { AliAsrServer } from '../aliyun-asr-server.ts';
import fs from 'fs/promises';
import path from 'path';

// Earlier fixtures, kept for quick switching:
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
const name = 'output-1746007775571.mp3';
const videoTestPath2 = path.join(process.cwd(), 'build', name);

// Example: run one audio file through Aliyun one-sentence recognition.
const main = async () => {
  const recognizer = new AliAsrServer({
    appkey: process.env.ALI_ASR_APP_KEY,
    token: process.env.ALI_ASR_TOKEN,
    format: 'mp3',
    // format: 'wav',
  });
  const audioContent = await fs.readFile(videoTestPath);
  await recognizer.processAudio(audioContent);
};

main().catch(console.error);

View File

@@ -0,0 +1,10 @@
import dotenv from 'dotenv';
dotenv.config();
import { AliCommon } from '../base.ts';

// Fetch a fresh NLS token with the AK credentials from .env and print it.
const common = new AliCommon({
  accessKeyId: process.env.ALIYUN_AK_ID,
  accessKeySecret: process.env.ALIYUN_AK_SECRET,
});
common.getToken().then((token) => console.log(token));

View File

@@ -3,40 +3,80 @@ import net from 'net';
import path from 'path'; import path from 'path';
import fs from 'fs'; import fs from 'fs';
const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav'); // const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
const ws = new VideoWS({ // const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
// url: 'wss://192.168.31.220:10095', // const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
url: 'wss://funasr.xiongxiao.me', const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
isFile: true, const videoTestPath3 = path.join(process.cwd(), 'funasr_test.wav');
onConnect: async () => { const name = 'output-1746007775571.mp3';
console.log('onConnect'); const videoTestPath2 = path.join(process.cwd(), 'build', name);
const data = fs.readFileSync(videoTestPath);
let sampleBuf = new Uint8Array(data);
var chunk_size = 960; // for asr chunk_size [5, 10, 5] const url = 'wss://funasr.xiongxiao.me';
let totalsend = 0; const url5 = 'https://1.15.101.247:10095'; // pro
let len = 0; // const ws = new VideoWS({
ws.start(); // // url: 'wss://192.168.31.220:10095',
while (sampleBuf.length >= chunk_size) { // url: 'wss://funasr.xiongxiao.me',
const sendBuf = sampleBuf.slice(0, chunk_size); // isFile: true,
totalsend = totalsend + sampleBuf.length; // // mode: 'offline',
sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length); // wav_format: 'mp3',
if (len === 100) { // onConnect: async () => {
// ws.stop(); // console.log('onConnect');
// ws.start(); // const data = fs.readFileSync(videoTestPath);
await new Promise((resolve) => setTimeout(resolve, 1000)); // let sampleBuf = new Uint8Array(data);
// var chunk_size = 960; // for asr chunk_size [5, 10, 5]
// let totalsend = 0;
// let len = 0;
// ws.start();
// while (sampleBuf.length >= chunk_size) {
// const sendBuf = sampleBuf.slice(0, chunk_size);
// totalsend = totalsend + sampleBuf.length;
// sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length);
// if (len === 100) {
// // ws.stop();
// // ws.start();
// // await new Promise((resolve) => setTimeout(resolve, 1000));
// }
// await new Promise((resolve) => setTimeout(resolve, 10));
// ws.send(sendBuf);
// len++;
// }
// await new Promise((resolve) => setTimeout(resolve, 1000));
// ws.stop();
// console.log('len', len);
// },
// });
// const server = net.createServer((socket) => {
// socket.on('data', (data) => {
// console.log('data', data);
// });
// });
// server.listen(10096);
const ws2 = new VideoWS({
url: url5,
mode: '2pass',
onConnect: async () => {
const data = fs.readFileSync(videoTestPath3);
// await ws2.sendBuffer(data, { wav_format: 'mp3' });
// await new Promise((resolve) => setTimeout(resolve, 1000));
// const data2 = fs.readFileSync(videoTestPath2);
// await ws2.sendBuffer(data2, { wav_format: 'mp3' });
ws2.emitter.on('message', (event) => {
console.log('message', event.data);
});
ws2.emitter.on('result', (result) => {
if (result.is_final) {
console.log('Final result:', result);
process.exit(0);
} }
ws.send(sendBuf); });
len++; await ws2.start();
} await ws2.sendBuffer(data, { online: true });
ws.stop(); setTimeout(() => {
console.log('len', len); ws2.stop();
}, 4000);
}, },
}); });
const server = net.createServer((socket) => {
socket.on('data', (data) => {
console.log('data', data);
});
});
server.listen(10096);

View File

@@ -1,35 +1,61 @@
import { VideoWS } from '../ws.ts'; import { VideoWS } from '../ws.ts';
import path from 'node:path';
import net from 'net'; import net from 'net';
import { Recording } from '../../../../recorder/index.ts'; import { Recording } from '../../../../recorder/index.ts';
import Stream from 'stream'; import Stream from 'stream';
import fs from 'node:fs'; // 新增
const recorder = new Recording({
sampleRate: 16000,
channels: 1, //
audioType: 'wav',
threshold: 0,
recorder: 'rec',
silence: '1.0',
endOnSilence: true,
});
const writeFilePath = path.join(process.cwd(), 'funasr_test.wav');
const fileStream = fs.createWriteStream(writeFilePath, { encoding: 'binary' });
const url = 'wss://funasr.xiongxiao.me';
const url3 = 'wss://pro.xiongxiao.me:10095';
const url4 = 'wss://121.4.112.18:10095'; // aliyun
const url5 = 'https://1.15.101.247:10095'; // pro
const recorder = new Recording();
const writeStream = new Stream.Writable();
const ws = new VideoWS({ const ws = new VideoWS({
url: 'wss://192.168.31.220:10095', url: url5,
isFile: false, isFile: false,
// mode: 'online',
mode: '2pass',
wsOptions: {
rejectUnauthorized: false,
},
onConnect: async () => { onConnect: async () => {
console.log('onConnect'); console.log('onConnect');
let chunks: Buffer = Buffer.alloc(0); ws.start();
var chunk_size = 960; // for asr chunk_size [5, 10, 5]
let totalsend = 0; recorder.start();
let len = 0; let len = 0;
recorder.stream().on('data', (chunk) => { recorder.stream().on('data', (chunk) => {
chunks = Buffer.concat([chunks, chunk]); // ws.sendBuffer(chunk, { online: true });
if (chunks.length > chunk_size) { // console.log('Sending audio chunk:', chunk.length);
ws.send(chunks); ws.send(chunk)
totalsend += chunks.length; fileStream.write(chunk); // 新增:将音频数据写入文件
chunks = Buffer.alloc(0); len += chunk.length;
}
}); });
ws.start();
setTimeout(() => { setTimeout(() => {
ws.stop(); ws.stop();
fileStream.end(); // 新增:关闭文件流
setTimeout(() => { setTimeout(() => {
process.exit(0); process.exit(0);
}, 1000); }, 1000);
console.log('len', len); console.log('len', len);
}, 20000); }, 10 * 1000);
ws.emitter.on('message', (event) => {
console.log('message', event.data);
});
}, },
}); });
@@ -38,4 +64,4 @@ const server = net.createServer((socket) => {
console.log('data', data); console.log('data', data);
}); });
}); });
server.listen(10096); server.listen(10097);

View File

@@ -1,6 +1,7 @@
// import WebSocket from 'ws'; // import WebSocket from 'ws';
import { initWs } from '../../../ws-adapter/index.ts'; import { EventEmitter } from 'eventemitter3';
import { WSServer, WSSOptions } from '../../provider/ws-server.ts';
export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
export type VideoWSOptions = { export type VideoWSOptions = {
url?: string; url?: string;
ws?: WebSocket; ws?: WebSocket;
@@ -8,10 +9,34 @@ export type VideoWSOptions = {
mode?: VideoWsMode; mode?: VideoWsMode;
isFile?: boolean; isFile?: boolean;
onConnect?: () => void; onConnect?: () => void;
wav_format?: string;
emitter?: EventEmitter;
} & {
wsOptions?: WSSOptions['wsOptions'];
}; };
export const videoWsMode = ['2pass', 'online', 'offline'] as const; export const videoWsMode = ['2pass', 'online', 'offline'] as const;
type VideoWsMode = (typeof videoWsMode)[number]; type VideoWsMode = (typeof videoWsMode)[number];
type OpenRequest = {
// 语音分片大小(单位: 毫秒):
chunk_size: number[];
// 音频文件名:
wav_name: string;
// 是否正在说话:
is_speaking: boolean;
// 分片间隔(单位: 毫秒):
chunk_interval: number;
// 逆文本标准化(ITN):
itn: boolean;
// 模式:
// '2pass' - 双通道模式, 'online' - 在线模式, 'offline' - 离线模式
mode: VideoWsMode;
// 音频格式:
wav_format?: string; // 'wav' - PCM格式, 'mp3' - MP3格式等
// 音频采样率(单位: Hz):
audio_fs?: number;
// 热词列表:
hotwords?: string;
};
export type VideoWsResult = { export type VideoWsResult = {
isFinal: boolean; isFinal: boolean;
mode: VideoWsMode; mode: VideoWsMode;
@@ -21,48 +46,21 @@ export type VideoWsResult = {
wav_name: string; wav_name: string;
}; };
export class VideoWS { export class VideoWS extends WSServer {
ws: WebSocket;
itn?: boolean; itn?: boolean;
mode?: VideoWsMode; mode?: VideoWsMode;
isFile?: boolean; wav_format?: string;
onConnect?: () => void;
constructor(options?: VideoWSOptions) { constructor(options?: VideoWSOptions) {
super({ url: options?.url, ws: options?.ws, onConnect: options?.onConnect, wsOptions: options?.wsOptions });
this.itn = options?.itn || false;
this.itn = options?.itn || false; this.itn = options?.itn || false;
this.mode = options?.mode || 'online'; this.mode = options?.mode || 'online';
this.isFile = options?.isFile || false; this.wav_format = options?.wav_format;
this.initWs(options);
}
async initWs(options: VideoWSOptions) {
if (options?.ws) {
this.ws = options.ws;
} else {
this.ws = await initWs(options.url);
}
this.onConnect = options?.onConnect || (() => {});
this.ws.onopen = this.onOpen.bind(this);
this.ws.onmessage = this.onMessage.bind(this);
this.ws.onerror = this.onError.bind(this);
this.ws.onclose = this.onClose.bind(this);
} }
async onOpen() { async start(opts?: Partial<OpenRequest>) {
this.onConnect();
}
async start() {
let isFileMode = this.isFile;
const chunk_size = new Array(5, 10, 5); const chunk_size = new Array(5, 10, 5);
type OpenRequest = { console.log('start', chunk_size);
chunk_size: number[];
wav_name: string;
is_speaking: boolean;
chunk_interval: number;
itn: boolean;
mode: VideoWsMode;
wav_format?: string;
audio_fs?: number;
hotwords?: string;
};
const request: OpenRequest = { const request: OpenRequest = {
chunk_size: chunk_size, chunk_size: chunk_size,
wav_name: 'h5', // wav_name: 'h5', //
@@ -70,16 +68,13 @@ export class VideoWS {
chunk_interval: 10, chunk_interval: 10,
itn: this.itn, itn: this.itn,
mode: this.mode || 'online', mode: this.mode || 'online',
...opts,
}; };
console.log('request', request); const file_sample_rate = 16000;
if (isFileMode) { request.wav_format = request.wav_format || this.wav_format || 'wav';
const file_ext = 'wav'; if ('wav' == request.wav_format) {
const file_sample_rate = 16000; request.wav_format = 'PCM';
request.wav_format = file_ext; request.audio_fs = file_sample_rate;
if (file_ext == 'wav') {
request.wav_format = 'PCM';
request.audio_fs = file_sample_rate;
}
} }
this.ws.send(JSON.stringify(request)); this.ws.send(JSON.stringify(request));
} }
@@ -99,11 +94,41 @@ export class VideoWS {
this.ws.send(data); this.ws.send(data);
} }
} }
/**
* 发送音频数据, 离线
* @param data 音频数据
* @param opts 选项
*/
async sendBuffer(data: Buffer, opts?: { isFile?: boolean; wav_format?: string; online?: boolean }) {
const { wav_format = 'wav', online = false } = opts || {};
if (this.ws && this.ws.readyState === WebSocket.OPEN) {
let sampleBuf = new Uint8Array(data);
const ws = this;
var chunk_size = 960; // for asr chunk_size [5, 10, 5]
let totalsend = 0;
let len = 0;
if (!online) ws.start({ wav_format });
while (sampleBuf.length >= chunk_size) {
const sendBuf = sampleBuf.slice(0, chunk_size);
totalsend = totalsend + sampleBuf.length;
sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length);
await new Promise((resolve) => setTimeout(resolve, 10));
ws.send(sendBuf);
len++;
}
if (!online) ws.stop();
}
}
async onMessage(event: MessageEvent) { async onMessage(event: MessageEvent) {
super.onMessage(event);
const data = event.data; const data = event.data;
try { try {
const result = JSON.parse(data.toString()); const result = JSON.parse(data.toString());
console.log('result', result); if (result?.is_final !== undefined && result?.text) {
// console.log('result', result, typeof result);
this.emitter.emit('result', result);
}
// console.log('onMessage-result', result);
} catch (error) { } catch (error) {
console.log('error', error); console.log('error', error);
} }

View File

@@ -1,5 +1,5 @@
import * as zlib from 'zlib'; import * as zlib from 'node:zlib';
import { promisify } from 'util'; import { promisify } from 'node:util';
import { nanoid } from 'nanoid'; import { nanoid } from 'nanoid';
import { VolcEngineBase, uuid } from './base.ts'; import { VolcEngineBase, uuid } from './base.ts';
@@ -61,6 +61,39 @@ function generateBeforePayload(sequence: number): Buffer {
return beforePayload; return beforePayload;
} }
export type ParsedMessage = {
isLastPackage: boolean;
payloadSequence?: number;
payloadMsg?: {
audio_info?: {
duration: number;
};
result?: {
additions?: {
log_id?: string;
};
text?: string;
utterances?: Array<{
additions?: {
fixed_prefix_result?: string;
};
definite?: boolean;
end_time?: number;
start_time?: number;
text?: string;
words?: Array<{
end_time: number;
start_time: number;
text: string;
}>;
}>;
};
error?: any;
};
payloadSize?: number;
code?: number;
seq?: number;
};
/** /**
* Parse response from the WebSocket server * Parse response from the WebSocket server
*/ */
@@ -393,10 +426,11 @@ export class AsrWsClient extends VolcEngineBase {
// Wait for response // Wait for response
await sendVoice(audioData, segmentSize); await sendVoice(audioData, segmentSize);
} }
async onMessage(event: MessageEvent) { async onMessage(event: MessageEvent) {
try { try {
const parsed = parseResponse(Buffer.from(event.data as ArrayBuffer)); const parsed = parseResponse(Buffer.from(event.data as ArrayBuffer));
console.log(`Seq ${parsed.payloadSequence} response:`, parsed); // console.log(`Seq ${parsed.payloadSequence} response:`, parsed);
if (typeof event.data === 'string') { if (typeof event.data === 'string') {
throw new Error('event.data is string: ' + event.data); throw new Error('event.data is string: ' + event.data);
} }
@@ -405,10 +439,9 @@ export class AsrWsClient extends VolcEngineBase {
this.emitter.emit('error', parsed); this.emitter.emit('error', parsed);
this.isError = true; this.isError = true;
} }
this.emitter.emit('message', parsed);
if (parsed.isLastPackage) { if (parsed.isLastPackage) {
this.emitter.emit('end', parsed); this.emitter.emit('end', parsed);
} else {
this.emitter.emit('message', parsed);
} }
} catch (error) { } catch (error) {
console.error('Error processing response:', error); console.error('Error processing response:', error);
@@ -440,6 +473,14 @@ export class AsrWsClient extends VolcEngineBase {
throw error; throw error;
} }
} }
async setIsEnd(isEnd: boolean) {
super.setIsEnd(isEnd);
if (isEnd) {
// 发送空白包
const emptyBuffer = Buffer.alloc(10000);
this.sendVoiceStream(emptyBuffer);
}
}
/** /**
* 发送语音流, 最小10000 * 发送语音流, 最小10000
* @param data * @param data

View File

@@ -238,7 +238,7 @@ interface AudioItem {
id: string | number; id: string | number;
path: string; path: string;
} }
// 流式语音识别
export class AsrWsClient extends VolcEngineBase { export class AsrWsClient extends VolcEngineBase {
private audioPath: string; private audioPath: string;
private cluster: string; private cluster: string;

View File

@@ -0,0 +1,136 @@
// https://git.xiongxiao.me/kevisual/video-tools/raw/branch/main/src/asr/provider/volcengine/auc.ts
import { nanoid } from 'nanoid';

export const FlashURL = 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash';
export const AsrBaseURL = 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/submit';
export const AsrBase = 'volc.bigasr.auc';
export const AsrTurbo = 'volc.bigasr.auc_turbo';

// Per-request ids only need to be unique, not cryptographic.
const uuid = () => nanoid();

type AsrOptions = {
  // Optional endpoint override; when omitted the URL is derived from `type`.
  url?: string;
  appid?: string;
  token?: string;
  type?: AsrType;
};
type AsrType = 'flash' | 'standard' | 'turbo';

/**
 * Volcengine "bigmodel" audio file recognition (AUC) client.
 * `flash` posts to the synchronous recognize/flash endpoint; other
 * types go through the async submit endpoint.
 */
export class Asr {
  url: string = FlashURL;
  appid: string = '';
  token: string = '';
  type: AsrType = 'flash';

  /** @throws when appid or token is missing. */
  constructor(options: AsrOptions = {}) {
    this.appid = options.appid || '';
    this.token = options.token || '';
    this.type = options.type || 'flash';
    if (this.type !== 'flash') {
      this.url = AsrBaseURL;
    }
    // Bug fix: `options.url` was declared but silently ignored; an
    // explicit override now takes precedence over the derived endpoint.
    if (options.url) {
      this.url = options.url;
    }
    if (!this.appid || !this.token) {
      throw new Error('VOLCENGINE_Asr_APPID or VOLCENGINE_Asr_TOKEN is not set');
    }
  }

  /** Auth headers; the resource id selects the turbo vs standard model. */
  header() {
    const model = this.type === 'flash' ? AsrTurbo : AsrBase;
    return {
      'X-Api-App-Key': this.appid,
      'X-Api-Access-Key': this.token,
      'X-Api-Resource-Id': model,
      'X-Api-Request-Id': uuid(),
      'X-Api-Sequence': '-1',
    };
  }

  /**
   * POSTs the recognition request to the configured endpoint.
   * @throws when neither `audio.url` nor `audio.data` is provided.
   * @returns the raw fetch Response promise.
   */
  submit(body: AsrRequest) {
    if (!body.audio || (!body.audio.url && !body.audio.data)) {
      throw new Error('audio.url or audio.data is required');
    }
    const data: AsrRequest = {
      ...body,
    };
    return fetch(this.url, { method: 'POST', headers: this.header(), body: JSON.stringify(data) });
  }

  /** Convenience wrapper: submit and parse the JSON response body. */
  async getText(body: AsrRequest) {
    const res = await this.submit(body);
    return res.json();
  }
}
export type AsrResponse = {
audio_info: {
/**
* 音频时长,单位为 ms
*/
duration: number;
};
result: {
additions: {
duration: string;
};
text: string;
utterances: Array<{
end_time: number;
start_time: number;
text: string;
words: Array<{
confidence: number;
end_time: number;
start_time: number;
text: string;
}>;
}>;
};
}
/** Request payload accepted by the Volcengine AUC recognition endpoints. */
export interface AsrRequest {
  user?: {
    uid: string;
  };
  audio: {
    url?: string; // remote audio location, alternative to `data`
    data?: string; // base64-encoded audio, alternative to `url`
    format?: 'wav' | 'pcm' | 'mp3' | 'ogg';
    codec?: 'raw' | 'opus'; // raw / opus; defaults to raw (pcm).
    rate?: 8000 | 16000; // sample rate, 8000 or 16000; defaults to 16000.
    channel?: 1 | 2; // channel count, 1 or 2; defaults to 1.
  };
  request?: {
    model_name?: string; // recognition model name, e.g. "bigmodel"
    enable_words?: boolean; // word-level timestamps; defaults to false.
    enable_sentence_info?: boolean; // sentence-level timestamps; defaults to false.
    enable_utterance_info?: boolean; // utterance-level timestamps; defaults to true.
    enable_punctuation_prediction?: boolean; // punctuation prediction; defaults to true.
    enable_inverse_text_normalization?: boolean; // inverse text normalization (ITN); defaults to true.
    enable_separate_recognition_per_channel?: boolean; // per-channel recognition; defaults to false.
    audio_channel_count?: 1 | 2; // only meaningful when per-channel recognition is on; 1 or 2, defaults to 1.
    max_sentence_silence?: number; // max in-sentence silence (ms) when sentence info is on; defaults to 800.
    custom_words?: string[];
    enable_channel_split?: boolean; // split channels before recognition
    enable_ddc?: boolean; // dual-channel denoising (DDC)
    enable_speaker_info?: boolean; // speaker diarization
    enable_punc?: boolean; // punctuation prediction (short form)
    enable_itn?: boolean; // inverse text normalization (short form)
    vad_segment?: boolean; // VAD-based sentence segmentation
    show_utterances?: boolean; // include utterance-level results in the response
    corpus?: {
      boosting_table_name?: string;
      correct_table_name?: string;
      context?: string;
    };
  };
}
// const main = async () => {
// const base64Audio = wavToBase64(audioPath);
// const auc = new Asr({
// appid: config.VOLCENGINE_AUC_APPID,
// token: config.VOLCENGINE_AUC_TOKEN,
// });
// const result = await auc.getText({ audio: { data: base64Audio } });
// console.log(util.inspect(result, { showHidden: false, depth: null, colors: true }))
// }
// main();

View File

@@ -1,4 +1,3 @@
import { initWs } from '../../../ws-adapter/index.ts';
import { WSServer } from '../../provider/ws-server.ts'; import { WSServer } from '../../provider/ws-server.ts';
import { nanoid } from 'nanoid'; import { nanoid } from 'nanoid';

View File

@@ -7,15 +7,22 @@ import fs from 'fs';
const main = async () => { const main = async () => {
const audioId = '123'; const audioId = '123';
const asrClient = new AsrWsClient({ const asrClient = new AsrWsClient({
appid: config.APP_ID, appid: config.VOLCENGINE_ASR_MODEL_APPID,
token: config.TOKEN, token: config.VOLCENGINE_ASR_MODEL_TOKEN,
}); });
asrClient.emitter.on('message', (result) => {
console.log('识别结果', JSON.stringify(result, null, 2));
})
asrClient.emitter.on('end', (result) => {
console.log('识别结束', JSON.stringify(result, null, 2));
})
await new Promise((resolve) => setTimeout(resolve, 2000)); await new Promise((resolve) => setTimeout(resolve, 2000));
const data = fs.readFileSync(audioPath); const data = fs.readFileSync(audioPath);
await asrClient.sendVoiceFile(data); await asrClient.sendVoiceFile(data);
await asrClient.sendVoiceFile(fs.readFileSync(blankAudioPath)); // await asrClient.sendVoiceFile(fs.readFileSync(blankAudioPath));
asrClient.setIsEnd(true); asrClient.setIsEnd(true);
await asrClient.sendVoiceFile(fs.readFileSync(audioPath2)); // await asrClient.sendVoiceFile(fs.readFileSync(audioPath2));
}; };
main(); main();

View File

@@ -0,0 +1,21 @@
import { audioPath, config, sleep } from './common.ts';
import { Asr } from '../auc.ts';
import fs from 'fs';
import util from 'node:util';
// Read a file from disk and return its contents encoded as base64.
const wavToBase64 = (filePath: string): string => fs.readFileSync(filePath).toString('base64');
// Demo: transcribe the sample wav through the Volcengine AUC flash endpoint
// and pretty-print the full response.
const main = async () => {
  const client = new Asr({
    appid: config.VOLCENGINE_AUC_APPID,
    token: config.VOLCENGINE_AUC_TOKEN,
  });
  const transcript = await client.getText({ audio: { data: wavToBase64(audioPath) } });
  console.log(util.inspect(transcript, { showHidden: false, depth: null, colors: true }));
};
main();

View File

@@ -1,7 +1,7 @@
import { EventEmitter } from 'eventemitter3'; import { EventEmitter } from 'eventemitter3';
import { initWs } from '../../ws-adapter/index.ts'; import { initWs } from '../../ws-adapter/index.ts';
import type { ClientOptions } from 'ws'; import type { ClientOptions } from 'ws';
type WSSOptions = { export type WSSOptions = {
url: string; url: string;
ws?: WebSocket; ws?: WebSocket;
onConnect?: () => void; onConnect?: () => void;
@@ -45,7 +45,7 @@ export class WSServer {
*/ */
async onOpen() { async onOpen() {
this.connected = true; this.connected = true;
this.onConnect(); this?.onConnect?.();
this.emitter.emit('open'); this.emitter.emit('open');
} }
/** /**

View File

@@ -1,37 +1,6 @@
import { pino } from 'pino'; import { Logger } from '@kevisual/logger/node';
import { useConfig } from '@kevisual/use-config/env';
const config = useConfig(); const level = process.env.LOG_LEVEL || 'info';
export const logger = new Logger({
export const logger = pino({ level: level as any,
level: config.LOG_LEVEL || 'info',
transport: {
target: 'pino-pretty',
options: {
colorize: true,
translateTime: 'SYS:standard',
ignore: 'pid,hostname',
},
},
serializers: {
error: pino.stdSerializers.err,
req: pino.stdSerializers.req,
res: pino.stdSerializers.res,
},
// base: {
// app: 'ai-videos',
// env: process.env.NODE_ENV || 'development',
// },
}); });
export const logError = (message: string, data?: any) => logger.error({ data }, message);
export const logWarning = (message: string, data?: any) => logger.warn({ data }, message);
export const logInfo = (message: string, data?: any) => logger.info({ data }, message);
export const logDebug = (message: string, data?: any) => logger.debug({ data }, message);
export const log = {
error: logError,
warn: logWarning,
info: logInfo,
debug: logDebug,
};

View File

@@ -1,9 +1,8 @@
import assert from 'assert'; import assert from 'assert';
import { logDebug, logInfo } from '../logger/index.ts'; import { logger } from '../logger/index.ts';
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'; import { ChildProcessWithoutNullStreams, spawn } from 'child_process';
import recorders from '../recorder/recorders/index.ts'; import recorders from '../recorder/recorders/index.ts';
import Stream from 'stream'; import Stream from 'stream';
export type RecordingOptions = { export type RecordingOptions = {
/* 采样率默认为16000 */ /* 采样率默认为16000 */
sampleRate?: number; sampleRate?: number;
@@ -64,9 +63,9 @@ export class Recording {
this.args = args; this.args = args;
this.cmdOptions = Object.assign({ encoding: 'binary', stdio: 'pipe' }, spawnOptions); this.cmdOptions = Object.assign({ encoding: 'binary', stdio: 'pipe' }, spawnOptions);
logDebug(`Started recording`); logger.debug(`Started recording`);
logDebug('options', this.options); logger.debug('options', this.options);
logDebug(` ${this.cmd} ${this.args.join(' ')}`); logger.debug(` ${this.cmd} ${this.args.join(' ')}`);
return this.start(); return this.start();
} }
@@ -92,15 +91,15 @@ Enable debugging with the environment variable DEBUG=record.`,
}); });
err.on('data', (chunk) => { err.on('data', (chunk) => {
logDebug(`STDERR: ${chunk}`); logger.debug(`STDERR: ${chunk}`);
}); });
rec.on('data', (chunk) => { rec.on('data', (chunk) => {
logDebug(`Recording ${chunk.length} bytes`); logger.debug(`Recording ${chunk.length} bytes`);
}); });
rec.on('end', () => { rec.on('end', () => {
logDebug('Recording ended'); logger.debug('Recording ended');
}); });
return this; return this;
@@ -117,7 +116,7 @@ Enable debugging with the environment variable DEBUG=record.`,
this.process.kill('SIGSTOP'); this.process.kill('SIGSTOP');
this._stream.pause(); this._stream.pause();
logDebug('Paused recording'); logger.debug('Paused recording');
} }
resume() { resume() {
@@ -125,7 +124,7 @@ Enable debugging with the environment variable DEBUG=record.`,
this.process.kill('SIGCONT'); this.process.kill('SIGCONT');
this._stream.resume(); this._stream.resume();
logDebug('Resumed recording'); logger.debug('Resumed recording');
} }
isPaused() { isPaused() {

View File

@@ -0,0 +1,30 @@
import { Client } from '@gradio/client';
import path from 'node:path';
import fs from 'node:fs';
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
const name = 'output-1746007775571.mp3';
const videoTestPath2 = path.join(process.cwd(), 'build', name);
const textPath = path.join(process.cwd(), 'build', '01-kevisual.md');
const exampleAudio = fs.readFileSync(videoTestPath);
// const exampleAudio = await response_0.blob();
const text = fs.readFileSync(textPath, 'utf-8');
const client = await Client.connect('http://192.168.31.220:50000/');
const result = await client.predict('/generate_audio', {
// tts_text: '梦里的一天结束了一个新的轮m,回梦里的一天结束了一个新的轮回梦里的一,,天结束了一个新的轮回,梦里的一天结束了一个新的轮回',
tts_text: text,
mode_checkbox_group: '3s极速复刻',
sft_dropdown: '',
prompt_text: '在一无所知中,梦里的一天结束了一个新的轮回,便会开始。',
prompt_wav_upload: exampleAudio,
prompt_wav_record: null,
instruct_text: '',
seed: 3,
stream: false,
speed: 1,
});
console.log(result.data);

View File

@@ -0,0 +1,53 @@
import { Client } from '@gradio/client';
// Connection options for the CosyVoice Gradio server.
type CosyVoiceTTSOptions = {
  url: string;
};
// Payload forwarded verbatim to the /generate_audio Gradio endpoint.
type AudioOptions = {
  tts_text: string; // text to synthesize
  mode_checkbox_group: string; // synthesis mode selector value
  sft_dropdown: string;
  prompt_text: string; // transcript of the reference audio
  prompt_wav_upload?: any; // reference audio payload — presumably a Buffer/Blob; verify against caller
  prompt_wav_record: any | null;
  instruct_text: string;
  seed: number;
  stream: boolean;
  speed: number;
};
/**
 * Thin wrapper around a CosyVoice Gradio server's /generate_audio endpoint.
 * The connection is established lazily on first use.
 */
export class CosyVoiceTTS {
  private client: Client;
  private url: string;
  isInit = false;
  constructor(opts?: CosyVoiceTTSOptions) {
    this.url = opts?.url || 'http://localhost:50000/';
  }
  /**
   * Connect to the Gradio server. Fix: now idempotent — an established
   * connection is reused; previously every call opened a fresh connection,
   * abandoning the old one.
   */
  async init() {
    if (this.isInit) {
      return true;
    }
    const client = await Client.connect(this.url);
    this.client = client;
    this.isInit = true;
    return true;
  }
  /**
   * Run text-to-speech. Fields missing from `opts` fall back to the demo
   * defaults below.
   * @returns the raw Gradio prediction result.
   */
  generateAudio = async (opts?: Partial<AudioOptions>) => {
    if (!this.isInit) {
      await this.init();
    }
    const data: AudioOptions = {
      tts_text: '梦里的一天结束了一个新的轮m,回梦里的一天结束了一个新的轮回梦里的一,,天结束了一个新的轮回,梦里的一天结束了一个新的轮回',
      mode_checkbox_group: '3s极速复刻',
      sft_dropdown: '',
      prompt_text: '在一无所知中,梦里的一天结束了一个新的轮回,便会开始。',
      // prompt_wav_upload: exampleAudio,
      prompt_wav_record: null,
      instruct_text: '',
      seed: 3,
      stream: false,
      speed: 1,
      ...opts,
    };
    const result = await this.client.predict('/generate_audio', data);
    return result;
  };
}

View File

@@ -2,7 +2,7 @@ import vosk from 'vosk';
import { Recording } from '../../recorder/index.ts'; import { Recording } from '../../recorder/index.ts';
import fs from 'fs'; import fs from 'fs';
import path from 'path'; import path from 'path';
import { audioPath, sleep } from './common.ts'; import { audioPath, sleep, mySpeechText } from './common.ts';
import { encodeWav, decodeWav } from '../../utils/convert.ts'; import { encodeWav, decodeWav } from '../../utils/convert.ts';
// 需要先下载Vosk模型 // 需要先下载Vosk模型
// const MODEL_PATH = 'vosk-model-small-en-us-0.15'; // const MODEL_PATH = 'vosk-model-small-en-us-0.15';
@@ -21,8 +21,12 @@ async function detectWithVosk(audioFilePath) {
const wakeWords = ['欢迎']; // 自定义唤醒词列表 const wakeWords = ['欢迎']; // 自定义唤醒词列表
const audioBuffer = fs.readFileSync(audioFilePath); const audioBuffer = fs.readFileSync(audioFilePath);
const pcmBuffer = decodeWav(audioBuffer); const pcmBuffer = decodeWav(audioBuffer);
const result = await rec.acceptWaveformAsync(pcmBuffer); const result = rec.acceptWaveform(pcmBuffer);
console.log('result', result, rec.result()); console.log('result', result, rec.result());
// const result = await rec.acceptWaveformAsync(pcmBuffer);
// console.log('result', result, rec.result());
// return new Promise((resolve) => { // return new Promise((resolve) => {
// const pcmBufferLength = Buffer.byteLength(pcmBuffer); // const pcmBufferLength = Buffer.byteLength(pcmBuffer);
// console.log('pcmBufferLength', pcmBufferLength); // console.log('pcmBufferLength', pcmBufferLength);
@@ -44,6 +48,10 @@ async function detectWithVosk(audioFilePath) {
// }); // });
} }
detectWithVosk(audioPath).then((result) => { // detectWithVosk(audioPath).then((result) => {
// console.log('result', result);
// });
detectWithVosk(mySpeechText).then((result) => {
console.log('result', result); console.log('result', result);
}); });

View File

@@ -6,7 +6,13 @@ export const config = dotenv.config({
}).parsed; }).parsed;
export const audioPath = path.join(process.cwd(), 'videos/asr_example.wav'); export const audioPath = path.join(process.cwd(), 'videos/asr_example.wav');
export const mySpeechText = path.join(process.cwd(), 'videos/my_speech_text.wav');
export const audioPath2 = path.join(process.cwd(), 'videos/asr_example2.wav'); export const audioPath2 = path.join(process.cwd(), 'videos/asr_example2.wav');
export const blankAudioPath = path.join(process.cwd(), 'videos/blank.wav'); export const blankAudioPath = path.join(process.cwd(), 'videos/blank.wav');
export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
const model_all = 'models/vosk-model-cn-0.22';
const model_small = 'models/vosk-model-small-cn-0.22';
export const MODEL_PATH = path.join(process.cwd(), model_small);
// export const MODEL_PATH = path.join(process.cwd(), model_all);

178
src/wake/test/stream.ts Normal file
View File

@@ -0,0 +1,178 @@
import vosk from 'vosk';
import { Recording } from '../../recorder/index.ts';
import fs, { WriteStream } from 'fs';
import path from 'path';
import { audioPath, sleep, mySpeechText, MODEL_PATH } from './common.ts';
import { encodeWav, decodeWav } from '../../utils/convert.ts';
// Decode a wav file and feed its PCM to a Vosk recognizer in 1 KiB slices,
// logging a full result whenever a segment completes and a partial otherwise.
// Returns false when the model directory is missing, true on completion.
const streamText = async (audioFilePath: string) => {
  if (!fs.existsSync(MODEL_PATH)) {
    console.error('请先下载Vosk模型');
    return false;
  }
  const voskModel = new vosk.Model(MODEL_PATH);
  const recognizer = new vosk.Recognizer({ model: voskModel, sampleRate: 16000 });
  const pcm = decodeWav(fs.readFileSync(audioFilePath));
  const CHUNK = 1024;
  let offset = 0;
  while (offset < pcm.length) {
    const slice = pcm.subarray(offset, offset + CHUNK);
    if (recognizer.acceptWaveform(slice)) {
      console.log('Streamed Result:', recognizer.result());
    } else {
      console.log('Partial Result:', recognizer.partialResult());
    }
    offset += CHUNK;
  }
  return true;
};
// 测试流式处理
// streamText(mySpeechText)
// .then((result) => {
// console.log('Final Result:', result);
// })
// .catch((error) => {
// console.error('Error during streaming:', error);
// });
// Live microphone wake-word demo: record 16 kHz mono audio, batch the PCM
// stream into larger chunks, and push each batch through a Vosk recognizer.
// Returns a handle whose stop() ends the recording session.
const record = async () => {
  const recording = new Recording({
    sampleRate: 16000,
    channels: 1,
  });
  recording.start();
  const stream = recording.stream();
  console.log('Recording started...', stream);
  const model = new vosk.Model(MODEL_PATH);
  const rec = new vosk.Recognizer({
    model: model,
    sampleRate: 16000,
    grammar: ['你', '好', '小', '嗨', '秀'], // wake-word characters to bias recognition
  });
  console.log('Vosk Recognizer initialized...');
  // Accumulation buffer: batch small recorder chunks into bigger waveforms
  // before handing them to Vosk.
  let accumulatedBuffer = Buffer.alloc(0);
  const PROCESS_SIZE = 4 * 8192; // merge roughly four 8192-byte chunks (tune as needed)
  stream.on('data', (data: Buffer) => {
    // const pcmBuffer = decodeWav(data); // 8192 bytes per chunk
    const pcmBuffer = data; // assumes the stream already yields raw PCM — TODO confirm
    // Append the new data to the accumulation buffer.
    accumulatedBuffer = Buffer.concat([accumulatedBuffer, pcmBuffer]);
    // Process once enough data has accumulated.
    if (accumulatedBuffer.length >= PROCESS_SIZE) {
      if (rec.acceptWaveform(accumulatedBuffer)) {
        const result = rec.result();
        console.log('Recorded Result:', result);
        // Check whether the transcript contains a wake word.
        if (result.text) {
          const detect = detectWakeWord(result.text);
          if (detect.detected) {
            console.log(`检测到唤醒词: "${detect.word}",置信度: ${detect.confidence}`);
          }
          // Post-wake actions would go here.
        }
      } else {
        const partialResult = rec.partialResult();
        console.log('Partial Result:', partialResult);
      }
      // Reset the accumulation buffer.
      accumulatedBuffer = Buffer.alloc(0);
    }
  });
  // Handle end-of-recording.
  stream.on('end', () => {
    // Flush whatever is left in the accumulation buffer.
    if (accumulatedBuffer.length > 0) {
      if (rec.acceptWaveform(accumulatedBuffer)) {
        const result = rec.result();
        console.log('Final Recorded Result:', result);
      }
    }
    // Fetch the final result.
    const finalResult = rec.finalResult();
    console.log('Final Complete Result:', finalResult);
    // Release native resources.
    rec.free();
    model.free();
  });
  // Return a handle that lets the caller stop recording.
  return {
    stop: () => {
      recording.stop();
    },
  };
};
// Wake-word detection settings.
const wakeConfig = {
  words: ['你好小小', '嗨小小', '小小', '秀秀'],
  threshold: 0.75, // minimum similarity score to count as a match
  minWordCount: 2, // minimum text length before attempting detection
};
// Score the recognized text against every configured wake word and return the
// best match above the threshold; `detected` stays false when nothing qualifies.
function detectWakeWord(text: string): { detected: boolean; confidence: number; word: string } {
  const noMatch = { detected: false, confidence: 0, word: '' };
  if (!text || text.length < wakeConfig.minWordCount) {
    return noMatch;
  }
  let best = noMatch;
  const lowered = text.toLowerCase();
  for (const candidate of wakeConfig.words) {
    // Similarity between the transcript and this wake word.
    const score = calculateSimilarity(lowered, candidate.toLowerCase());
    console.log(`检测到唤醒词 "${candidate}" 的相似度: ${score}`);
    if (score > wakeConfig.threshold && score > best.confidence) {
      best = { detected: true, confidence: score, word: candidate };
    }
  }
  return best;
}
// Rough string-similarity score in [0, 1] used for wake-word matching.
// Returns 1.0 on substring containment; otherwise slides the shorter string
// across the longer one and returns the best per-position character-match ratio.
function calculateSimilarity(str1: string, str2: string): number {
  const longer = str1.length > str2.length ? str1 : str2;
  const shorter = str1.length > str2.length ? str2 : str1;
  // If the shorter string is empty the similarity is 0. (Fix: this guard must
  // run before the containment shortcut — `includes('')` is always true, so
  // the old order reported 1.0 for an empty needle, contradicting the intent
  // stated in the original comment.)
  if (shorter.length === 0) return 0;
  if (str1.includes(str2)) return 1.0;
  // Simplified Levenshtein-style scan: best aligned character-match count
  // over every alignment of the shorter string within the longer one.
  let matchCount = 0;
  for (let i = 0; i <= longer.length - shorter.length; i++) {
    const segment = longer.substring(i, i + shorter.length);
    let localMatches = 0;
    for (let j = 0; j < shorter.length; j++) {
      if (segment[j] === shorter[j]) localMatches++;
    }
    matchCount = Math.max(matchCount, localMatches);
  }
  return matchCount / shorter.length;
}
// 启动录音并在适当的时候停止
(async () => {
const recorder = await record();
// 可选30秒后自动停止录音
setTimeout(() => {
console.log('Stopping recording...');
recorder.stop();
}, 10 * 30 * 1000);
})();

View File

@@ -1,6 +1,15 @@
const isBrowser = process?.env?.BROWSER === 'true'; const isBrowser = (typeof process === 'undefined') ||
import { EventEmitter } from 'events'; (typeof window !== 'undefined' && typeof window.document !== 'undefined') ||
(typeof process !== 'undefined' && process?.env?.BROWSER === 'true');
const chantHttpToWs = (url: string) => {
if (url.startsWith('http://')) {
return url.replace('http://', 'ws://');
}
if (url.startsWith('https://')) {
return url.replace('https://', 'wss://');
}
return url;
};
type WebSocketOptions = { type WebSocketOptions = {
/** /**
* 是否拒绝不安全的证书, in node only * 是否拒绝不安全的证书, in node only
@@ -11,13 +20,14 @@ type WebSocketOptions = {
}; };
export const initWs = async (url: string, options?: WebSocketOptions) => { export const initWs = async (url: string, options?: WebSocketOptions) => {
let ws: WebSocket; let ws: WebSocket;
url = chantHttpToWs(url);
if (isBrowser) { if (isBrowser) {
ws = new WebSocket(url); ws = new WebSocket(url);
} else { } else {
const WebSocket = await import('ws').then((module) => module.default); const WebSocket = await import('ws').then((module) => module.default);
const { rejectUnauthorized, headers, ...rest } = options || {}; const { rejectUnauthorized, headers, ...rest } = options || {};
ws = new WebSocket(url, { ws = new WebSocket(url, {
rejectUnauthorized: rejectUnauthorized || true, rejectUnauthorized: rejectUnauthorized ?? true,
headers: headers, headers: headers,
...rest, ...rest,
}) as any; }) as any;
@@ -30,12 +40,3 @@ interface EventEmitterOptions {
*/ */
captureRejections?: boolean | undefined; captureRejections?: boolean | undefined;
} }
/**
* 初始化一个事件发射器
* @param opts 事件发射器选项
* @returns 事件发射器
*/
export const initEmitter = (opts?: EventEmitterOptions) => {
const emitter = new EventEmitter(opts);
return emitter;
};

View File

@@ -1,41 +0,0 @@
import { defineConfig } from 'tsup';
// import glob from 'fast-glob';
// const services = glob.sync('src/services/*.ts');
import fs from 'fs';
const clean = () => {
const distDir = 'dist';
if (fs.existsSync(distDir)) {
fs.rmSync(distDir, { recursive: true, force: true });
}
};
clean();
const entrys = ['src/index.ts'];
const nodeEntrys = ['src/dev.ts'];
const getCommonConfig = (opts = {}) => {
return {
entry: opts.entry,
outExtension: ({ format }) => ({
js: format === 'esm' ? '.mjs' : '.js',
}),
splitting: false,
sourcemap: false,
// clean: true,
format: 'esm',
external: ['dotenv'],
dts: true,
outDir: 'dist',
tsconfig: 'tsconfig.json',
...opts,
define: {
'process.env.IS_BROWSER': JSON.stringify(process.env.BROWSER || false),
...opts.define,
},
};
};
export default defineConfig([
// getCommonConfig({ entry: entrys, define: { 'process.env.IS_BROWSER': JSON.stringify(true) } }), // 浏览器
getCommonConfig({ entry: nodeEntrys, define: { 'process.env.IS_BROWSER': JSON.stringify(false) } }), // node
]);

View File

@@ -0,0 +1 @@
在一无所知中,梦里的一天结束了一个新的轮回,便会开始。