fix funasr

This commit is contained in:
熊潇 2025-05-19 01:01:38 +08:00
parent 8e04962cc1
commit a1df51f56b
7 changed files with 176 additions and 129 deletions

76
.gitignore vendored
View File

@ -1,21 +1,69 @@
node_modules node_modules
dist # mac
.DS_Store
app.config.json5
apps.config.json
deploy.tar.gz
cache-file
/apps
logs
.env* .env*
!.env.example !.env*example
dist
build
logs
.turbo
pack-dist
# astro
.astro
# next
.next
# nuxt
.nuxt
# vercel
.vercel
# vuepress
.vuepress/dist
# coverage
coverage/
# typescript
*.tsbuildinfo
# debug logs
*.log
*.tmp
# vscode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
# idea
.idea
# system
Thumbs.db
ehthumbs.db
Desktop.ini
# temp files
*.tmp
*.temp
# local development
*.local
public/r
.pnpm-store
models models
videos/tts_mix.wav videos/tts_mix.mp3

View File

@ -49,6 +49,7 @@
"vosk": "^0.3.39" "vosk": "^0.3.39"
}, },
"devDependencies": { "devDependencies": {
"@kevisual/logger": "^0.0.3",
"@kevisual/types": "^0.0.6", "@kevisual/types": "^0.0.6",
"@kevisual/use-config": "^1.0.10", "@kevisual/use-config": "^1.0.10",
"@rollup/plugin-alias": "^5.1.1", "@rollup/plugin-alias": "^5.1.1",

View File

@ -3,40 +3,62 @@ import net from 'net';
import path from 'path'; import path from 'path';
import fs from 'fs'; import fs from 'fs';
const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav'); // const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
const ws = new VideoWS({ // const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
// url: 'wss://192.168.31.220:10095', const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
url: 'wss://funasr.xiongxiao.me', // const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
isFile: true, const name = 'output-1746007775571.mp3';
onConnect: async () => { const url = 'wss://funasr.xiongxiao.me';
console.log('onConnect'); const videoTestPath2 = path.join(process.cwd(), 'build', name);
const data = fs.readFileSync(videoTestPath); // const ws = new VideoWS({
let sampleBuf = new Uint8Array(data); // // url: 'wss://192.168.31.220:10095',
// url: 'wss://funasr.xiongxiao.me',
// isFile: true,
// // mode: 'offline',
// wav_format: 'mp3',
// onConnect: async () => {
// console.log('onConnect');
// const data = fs.readFileSync(videoTestPath);
// let sampleBuf = new Uint8Array(data);
var chunk_size = 960; // for asr chunk_size [5, 10, 5] // var chunk_size = 960; // for asr chunk_size [5, 10, 5]
let totalsend = 0; // let totalsend = 0;
let len = 0; // let len = 0;
ws.start(); // ws.start();
while (sampleBuf.length >= chunk_size) { // while (sampleBuf.length >= chunk_size) {
const sendBuf = sampleBuf.slice(0, chunk_size); // const sendBuf = sampleBuf.slice(0, chunk_size);
totalsend = totalsend + sampleBuf.length; // totalsend = totalsend + sampleBuf.length;
sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length); // sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length);
if (len === 100) { // if (len === 100) {
// ws.stop(); // // ws.stop();
// ws.start(); // // ws.start();
await new Promise((resolve) => setTimeout(resolve, 1000)); // // await new Promise((resolve) => setTimeout(resolve, 1000));
} // }
ws.send(sendBuf);
len++; // await new Promise((resolve) => setTimeout(resolve, 10));
} // ws.send(sendBuf);
ws.stop(); // len++;
console.log('len', len); // }
// await new Promise((resolve) => setTimeout(resolve, 1000));
// ws.stop();
// console.log('len', len);
// },
// });
// const server = net.createServer((socket) => {
// socket.on('data', (data) => {
// console.log('data', data);
// });
// });
// server.listen(10096);
const ws2 = new VideoWS({
url: url,
onConnect: async () => {
const data = fs.readFileSync(videoTestPath);
await ws2.sendBuffer(data, { wav_format: 'mp3' });
await new Promise((resolve) => setTimeout(resolve, 1000));
const data2 = fs.readFileSync(videoTestPath2);
await ws2.sendBuffer(data2, { wav_format: 'mp3' });
}, },
}); });
const server = net.createServer((socket) => {
socket.on('data', (data) => {
console.log('data', data);
});
});
server.listen(10096);

View File

@ -1,5 +1,7 @@
// import WebSocket from 'ws'; // import WebSocket from 'ws';
import { initWs } from '../../../ws-adapter/index.ts'; import { initWs } from '../../../ws-adapter/index.ts';
import { logger } from '@/logger/index.ts';
import { WSServer } from '../../provider/ws-server.ts';
export type VideoWSOptions = { export type VideoWSOptions = {
url?: string; url?: string;
@ -8,10 +10,22 @@ export type VideoWSOptions = {
mode?: VideoWsMode; mode?: VideoWsMode;
isFile?: boolean; isFile?: boolean;
onConnect?: () => void; onConnect?: () => void;
wav_format?: string;
}; };
export const videoWsMode = ['2pass', 'online', 'offline'] as const; export const videoWsMode = ['2pass', 'online', 'offline'] as const;
type VideoWsMode = (typeof videoWsMode)[number]; type VideoWsMode = (typeof videoWsMode)[number];
/**
 * Shape of the JSON control message that `start` builds and sends over the
 * socket (via `JSON.stringify`) before any audio bytes are sent.
 */
type OpenRequest = {
// `start` sends [5, 10, 5]; NOTE(review): meaning of the three numbers is defined by the server — confirm against the FunASR protocol docs.
chunk_size: number[];
// Name attached to the request; `start` sends 'h5'.
wav_name: string;
// NOTE(review): presumably true while audio is being streamed — confirm against `start`/`stop`.
is_speaking: boolean;
// `start` sends 10.
chunk_interval: number;
// Inverse text normalization (ITN):
itn: boolean;
// One of the `videoWsMode` values: '2pass', 'online' or 'offline'.
mode: VideoWsMode;
// Audio container/encoding; `start` defaults this to 'wav' and rewrites 'wav' to 'PCM'.
wav_format?: string;
// Set by `start` to 16000 only when the format is PCM; presumably the sample rate in Hz — TODO confirm.
audio_fs?: number;
// Not populated by `start` itself; callers may supply it through `start(opts)`.
hotwords?: string;
};
export type VideoWsResult = { export type VideoWsResult = {
isFinal: boolean; isFinal: boolean;
mode: VideoWsMode; mode: VideoWsMode;
@ -21,48 +35,21 @@ export type VideoWsResult = {
wav_name: string; wav_name: string;
}; };
export class VideoWS { export class VideoWS extends WSServer {
ws: WebSocket;
itn?: boolean; itn?: boolean;
mode?: VideoWsMode; mode?: VideoWsMode;
isFile?: boolean; wav_format?: string;
onConnect?: () => void;
constructor(options?: VideoWSOptions) { constructor(options?: VideoWSOptions) {
super({ url: options?.url, ws: options?.ws, onConnect: options?.onConnect });
this.itn = options?.itn || false;
this.itn = options?.itn || false; this.itn = options?.itn || false;
this.mode = options?.mode || 'online'; this.mode = options?.mode || 'online';
this.isFile = options?.isFile || false; this.wav_format = options?.wav_format;
this.initWs(options);
}
async initWs(options: VideoWSOptions) {
if (options?.ws) {
this.ws = options.ws;
} else {
this.ws = await initWs(options.url);
}
this.onConnect = options?.onConnect || (() => {});
this.ws.onopen = this.onOpen.bind(this);
this.ws.onmessage = this.onMessage.bind(this);
this.ws.onerror = this.onError.bind(this);
this.ws.onclose = this.onClose.bind(this);
} }
async onOpen() { async start(opts?: Partial<OpenRequest>) {
this.onConnect();
}
async start() {
let isFileMode = this.isFile;
const chunk_size = new Array(5, 10, 5); const chunk_size = new Array(5, 10, 5);
type OpenRequest = {
chunk_size: number[];
wav_name: string;
is_speaking: boolean;
chunk_interval: number;
itn: boolean;
mode: VideoWsMode;
wav_format?: string;
audio_fs?: number;
hotwords?: string;
};
const request: OpenRequest = { const request: OpenRequest = {
chunk_size: chunk_size, chunk_size: chunk_size,
wav_name: 'h5', // wav_name: 'h5', //
@ -70,17 +57,16 @@ export class VideoWS {
chunk_interval: 10, chunk_interval: 10,
itn: this.itn, itn: this.itn,
mode: this.mode || 'online', mode: this.mode || 'online',
...opts,
}; };
console.log('request', request); const file_sample_rate = 16000;
if (isFileMode) { request.wav_format = request.wav_format || this.wav_format || 'wav';
const file_ext = 'wav'; if ('wav' == request.wav_format) {
const file_sample_rate = 16000; request.wav_format = 'PCM';
request.wav_format = file_ext; request.audio_fs = file_sample_rate;
if (file_ext == 'wav') {
request.wav_format = 'PCM';
request.audio_fs = file_sample_rate;
}
} }
console.log('request', request);
this.ws.send(JSON.stringify(request)); this.ws.send(JSON.stringify(request));
} }
async stop() { async stop() {
@ -99,7 +85,28 @@ export class VideoWS {
this.ws.send(data); this.ws.send(data);
} }
} }
/**
 * Streams one complete audio buffer to the server as a single request:
 * sends the opening message via `start`, then the audio in fixed-size binary
 * chunks, then calls `stop`.
 *
 * If the socket is missing or not open, nothing is sent and the call resolves
 * without error (best-effort, same as before).
 *
 * @param data Audio bytes to send.
 * @param opts.wav_format Format passed to `start`; defaults to 'wav'.
 * @param opts.isFile Accepted for compatibility; not used by this method.
 */
async sendBuffer(data: Buffer, opts?: { isFile?: boolean; wav_format?: string }) {
  const { wav_format = 'wav' } = opts || {};
  if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
    return;
  }
  const chunkSize = 960; // for asr chunk_size [5, 10, 5]
  const chunkDelayMs = 10;
  // Copy once up front so the caller may reuse `data` while chunks are still being sent.
  const bytes = new Uint8Array(data);
  // Await so a failure while sending the opening message rejects this call
  // instead of becoming an unhandled rejection.
  await this.start({ wav_format });
  // Walk the buffer by offset using views (no per-chunk re-copy of the remainder).
  // `subarray` clamps the end index, so the final chunk may be shorter than
  // `chunkSize`; it is sent too, so the tail of the audio is not dropped.
  for (let offset = 0; offset < bytes.length; offset += chunkSize) {
    // Short pause before each chunk to pace the upload.
    await new Promise((resolve) => setTimeout(resolve, chunkDelayMs));
    this.send(bytes.subarray(offset, offset + chunkSize));
  }
  await this.stop();
}
async onMessage(event: MessageEvent) { async onMessage(event: MessageEvent) {
super.onMessage(event);
const data = event.data; const data = event.data;
try { try {
const result = JSON.parse(data.toString()); const result = JSON.parse(data.toString());

View File

@ -1,37 +1,6 @@
import { pino } from 'pino'; import { Logger } from '@kevisual/logger/node';
import { useConfig } from '@kevisual/use-config/env';
const config = useConfig(); const level = process.env.LOG_LEVEL || 'info';
export const logger = new Logger({
export const logger = pino({ level: level as any,
level: config.LOG_LEVEL || 'info',
transport: {
target: 'pino-pretty',
options: {
colorize: true,
translateTime: 'SYS:standard',
ignore: 'pid,hostname',
},
},
serializers: {
error: pino.stdSerializers.err,
req: pino.stdSerializers.req,
res: pino.stdSerializers.res,
},
// base: {
// app: 'ai-videos',
// env: process.env.NODE_ENV || 'development',
// },
}); });
export const logError = (message: string, data?: any) => logger.error({ data }, message);
export const logWarning = (message: string, data?: any) => logger.warn({ data }, message);
export const logInfo = (message: string, data?: any) => logger.info({ data }, message);
export const logDebug = (message: string, data?: any) => logger.debug({ data }, message);
export const log = {
error: logError,
warn: logWarning,
info: logInfo,
debug: logDebug,
};

View File

@ -1,9 +1,9 @@
import assert from 'assert'; import assert from 'assert';
import { logDebug, logInfo } from '../logger/index.ts'; import { logger } from '../logger/index.ts';
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'; import { ChildProcessWithoutNullStreams, spawn } from 'child_process';
import recorders from '../recorder/recorders/index.ts'; import recorders from '../recorder/recorders/index.ts';
import Stream from 'stream'; import Stream from 'stream';
const logDebug = logger.debug;
export type RecordingOptions = { export type RecordingOptions = {
/* 采样率默认为16000 */ /* 采样率默认为16000 */
sampleRate?: number; sampleRate?: number;