From a1df51f56b14b919205cc331682fcb192c0717cd Mon Sep 17 00:00:00 2001 From: abearxiong Date: Mon, 19 May 2025 01:01:38 +0800 Subject: [PATCH] fix funasr --- .gitignore | 76 +++++++++++--- package.json | 1 + src/asr/provider/funasr/test/get-text.ts | 90 ++++++++++------- src/asr/provider/funasr/ws.ts | 95 ++++++++++-------- src/logger/index.ts | 39 +------ src/recorder/index.ts | 4 +- .../my_speech_text.wav | Bin 7 files changed, 176 insertions(+), 129 deletions(-) rename my_speech_text.wav => videos/my_speech_text.wav (100%) diff --git a/.gitignore b/.gitignore index 5b7164b..5981f55 100644 --- a/.gitignore +++ b/.gitignore @@ -1,21 +1,69 @@ node_modules -dist - -app.config.json5 - -apps.config.json - -deploy.tar.gz -cache-file - -/apps - -logs +# mac +.DS_Store .env* -!.env.example +!.env*example +dist +build +logs + +.turbo + +pack-dist + +# astro +.astro + +# next +.next + +# nuxt +.nuxt + +# vercel +.vercel + +# vuepress +.vuepress/dist + +# coverage +coverage/ + +# typescript +*.tsbuildinfo + +# debug logs +*.log +*.tmp + +# vscode +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json + +# idea +.idea + +# system +Thumbs.db +ehthumbs.db +Desktop.ini + +# temp files +*.tmp +*.temp + +# local development +*.local + +public/r + +.pnpm-store models -videos/tts_mix.wav \ No newline at end of file +videos/tts_mix.mp3 \ No newline at end of file diff --git a/package.json b/package.json index 13ab413..6c6aecf 100644 --- a/package.json +++ b/package.json @@ -49,6 +49,7 @@ "vosk": "^0.3.39" }, "devDependencies": { + "@kevisual/logger": "^0.0.3", "@kevisual/types": "^0.0.6", "@kevisual/use-config": "^1.0.10", "@rollup/plugin-alias": "^5.1.1", diff --git a/src/asr/provider/funasr/test/get-text.ts b/src/asr/provider/funasr/test/get-text.ts index 4f67449..93de37c 100644 --- a/src/asr/provider/funasr/test/get-text.ts +++ b/src/asr/provider/funasr/test/get-text.ts @@ -3,40 +3,62 @@ import net from 'net'; import path from 'path'; import fs from 'fs'; -const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav'); -const ws = new VideoWS({ - // url: 'wss://192.168.31.220:10095', - url: 'wss://funasr.xiongxiao.me', - isFile: true, - onConnect: async () => { - console.log('onConnect'); - const data = fs.readFileSync(videoTestPath); - let sampleBuf = new Uint8Array(data); +// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav'); +// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav'); +const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3'); +// const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav'); +const name = 'output-1746007775571.mp3'; +const url = 'wss://funasr.xiongxiao.me'; +const videoTestPath2 = path.join(process.cwd(), 'build', name); +// const ws = new VideoWS({ +// // url: 'wss://192.168.31.220:10095', +// url: 'wss://funasr.xiongxiao.me', +// isFile: true, +// // mode: 'offline', +// wav_format: 'mp3', +// onConnect: async () => { +// console.log('onConnect'); +// const data = fs.readFileSync(videoTestPath); +// let sampleBuf = new Uint8Array(data); - var chunk_size = 960; // for asr chunk_size [5, 10, 5] - let totalsend = 0; - let len = 0; - ws.start(); - while (sampleBuf.length >= chunk_size) { - const sendBuf = sampleBuf.slice(0, chunk_size); - totalsend = totalsend + sampleBuf.length; - sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length); - if (len === 100) { - // ws.stop(); - // ws.start(); - await new Promise((resolve) => setTimeout(resolve, 1000)); - } - ws.send(sendBuf); - len++; - } - ws.stop(); - console.log('len', len); +// var chunk_size = 960; // for asr chunk_size [5, 10, 5] +// let totalsend = 0; +// let len = 0; +// ws.start(); +// while (sampleBuf.length >= chunk_size) { +// const sendBuf = sampleBuf.slice(0, chunk_size); +// totalsend = totalsend + sampleBuf.length; +// sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length); +// if (len === 100) { +// // ws.stop(); +// // ws.start(); +// // await new Promise((resolve) => setTimeout(resolve, 1000)); +// } + +// await new Promise((resolve) => setTimeout(resolve, 10)); +// ws.send(sendBuf); +// len++; +// } +// await new Promise((resolve) => setTimeout(resolve, 1000)); +// ws.stop(); +// console.log('len', len); +// }, +// }); + +// const server = net.createServer((socket) => { +// socket.on('data', (data) => { +// console.log('data', data); +// }); +// }); +// server.listen(10096); + +const ws2 = new VideoWS({ + url: url, + onConnect: async () => { + const data = fs.readFileSync(videoTestPath); + await ws2.sendBuffer(data, { wav_format: 'mp3' }); + await new Promise((resolve) => setTimeout(resolve, 1000)); + const data2 = fs.readFileSync(videoTestPath2); + await ws2.sendBuffer(data2, { wav_format: 'mp3' }); }, }); - -const server = net.createServer((socket) => { - socket.on('data', (data) => { - console.log('data', data); - }); -}); -server.listen(10096); diff --git a/src/asr/provider/funasr/ws.ts b/src/asr/provider/funasr/ws.ts index 8349068..656c2a5 100644 --- a/src/asr/provider/funasr/ws.ts +++ b/src/asr/provider/funasr/ws.ts @@ -1,5 +1,7 @@ // import WebSocket from 'ws'; import { initWs } from '../../../ws-adapter/index.ts'; +import { logger } from '@/logger/index.ts'; +import { WSServer } from '../../provider/ws-server.ts'; export type VideoWSOptions = { url?: string; @@ -8,10 +10,22 @@ export type VideoWSOptions = { mode?: VideoWsMode; isFile?: boolean; onConnect?: () => void; + wav_format?: string; }; export const videoWsMode = ['2pass', 'online', 'offline'] as const; type VideoWsMode = (typeof videoWsMode)[number]; - +type OpenRequest = { + chunk_size: number[]; + wav_name: string; + is_speaking: boolean; + chunk_interval: number; + // 逆文本标准化(ITN): + itn: boolean; + mode: VideoWsMode; + wav_format?: string; + audio_fs?: number; + hotwords?: string; +}; export type VideoWsResult = { isFinal: boolean; mode: VideoWsMode; @@ -21,48 +35,21 @@ export type VideoWsResult = { wav_name: string; }; -export class VideoWS { - ws: WebSocket; +export class VideoWS extends WSServer { itn?: boolean; mode?: VideoWsMode; - isFile?: boolean; - onConnect?: () => void; + wav_format?: string; constructor(options?: VideoWSOptions) { + super({ url: options?.url, ws: options?.ws, onConnect: options?.onConnect }); + this.itn = options?.itn || false; this.itn = options?.itn || false; this.mode = options?.mode || 'online'; - this.isFile = options?.isFile || false; - this.initWs(options); - } - async initWs(options: VideoWSOptions) { - if (options?.ws) { - this.ws = options.ws; - } else { - this.ws = await initWs(options.url); - } - this.onConnect = options?.onConnect || (() => {}); - this.ws.onopen = this.onOpen.bind(this); - this.ws.onmessage = this.onMessage.bind(this); - this.ws.onerror = this.onError.bind(this); - this.ws.onclose = this.onClose.bind(this); + this.wav_format = options?.wav_format; } - async onOpen() { - this.onConnect(); - } - async start() { - let isFileMode = this.isFile; + async start(opts?: Partial) { const chunk_size = new Array(5, 10, 5); - type OpenRequest = { - chunk_size: number[]; - wav_name: string; - is_speaking: boolean; - chunk_interval: number; - itn: boolean; - mode: VideoWsMode; - wav_format?: string; - audio_fs?: number; - hotwords?: string; - }; + const request: OpenRequest = { chunk_size: chunk_size, wav_name: 'h5', // @@ -70,17 +57,16 @@ export class VideoWS { chunk_interval: 10, itn: this.itn, mode: this.mode || 'online', + ...opts, }; - console.log('request', request); - if (isFileMode) { - const file_ext = 'wav'; - const file_sample_rate = 16000; - request.wav_format = file_ext; - if (file_ext == 'wav') { - request.wav_format = 'PCM'; - request.audio_fs = file_sample_rate; - } + const file_sample_rate = 16000; + request.wav_format = request.wav_format || this.wav_format || 'wav'; + if ('wav' == request.wav_format) { + request.wav_format = 'PCM'; + request.audio_fs = file_sample_rate; } + console.log('request', request); + this.ws.send(JSON.stringify(request)); } async stop() { @@ -99,7 +85,28 @@ export class VideoWS { this.ws.send(data); } } + async sendBuffer(data: Buffer, opts?: { isFile?: boolean; wav_format?: string }) { + const { wav_format = 'wav' } = opts || {}; + if (this.ws && this.ws.readyState === WebSocket.OPEN) { + let sampleBuf = new Uint8Array(data); + const ws = this; + var chunk_size = 960; // for asr chunk_size [5, 10, 5] + let totalsend = 0; + let len = 0; + ws.start({ wav_format }); + while (sampleBuf.length >= chunk_size) { + const sendBuf = sampleBuf.slice(0, chunk_size); + totalsend = totalsend + sampleBuf.length; + sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length); + await new Promise((resolve) => setTimeout(resolve, 10)); + ws.send(sendBuf); + len++; + } + ws.stop(); + } + } async onMessage(event: MessageEvent) { + super.onMessage(event); const data = event.data; try { const result = JSON.parse(data.toString()); diff --git a/src/logger/index.ts b/src/logger/index.ts index 6495e0f..00b486a 100644 --- a/src/logger/index.ts +++ b/src/logger/index.ts @@ -1,37 +1,6 @@ -import { pino } from 'pino'; -import { useConfig } from '@kevisual/use-config/env'; +import { Logger } from '@kevisual/logger/node'; -const config = useConfig(); - -export const logger = pino({ - level: config.LOG_LEVEL || 'info', - transport: { - target: 'pino-pretty', - options: { - colorize: true, - translateTime: 'SYS:standard', - ignore: 'pid,hostname', - }, - }, - serializers: { - error: pino.stdSerializers.err, - req: pino.stdSerializers.req, - res: pino.stdSerializers.res, - }, - // base: { - // app: 'ai-videos', - // env: process.env.NODE_ENV || 'development', - // }, +const level = process.env.LOG_LEVEL || 'info'; +export const logger = new Logger({ + level: level as any, }); - -export const logError = (message: string, data?: any) => logger.error({ data }, message); -export const logWarning = (message: string, data?: any) => logger.warn({ data }, message); -export const logInfo = (message: string, data?: any) => logger.info({ data }, message); -export const logDebug = (message: string, data?: any) => logger.debug({ data }, message); - -export const log = { - error: logError, - warn: logWarning, - info: logInfo, - debug: logDebug, -}; diff --git a/src/recorder/index.ts b/src/recorder/index.ts index 0cedcd9..7136949 100644 --- a/src/recorder/index.ts +++ b/src/recorder/index.ts @@ -1,9 +1,9 @@ import assert from 'assert'; -import { logDebug, logInfo } from '../logger/index.ts'; +import { logger } from '../logger/index.ts'; import { ChildProcessWithoutNullStreams, spawn } from 'child_process'; import recorders from '../recorder/recorders/index.ts'; import Stream from 'stream'; - +const logDebug = logger.debug; export type RecordingOptions = { /* 采样率,默认为16000 */ sampleRate?: number; diff --git a/my_speech_text.wav b/videos/my_speech_text.wav similarity index 100% rename from my_speech_text.wav rename to videos/my_speech_text.wav