Compare commits

...

27 Commits

Author SHA1 Message Date
9e94a4d898 update 2025-10-14 23:04:59 +08:00
d4475cb2f2 更新 src/asr/provider/volcengine/auc.ts 2025-10-14 22:52:24 +08:00
5603d09e80 update 2025-10-13 22:13:19 +08:00
78cc6dcf55 update 2025-10-03 18:43:57 +08:00
8047577165 temp test 2025-08-23 22:34:36 +08:00
e4596b4fde add batch send file to get text 2025-06-23 18:34:54 +08:00
767e436eb8 fix: fix ws 2025-06-23 10:38:01 +08:00
203fa1f103 fix: 2025-06-22 15:18:44 +08:00
87769076c8 fix: add src code 2025-06-22 13:34:34 +08:00
4a9568447e remove some dependencies 2025-06-22 12:46:24 +08:00
b3b64ec59c bump version 2025-06-04 10:09:49 +08:00
232d799575 "feat: 更新ASR服务连接配置,优化录音流处理及模型路径" 2025-06-02 12:38:53 +08:00
e638d7907a test 2025-05-24 00:10:21 +08:00
38b4e58124 add txt 2025-05-20 12:17:52 +08:00
776e0800e9 tts for cosyvoice and funasr and aliyun 2025-05-20 00:39:21 +08:00
54da76bf9d 阿里云一句话识别 2025-05-19 01:44:24 +08:00
a1df51f56b fix funasr 2025-05-19 01:01:38 +08:00
8e04962cc1 fix: 优化模块 2025-05-09 23:11:23 +08:00
512fe796b2 temp 2025-05-06 23:05:57 +08:00
c7e3fb9129 perf: video tts mix add writer buffer 2025-04-23 16:31:36 +08:00
3ecc9353c7 feat: 添加流获取音频的事件 2025-04-23 15:41:52 +08:00
ad5bcd5be8 clear console log 2025-04-22 13:41:49 +08:00
17ce93d5bd temp 2025-04-20 01:32:28 +08:00
5e781499e9 feat: add tss module demo 2025-04-18 23:55:40 +08:00
fdc3985b93 temp: add test 2025-04-18 18:28:34 +08:00
d92b93c6f9 init videos volcengine 2025-04-17 02:29:03 +08:00
fd3ad7b7e4 clear 2025-04-17 00:28:46 +08:00
45 changed files with 3609 additions and 3319 deletions

78
.gitignore vendored
View File

@@ -1,17 +1,69 @@
node_modules node_modules
dist # mac
.DS_Store
app.config.json5
apps.config.json
deploy.tar.gz
cache-file
/apps
logs
.env* .env*
!.env.example !.env*example
dist
build
logs
.turbo
pack-dist
# astro
.astro
# next
.next
# nuxt
.nuxt
# vercel
.vercel
# vuepress
.vuepress/dist
# coverage
coverage/
# typescript
*.tsbuildinfo
# debug logs
*.log
*.tmp
# vscode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
# idea
.idea
# system
Thumbs.db
ehthumbs.db
Desktop.ini
# temp files
*.tmp
*.temp
# local development
*.local
public/r
.pnpm-store
models
videos/tts_mix.mp3

View File

@@ -0,0 +1,100 @@
import { EventEmitter } from 'eventemitter3';
import { VideoWS, VideoWsResult, sleep } from '../src/asr/provider/funasr/ws.ts';
import fs from 'node:fs';
import path from 'node:path';
type BatchSendOptions = {
vws: VideoWS;
files: string[];
matchText?: string;
emitter?: EventEmitter;
};
/**
 * Sends a batch of audio files over an existing VideoWS (FunASR) connection,
 * one at a time, collecting the recognized text for each file and emitting
 * 'send-done' with the full result list when finished.
 */
export class BatchSendFiles {
  // Paths of the audio files queued for recognition.
  files: string[];
  // Underlying ASR WebSocket client used as transport.
  vws: VideoWS;
  // Emitter on which 'send-done' is published; defaults to vws.emitter.
  emitter: EventEmitter;
  constructor({ vws, files, emitter }: BatchSendOptions) {
    this.files = files;
    this.vws = vws;
    this.emitter = emitter || vws.emitter;
  }
  /**
   * Verify the socket is connected, then kick off the batch send.
   * Fix: bail out when the connection check fails instead of falling
   * through and writing into a dead socket.
   */
  async init() {
    const isConnected = await this.vws.isConnected();
    if (!isConnected) {
      console.error('链接失败:', isConnected);
      return;
    }
    this.send();
  }
  /** Promise resolving with the next 'result' event from the ASR socket. */
  waitOne() {
    return new Promise((resolve) => {
      this.vws.emitter.once('result', (data) => {
        resolve(data);
      });
    });
  }
  /**
   * Validate that `file` is an existing, non-empty audio file with a
   * supported extension and under the 100 MB limit.
   * @returns basic metadata about the file when all checks pass
   * @throws Error when any check fails (note: fs.statSync itself throws
   *         ENOENT for missing paths; callers catch both cases together)
   */
  async checkAudioFile(file: string) {
    const stats = fs.statSync(file);
    if (!stats.isFile()) {
      throw new Error(`File not found: ${file}`);
    }
    const ext = path.extname(file).toLowerCase();
    const validExtensions = ['.wav', '.mp3', '.flac', '.ogg', '.aac'];
    if (!validExtensions.includes(ext)) {
      throw new Error(`Invalid file type: ${ext}. Supported types are: ${validExtensions.join(', ')}`);
    }
    const fileSize = stats.size;
    if (fileSize === 0) {
      throw new Error(`File is empty: ${file}`);
    }
    const maxSize = 100 * 1024 * 1024; // 100 MB
    if (fileSize > maxSize) {
      throw new Error(`File size exceeds limit: ${fileSize} bytes. Maximum allowed size is ${maxSize} bytes.`);
    }
    return {
      file,
      ext,
      size: fileSize,
      isValid: true,
    };
  }
  /**
   * Send each file sequentially, waiting for one 'result' event per file,
   * then emit 'send-done' with the collected { file, text } list.
   * Files that fail validation are skipped with a logged error.
   */
  async send() {
    const textList: { file: string; text: string }[] = [];
    for (const file of this.files) {
      let wav_format = 'wav';
      try {
        const ck = await this.checkAudioFile(file);
        if (ck.ext !== '.wav') {
          wav_format = ck.ext.replace('.', '');
        }
      } catch (error) {
        console.error('Error checking file:', error);
        continue;
      }
      const data = fs.readFileSync(file);
      // Register the listener BEFORE sending so the result cannot be missed.
      const wait = this.waitOne();
      await this.vws.sendBuffer(data, { wav_format });
      await sleep(1000);
      console.log('File sent:', file);
      const result: VideoWsResult = (await wait) as any;
      console.log('Result:', result.text);
      textList.push({ file, text: result.text });
      console.log('----------------------');
    }
    this.emitter.emit('send-done', textList);
  }
}
// const batchSend = new BatchSendFiles({
// vws: ws,
// // files: [audioTestPath],
// files: [videoTestPath, audioTestPath],
// });
// batchSend.init();
// batchSend.emitter.on('send-done', (data) => {
// const matchText = '在一无所知中,梦里的一天结束了一个新的轮回,便会开始。';
// const textList = data as { file: string; text: string }[];
// for (const item of textList) {
// const getText = item.text || '';
// const distance = natural.JaroWinklerDistance(getText, matchText);
// console.log(`File: ${item.file}, \nText: ${item.text}\nDistance: ${distance}`);
// }
// // console.log('Batch processing done:', data);
// });

View File

@@ -1,22 +1,16 @@
{ {
"name": "demo-app", "name": "@kevisual/video-tools",
"version": "0.0.1", "version": "0.0.5",
"description": "", "description": "",
"main": "index.js", "main": "index.js",
"basename": "/root/demo-app", "basename": "/root/video-tools",
"app": { "app": {
"key": "demo-app", "key": "video-tools",
"entry": "dist/app.mjs", "entry": "dist/app.mjs",
"type": "system-app", "type": "system-app"
"files": [
"dist"
]
}, },
"scripts": { "scripts": {
"watch": "rollup -c rollup.config.mjs -w",
"build": "rollup -c rollup.config.mjs", "build": "rollup -c rollup.config.mjs",
"dev": "cross-env NODE_ENV=development nodemon --delay 2.5 -e js,cjs,mjs --exec node dist/app.mjs",
"dev:watch": "cross-env NODE_ENV=development concurrently -n \"Watch,Dev\" -c \"green,blue\" \"npm run watch\" \"sleep 1 && npm run dev\" ",
"dev:bun": "bun run src/dev.ts --watch", "dev:bun": "bun run src/dev.ts --watch",
"test": "tsx test/**/*.ts", "test": "tsx test/**/*.ts",
"clean": "rm -rf dist", "clean": "rm -rf dist",
@@ -30,58 +24,55 @@
"types": "types/index.d.ts", "types": "types/index.d.ts",
"files": [ "files": [
"dist", "dist",
"src" "src",
"examples"
], ],
"publishConfig": { "publishConfig": {
"access": "public" "access": "public"
}, },
"dependencies": { "dependencies": {
"@kevisual/code-center-module": "0.0.18", "@gradio/client": "^1.15.1",
"@kevisual/mark": "0.0.7", "@kevisual/router": "0.0.21",
"@kevisual/router": "0.0.10", "@kevisual/use-config": "^1.0.17",
"@kevisual/use-config": "^1.0.10", "@kevisual/video": "^0.0.2",
"cookie": "^1.0.2", "cookie": "^1.0.2",
"crypto-js": "^4.2.0",
"dayjs": "^1.11.13", "dayjs": "^1.11.13",
"formidable": "^3.5.2", "eventemitter3": "^5.0.1",
"formidable": "^3.5.4",
"lodash-es": "^4.17.21", "lodash-es": "^4.17.21",
"node-record-lpcm16": "^1.0.1" "nanoid": "^5.1.5"
}, },
"devDependencies": { "devDependencies": {
"@kevisual/types": "^0.0.6", "@alicloud/pop-core": "^1.8.0",
"@kevisual/use-config": "^1.0.10", "@kevisual/logger": "^0.0.4",
"@rollup/plugin-alias": "^5.1.1", "@kevisual/types": "^0.0.10",
"@rollup/plugin-commonjs": "^28.0.3", "@kevisual/use-config": "^1.0.17",
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^16.0.1",
"@rollup/plugin-replace": "^6.0.2",
"@rollup/plugin-typescript": "^12.1.2",
"@types/crypto-js": "^4.2.2", "@types/crypto-js": "^4.2.2",
"@types/formidable": "^3.4.5", "@types/formidable": "^3.4.5",
"@types/lodash-es": "^4.17.12", "@types/lodash-es": "^4.17.12",
"@types/node": "^22.14.0", "@types/node": "^22.15.29",
"@types/vosk": "^0.3.1",
"@types/ws": "^8.18.1", "@types/ws": "^8.18.1",
"commander": "^13.1.0", "commander": "^14.0.0",
"concurrently": "^9.1.2", "concurrently": "^9.1.2",
"cross-env": "^7.0.3", "cross-env": "^7.0.3",
"dotenv": "^16.5.0",
"inquire": "^0.4.8", "inquire": "^0.4.8",
"ioredis": "^5.6.0", "ioredis": "^5.6.1",
"jsrepo": "^1.45.3", "nodemon": "^3.1.10",
"nodemon": "^3.1.9", "pg": "^8.16.0",
"pg": "^8.14.1", "pm2": "^6.0.6",
"pino": "^9.6.0",
"pino-pretty": "^13.0.0",
"pm2": "^6.0.5",
"rimraf": "^6.0.1", "rimraf": "^6.0.1",
"rollup": "^4.39.0",
"rollup-plugin-copy": "^3.5.0",
"rollup-plugin-dts": "^6.2.1",
"rollup-plugin-esbuild": "^6.2.1",
"sequelize": "^6.37.7", "sequelize": "^6.37.7",
"tape": "^5.9.0", "tape": "^5.9.0",
"tsup": "^8.4.0", "tsx": "^4.19.4",
"tsx": "^4.19.3",
"typescript": "^5.8.3", "typescript": "^5.8.3",
"ws": "^8.18.1" "ws": "npm:@kevisual/ws"
}, },
"packageManager": "pnpm@10.7.1" "exports": {
"./src/*": "./src/*",
"./examples/*": "./examples/*"
},
"packageManager": "pnpm@10.11.1"
} }

3543
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,75 +0,0 @@
import resolve from '@rollup/plugin-node-resolve';
import commonjs from '@rollup/plugin-commonjs';
import json from '@rollup/plugin-json';
import path from 'path';
import esbuild from 'rollup-plugin-esbuild';
import alias from '@rollup/plugin-alias';
import replace from '@rollup/plugin-replace';
import pkgs from './package.json' with {type: 'json'};
// Dev builds bundle src/dev.ts; production builds bundle src/main.ts.
const isDev = process.env.NODE_ENV === 'development';
const input = isDev ? './src/dev.ts' : './src/main.ts';
/**
 * Rollup build configuration: bundles the server entry into dist/app.mjs
 * as ESM, aliasing bare Node builtin names to their 'node:' forms.
 * @type {import('rollup').RollupOptions}
 */
const config = {
  input,
  output: {
    dir: './dist',
    entryFileNames: 'app.mjs',
    chunkFileNames: '[name]-[hash].mjs',
    format: 'esm',
  },
  plugins: [
    replace({
      preventAssignment: true, // prevent accidental assignment to replaced identifiers
      DEV_SERVER: JSON.stringify(isDev), // inline the dev-server flag at build time
      APP_VERSION: JSON.stringify(pkgs.version),
    }),
    alias({
      // only esbuild needs to be configured
      entries: [
        { find: '@', replacement: path.resolve('src') }, // map '@' to the src directory
        { find: 'http', replacement: 'node:http' },
        { find: 'https', replacement: 'node:https' },
        { find: 'fs', replacement: 'node:fs' },
        { find: 'path', replacement: 'node:path' },
        { find: 'crypto', replacement: 'node:crypto' },
        { find: 'zlib', replacement: 'node:zlib' },
        { find: 'stream', replacement: 'node:stream' },
        { find: 'net', replacement: 'node:net' },
        { find: 'tty', replacement: 'node:tty' },
        { find: 'tls', replacement: 'node:tls' },
        { find: 'buffer', replacement: 'node:buffer' },
        { find: 'timers', replacement: 'node:timers' },
        // { find: 'string_decoder', replacement: 'node:string_decoder' },
        { find: 'dns', replacement: 'node:dns' },
        { find: 'domain', replacement: 'node:domain' },
        { find: 'os', replacement: 'node:os' },
        { find: 'events', replacement: 'node:events' },
        { find: 'url', replacement: 'node:url' },
        { find: 'assert', replacement: 'node:assert' },
        { find: 'util', replacement: 'node:util' },
      ],
    }),
    resolve({
      preferBuiltins: true, // prefer Node built-ins over npm packages of the same name
    }),
    commonjs(),
    esbuild({
      target: 'node22', // transpile for Node 22
      minify: false, // minification disabled to keep output readable
      tsconfig: 'tsconfig.json',
    }),
    json(),
  ],
  // Left external: resolved at runtime rather than bundled.
  external: [
    /@kevisual\/router(\/.*)?/, // router
    /@kevisual\/use-config(\/.*)?/, // config loader
    'sequelize', // database ORM
    'ioredis', // redis client
    'pg', // postgres driver
  ],
};
export default config;

0
src/asr/index.ts Normal file
View File

View File

@@ -0,0 +1,131 @@
type AliAsrServerOptions = {
  /** Gateway endpoint; defaults to the Shanghai one-sentence ASR endpoint. */
  baseUrl?: string;
  /** Aliyun NLS project appkey (required for real requests). */
  appkey: string;
  /** Service auth token, sent as the X-NLS-Token header. */
  token: string;
  /** Audio container format, e.g. 'pcm' | 'wav' | 'mp3'; omitted from the URL when empty. */
  format?: string;
  /** Sample rate in Hz as a string, e.g. '16000'; omitted from the URL when empty. */
  sampleRate?: string;
  enablePunctuationPrediction?: boolean;
  enableInverseTextNormalization?: boolean;
  enableVoiceDetection?: boolean;
};
/**
 * Client for Aliyun NLS "one sentence recognition" over its RESTful API:
 * POSTs raw audio bytes and returns the parsed JSON result.
 */
export class AliAsrServer {
  private baseUrl: string;
  private appkey: string;
  private token: string;
  private format: string;
  private sampleRate: string;
  private enablePunctuationPrediction: boolean;
  private enableInverseTextNormalization: boolean;
  private enableVoiceDetection: boolean;
  constructor(opts?: AliAsrServerOptions) {
    const {
      baseUrl = 'https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/asr',
      appkey = '',
      token = '',
      // fix: default to '' so the string-typed fields are never undefined;
      // '' is falsy, so buildRequestUrl still omits the parameters.
      format = '',
      sampleRate = '',
      enablePunctuationPrediction = true,
      enableInverseTextNormalization = true,
      enableVoiceDetection = false,
    } = opts || {};
    this.baseUrl = baseUrl;
    this.appkey = appkey;
    this.token = token;
    this.format = format;
    this.sampleRate = sampleRate;
    this.enablePunctuationPrediction = enablePunctuationPrediction;
    this.enableInverseTextNormalization = enableInverseTextNormalization;
    this.enableVoiceDetection = enableVoiceDetection;
  }
  /** Build the request URL with appkey and the enabled feature flags as query params. */
  buildRequestUrl(): string {
    const params = new URLSearchParams();
    params.append('appkey', this.appkey);
    this.format && params.append('format', this.format);
    this.sampleRate && params.append('sample_rate', this.sampleRate);
    if (this.enablePunctuationPrediction) {
      params.append('enable_punctuation_prediction', 'true');
    }
    if (this.enableInverseTextNormalization) {
      params.append('enable_inverse_text_normalization', 'true');
    }
    if (this.enableVoiceDetection) {
      params.append('enable_voice_detection', 'true');
    }
    return `${this.baseUrl}?${params.toString()}`;
  }
  /**
   * POST the audio buffer to the recognition endpoint.
   * @param audioContent raw audio bytes (already read into memory)
   * @returns the parsed response body on success (status 20000000), else null
   */
  async processAudio(audioContent: Buffer): Promise<any> {
    try {
      // Auth token travels in a header; body is the raw audio stream.
      const headers = {
        'X-NLS-Token': this.token,
        'Content-Type': 'application/octet-stream',
      };
      const requestUrl = this.buildRequestUrl();
      const response = await fetch(requestUrl, {
        method: 'POST',
        headers,
        body: audioContent,
      });
      if (!response.ok) {
        console.log(`The audio file recognized failed, http code: ${response.status}`);
        const v = await response.text();
        console.log('The audio file recognized response:', v);
        return null;
      }
      // 20000000 is the gateway's success status code.
      const body = await response.json();
      if (body.status === 20000000) {
        console.log('The audio file recognized result:');
        console.log(body);
        console.log('result: ' + body.result);
        console.log('The audio file recognized succeed!');
        return body;
      } else {
        console.log('The audio file recognized failed!');
        console.log(body);
        return null;
      }
    } catch (error) {
      // NOTE(review): ENOENT cannot occur here since the audio is passed as a
      // Buffer — this branch is a leftover from a file-path based version.
      if (error.code === 'ENOENT') {
        console.log('The audio file does not exist!');
      } else {
        console.log('Error during audio processing:', error);
      }
      return null;
    }
  }
}
// // 使用示例
// async function main() {
// const asrServer = new AliAsrServer({
// appkey: '填入appkey',
// token: '填入服务鉴权Token',
// format: 'pcm',
// sampleRate: '16000',
// enablePunctuationPrediction: true,
// enableInverseTextNormalization: true,
// enableVoiceDetection: false,
// });
// const audioFile = '/path/to/nls-sample-16k.wav';
// await asrServer.processAudio(audioFile);
// }
// // 执行主函数
// main().catch(console.error);

View File

@@ -0,0 +1,42 @@
import RPCClient from '@alicloud/pop-core';
interface TokenResponse {
Token: {
Id: string;
ExpireTime: number;
};
}
type AliCommonOptions = {
accessKeyId: string;
accessKeySecret: string;
};
export class AliCommon {
private accessKeyId: string;
private accessKeySecret: string;
private endpoint: string;
private apiVersion: string;
token = '';
expireTime = 0;
constructor(opts?: AliCommonOptions) {
this.accessKeyId = opts?.accessKeyId || process.env.ALIYUN_AK_ID || '';
this.accessKeySecret = opts?.accessKeySecret || process.env.ALIYUN_AK_SECRET || '';
this.endpoint = 'http://nls-meta.cn-shanghai.aliyuncs.com';
this.apiVersion = '2019-02-28';
}
async getToken() {
if (this.token && this.expireTime > Date.now()) {
return this.token;
}
const client = new RPCClient({
accessKeyId: this.accessKeyId,
accessKeySecret: this.accessKeySecret,
endpoint: this.endpoint,
apiVersion: this.apiVersion,
});
const result = await client.request<TokenResponse>('CreateToken', {});
this.token = result.Token.Id;
this.expireTime = result.Token.ExpireTime * 1000;
return result.Token.Id;
}
}

View File

@@ -0,0 +1,25 @@
// Example script: run one-sentence recognition against a local audio file
// using credentials from the environment.
import { AliAsrServer } from '../aliyun-asr-server.ts';
import fs from 'fs/promises';
import path from 'path';
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
const name = 'output-1746007775571.mp3';
// NOTE(review): videoTestPath2 is currently unused — kept as an alternate input.
const videoTestPath2 = path.join(process.cwd(), 'build', name);
// Usage example
async function main() {
  const asrServer = new AliAsrServer({
    appkey: process.env.ALI_ASR_APP_KEY,
    token: process.env.ALI_ASR_TOKEN,
    // NOTE(review): format is 'mp3' while the default input path is a .wav —
    // confirm which pairing is intended before running.
    format: 'mp3',
    // format: 'wav',
  });
  const audioContent = await fs.readFile(videoTestPath);
  await asrServer.processAudio(audioContent);
}
// Run the entry point
main().catch(console.error);

View File

@@ -0,0 +1,10 @@
// Example script: fetch an NLS service token using AK credentials from .env.
import dotenv from 'dotenv';
// Load .env before importing modules that may read process.env at import time.
dotenv.config();
import { AliCommon } from '../base.ts';
const aliCommon = new AliCommon({
  accessKeyId: process.env.ALIYUN_AK_ID,
  accessKeySecret: process.env.ALIYUN_AK_SECRET,
});
// Print the freshly issued (or cached) token.
aliCommon.getToken().then(console.log);

View File

@@ -3,40 +3,80 @@ import net from 'net';
import path from 'path'; import path from 'path';
import fs from 'fs'; import fs from 'fs';
const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav'); // const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
const ws = new VideoWS({ // const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
// url: 'wss://192.168.31.220:10095', // const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
url: 'wss://funasr.xiongxiao.me', const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
isFile: true, const videoTestPath3 = path.join(process.cwd(), 'funasr_test.wav');
onConnect: async () => { const name = 'output-1746007775571.mp3';
console.log('onConnect'); const videoTestPath2 = path.join(process.cwd(), 'build', name);
const data = fs.readFileSync(videoTestPath);
let sampleBuf = new Uint8Array(data);
var chunk_size = 960; // for asr chunk_size [5, 10, 5] const url = 'wss://funasr.xiongxiao.me';
let totalsend = 0; const url5 = 'https://1.15.101.247:10095'; // pro
let len = 0; // const ws = new VideoWS({
ws.start(); // // url: 'wss://192.168.31.220:10095',
while (sampleBuf.length >= chunk_size) { // url: 'wss://funasr.xiongxiao.me',
const sendBuf = sampleBuf.slice(0, chunk_size); // isFile: true,
totalsend = totalsend + sampleBuf.length; // // mode: 'offline',
sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length); // wav_format: 'mp3',
if (len === 100) { // onConnect: async () => {
// ws.stop(); // console.log('onConnect');
// ws.start(); // const data = fs.readFileSync(videoTestPath);
await new Promise((resolve) => setTimeout(resolve, 1000)); // let sampleBuf = new Uint8Array(data);
// var chunk_size = 960; // for asr chunk_size [5, 10, 5]
// let totalsend = 0;
// let len = 0;
// ws.start();
// while (sampleBuf.length >= chunk_size) {
// const sendBuf = sampleBuf.slice(0, chunk_size);
// totalsend = totalsend + sampleBuf.length;
// sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length);
// if (len === 100) {
// // ws.stop();
// // ws.start();
// // await new Promise((resolve) => setTimeout(resolve, 1000));
// }
// await new Promise((resolve) => setTimeout(resolve, 10));
// ws.send(sendBuf);
// len++;
// }
// await new Promise((resolve) => setTimeout(resolve, 1000));
// ws.stop();
// console.log('len', len);
// },
// });
// const server = net.createServer((socket) => {
// socket.on('data', (data) => {
// console.log('data', data);
// });
// });
// server.listen(10096);
const ws2 = new VideoWS({
url: url5,
mode: '2pass',
onConnect: async () => {
const data = fs.readFileSync(videoTestPath3);
// await ws2.sendBuffer(data, { wav_format: 'mp3' });
// await new Promise((resolve) => setTimeout(resolve, 1000));
// const data2 = fs.readFileSync(videoTestPath2);
// await ws2.sendBuffer(data2, { wav_format: 'mp3' });
ws2.emitter.on('message', (event) => {
console.log('message', event.data);
});
ws2.emitter.on('result', (result) => {
if (result.is_final) {
console.log('Final result:', result);
process.exit(0);
} }
ws.send(sendBuf); });
len++; await ws2.start();
} await ws2.sendBuffer(data, { online: true });
ws.stop(); setTimeout(() => {
console.log('len', len); ws2.stop();
}, 4000);
}, },
}); });
const server = net.createServer((socket) => {
socket.on('data', (data) => {
console.log('data', data);
});
});
server.listen(10096);

View File

@@ -1,35 +1,61 @@
import { VideoWS } from '../ws.ts'; import { VideoWS } from '../ws.ts';
import path from 'node:path';
import net from 'net'; import net from 'net';
import { Recording } from '../../../../recorder/index.ts'; import { Recording } from '../../../../recorder/index.ts';
import Stream from 'stream'; import Stream from 'stream';
import fs from 'node:fs'; // 新增
const recorder = new Recording({
sampleRate: 16000,
channels: 1, //
audioType: 'wav',
threshold: 0,
recorder: 'rec',
silence: '1.0',
endOnSilence: true,
});
const writeFilePath = path.join(process.cwd(), 'funasr_test.wav');
const fileStream = fs.createWriteStream(writeFilePath, { encoding: 'binary' });
const url = 'wss://funasr.xiongxiao.me';
const url3 = 'wss://pro.xiongxiao.me:10095';
const url4 = 'wss://121.4.112.18:10095'; // aliyun
const url5 = 'https://1.15.101.247:10095'; // pro
const recorder = new Recording();
const writeStream = new Stream.Writable();
const ws = new VideoWS({ const ws = new VideoWS({
url: 'wss://192.168.31.220:10095', url: url5,
isFile: false, isFile: false,
// mode: 'online',
mode: '2pass',
wsOptions: {
rejectUnauthorized: false,
},
onConnect: async () => { onConnect: async () => {
console.log('onConnect'); console.log('onConnect');
let chunks: Buffer = Buffer.alloc(0); ws.start();
var chunk_size = 960; // for asr chunk_size [5, 10, 5]
let totalsend = 0; recorder.start();
let len = 0; let len = 0;
recorder.stream().on('data', (chunk) => { recorder.stream().on('data', (chunk) => {
chunks = Buffer.concat([chunks, chunk]); // ws.sendBuffer(chunk, { online: true });
if (chunks.length > chunk_size) { // console.log('Sending audio chunk:', chunk.length);
ws.send(chunks); ws.send(chunk)
totalsend += chunks.length; fileStream.write(chunk); // 新增:将音频数据写入文件
chunks = Buffer.alloc(0); len += chunk.length;
}
}); });
ws.start();
setTimeout(() => { setTimeout(() => {
ws.stop(); ws.stop();
fileStream.end(); // 新增:关闭文件流
setTimeout(() => { setTimeout(() => {
process.exit(0); process.exit(0);
}, 1000); }, 1000);
console.log('len', len); console.log('len', len);
}, 20000); }, 10 * 1000);
ws.emitter.on('message', (event) => {
console.log('message', event.data);
});
}, },
}); });
@@ -38,4 +64,4 @@ const server = net.createServer((socket) => {
console.log('data', data); console.log('data', data);
}); });
}); });
server.listen(10096); server.listen(10097);

View File

@@ -1,16 +1,42 @@
import WebSocket from 'ws'; // import WebSocket from 'ws';
import { EventEmitter } from 'eventemitter3';
type VideoWSOptions = { import { WSServer, WSSOptions } from '../../provider/ws-server.ts';
export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
export type VideoWSOptions = {
url?: string; url?: string;
ws?: WebSocket; ws?: WebSocket;
itn?: boolean; itn?: boolean;
mode?: string; mode?: VideoWsMode;
isFile?: boolean; isFile?: boolean;
onConnect?: () => void; onConnect?: () => void;
wav_format?: string;
emitter?: EventEmitter;
} & {
wsOptions?: WSSOptions['wsOptions'];
};
export const videoWsMode = ['2pass', 'online', 'offline'] as const;
type VideoWsMode = (typeof videoWsMode)[number];
type OpenRequest = {
// 语音分片大小(单位: 毫秒):
chunk_size: number[];
// 音频文件名:
wav_name: string;
// 是否正在说话:
is_speaking: boolean;
// 分片间隔(单位: 毫秒):
chunk_interval: number;
// 逆文本标准化(ITN):
itn: boolean;
// 模式:
// '2pass' - 双通道模式, 'online' - 在线模式, 'offline' - 离线模式
mode: VideoWsMode;
// 音频格式:
wav_format?: string; // 'wav' - PCM格式, 'mp3' - MP3格式等
// 音频采样率(单位: Hz):
audio_fs?: number;
// 热词列表:
hotwords?: string;
}; };
export const VideoWsMode = ['2pass', 'online', 'offline'];
type VideoWsMode = (typeof VideoWsMode)[number];
export type VideoWsResult = { export type VideoWsResult = {
isFinal: boolean; isFinal: boolean;
mode: VideoWsMode; mode: VideoWsMode;
@@ -20,46 +46,21 @@ export type VideoWsResult = {
wav_name: string; wav_name: string;
}; };
export class VideoWS { export class VideoWS extends WSServer {
ws: WebSocket;
itn?: boolean; itn?: boolean;
mode?: VideoWsMode; mode?: VideoWsMode;
isFile?: boolean; wav_format?: string;
onConnect?: () => void;
constructor(options?: VideoWSOptions) { constructor(options?: VideoWSOptions) {
this.ws = super({ url: options?.url, ws: options?.ws, onConnect: options?.onConnect, wsOptions: options?.wsOptions });
options?.ws || this.itn = options?.itn || false;
new WebSocket(options.url, {
rejectUnauthorized: false,
});
this.itn = options?.itn || false; this.itn = options?.itn || false;
this.mode = options?.mode || 'online'; this.mode = options?.mode || 'online';
this.isFile = options?.isFile || false; this.wav_format = options?.wav_format;
this.onConnect = options?.onConnect || (() => {});
this.ws.onopen = this.onOpen.bind(this);
this.ws.onmessage = this.onMessage.bind(this);
this.ws.onerror = this.onError.bind(this);
this.ws.onclose = this.onClose.bind(this);
} }
async onOpen() { async start(opts?: Partial<OpenRequest>) {
this.onConnect();
}
async start() {
let isFileMode = this.isFile;
const chunk_size = new Array(5, 10, 5); const chunk_size = new Array(5, 10, 5);
type OpenRequest = { console.log('start', chunk_size);
chunk_size: number[];
wav_name: string;
is_speaking: boolean;
chunk_interval: number;
itn: boolean;
mode: VideoWsMode;
wav_format?: string;
audio_fs?: number;
hotwords?: string;
};
const request: OpenRequest = { const request: OpenRequest = {
chunk_size: chunk_size, chunk_size: chunk_size,
wav_name: 'h5', // wav_name: 'h5', //
@@ -67,16 +68,13 @@ export class VideoWS {
chunk_interval: 10, chunk_interval: 10,
itn: this.itn, itn: this.itn,
mode: this.mode || 'online', mode: this.mode || 'online',
...opts,
}; };
console.log('request', request); const file_sample_rate = 16000;
if (isFileMode) { request.wav_format = request.wav_format || this.wav_format || 'wav';
const file_ext = 'wav'; if ('wav' == request.wav_format) {
const file_sample_rate = 16000; request.wav_format = 'PCM';
request.wav_format = file_ext; request.audio_fs = file_sample_rate;
if (file_ext == 'wav') {
request.wav_format = 'PCM';
request.audio_fs = file_sample_rate;
}
} }
this.ws.send(JSON.stringify(request)); this.ws.send(JSON.stringify(request));
} }
@@ -96,11 +94,41 @@ export class VideoWS {
this.ws.send(data); this.ws.send(data);
} }
} }
/**
* 发送音频数据, 离线
* @param data 音频数据
* @param opts 选项
*/
async sendBuffer(data: Buffer, opts?: { isFile?: boolean; wav_format?: string; online?: boolean }) {
const { wav_format = 'wav', online = false } = opts || {};
if (this.ws && this.ws.readyState === WebSocket.OPEN) {
let sampleBuf = new Uint8Array(data);
const ws = this;
var chunk_size = 960; // for asr chunk_size [5, 10, 5]
let totalsend = 0;
let len = 0;
if (!online) ws.start({ wav_format });
while (sampleBuf.length >= chunk_size) {
const sendBuf = sampleBuf.slice(0, chunk_size);
totalsend = totalsend + sampleBuf.length;
sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length);
await new Promise((resolve) => setTimeout(resolve, 10));
ws.send(sendBuf);
len++;
}
if (!online) ws.stop();
}
}
async onMessage(event: MessageEvent) { async onMessage(event: MessageEvent) {
super.onMessage(event);
const data = event.data; const data = event.data;
try { try {
const result = JSON.parse(data.toString()); const result = JSON.parse(data.toString());
console.log('result', result); if (result?.is_final !== undefined && result?.text) {
// console.log('result', result, typeof result);
this.emitter.emit('result', result);
}
// console.log('onMessage-result', result);
} catch (error) { } catch (error) {
console.log('error', error); console.log('error', error);
} }

View File

View File

@@ -0,0 +1,494 @@
import * as zlib from 'node:zlib';
import { promisify } from 'node:util';
import { nanoid } from 'nanoid';
import { VolcEngineBase, uuid } from './base.ts';
// Promisify zlib methods (used by the streaming client further below)
const gzipPromise = promisify(zlib.gzip);
const gunzipPromise = promisify(zlib.gunzip);
// ---- Binary framing constants for the volcengine ASR WebSocket protocol ----
const PROTOCOL_VERSION = 0b0001;
const DEFAULT_HEADER_SIZE = 0b0001; // header size, in 4-byte words
// Message Type
const FULL_CLIENT_REQUEST = 0b0001;
const AUDIO_ONLY_REQUEST = 0b0010;
const FULL_SERVER_RESPONSE = 0b1001;
const SERVER_ACK = 0b1011;
const SERVER_ERROR_RESPONSE = 0b1111;
// Message Type Specific Flags
const NO_SEQUENCE = 0b0000; // no check sequence
const POS_SEQUENCE = 0b0001;
const NEG_SEQUENCE = 0b0010;
const NEG_WITH_SEQUENCE = 0b0011;
const NEG_SEQUENCE_1 = 0b0011; // NOTE(review): duplicate of NEG_WITH_SEQUENCE — kept for compatibility
// Message Serialization
const NO_SERIALIZATION = 0b0000;
const JSON_SERIALIZATION = 0b0001;
// Message Compression
const NO_COMPRESSION = 0b0000;
const GZIP_COMPRESSION = 0b0001;
/**
 * Build the 4-byte frame header for a WebSocket request.
 * Byte 0: protocol version | header size (in words); byte 1: message type |
 * type-specific flags; byte 2: serialization | compression; byte 3: reserved.
 */
function generateHeader(
  messageType = FULL_CLIENT_REQUEST,
  messageTypeSpecificFlags = NO_SEQUENCE,
  serialMethod = JSON_SERIALIZATION,
  compressionType = GZIP_COMPRESSION,
  reservedData = 0x00,
): Buffer {
  const header = Buffer.alloc(4);
  // fix: use the declared DEFAULT_HEADER_SIZE constant instead of a
  // shadowing local `headerSize = 1` that left the constant unused.
  header[0] = (PROTOCOL_VERSION << 4) | DEFAULT_HEADER_SIZE;
  header[1] = (messageType << 4) | messageTypeSpecificFlags;
  header[2] = (serialMethod << 4) | compressionType;
  header[3] = reservedData;
  return header;
}
/**
 * Encode a sequence number as a signed 32-bit big-endian buffer,
 * placed between the header and the payload.
 */
function generateBeforePayload(sequence: number): Buffer {
  const beforePayload = Buffer.alloc(4);
  beforePayload.writeInt32BE(sequence);
  return beforePayload;
}
// Shape of a decoded server frame; optional fields depend on the message type.
export type ParsedMessage = {
  // True when the server marked this frame as the final package of the stream.
  isLastPackage: boolean;
  // Sequence number, present when the frame carries one (flag bit 0).
  payloadSequence?: number;
  // Decoded JSON payload (FULL_SERVER_RESPONSE with JSON serialization).
  payloadMsg?: {
    audio_info?: {
      duration: number;
    };
    result?: {
      additions?: {
        log_id?: string;
      };
      text?: string;
      utterances?: Array<{
        additions?: {
          fixed_prefix_result?: string;
        };
        definite?: boolean;
        end_time?: number;
        start_time?: number;
        text?: string;
        words?: Array<{
          end_time: number;
          start_time: number;
          text: string;
        }>;
      }>;
    };
    error?: any;
  };
  // Declared payload size from the frame, in bytes.
  payloadSize?: number;
  // Error code (SERVER_ERROR_RESPONSE frames only).
  code?: number;
  // Acknowledged sequence (SERVER_ACK frames only).
  seq?: number;
};
/**
 * Parse a binary response frame from the WebSocket server.
 * Frame layout: 4-byte header, optional 4-byte sequence, then a payload whose
 * interpretation depends on the message type; the payload may be
 * gzip-compressed and/or JSON-serialized according to the header nibbles.
 */
function parseResponse(res: Buffer): any {
  // Header nibbles; protocolVersion/reserved/headerExtensions are decoded
  // but currently unused.
  const protocolVersion = res[0] >> 4;
  const headerSize = res[0] & 0x0f;
  const messageType = res[1] >> 4;
  const messageTypeSpecificFlags = res[1] & 0x0f;
  const serializationMethod = res[2] >> 4;
  const messageCompression = res[2] & 0x0f;
  const reserved = res[3];
  const headerExtensions = res.slice(4, headerSize * 4);
  // Everything after the (word-sized) header is payload.
  const payload = res.slice(headerSize * 4);
  const result: any = {
    isLastPackage: false,
  };
  let payloadMsg = null;
  let payloadSize = 0;
  let offset = 0;
  if (messageTypeSpecificFlags & 0x01) {
    // receive frame with sequence
    const seq = payload.readInt32BE(0);
    result.payloadSequence = seq;
    offset += 4;
  }
  if (messageTypeSpecificFlags & 0x02) {
    // receive last package
    result.isLastPackage = true;
  }
  const remainingPayload = payload.slice(offset);
  if (messageType === FULL_SERVER_RESPONSE) {
    // Layout: [size:4][body]
    payloadSize = remainingPayload.readInt32BE(0);
    payloadMsg = remainingPayload.slice(4);
  } else if (messageType === SERVER_ACK) {
    // Layout: [seq:4], optionally followed by [size:4][body]
    const seq = remainingPayload.readInt32BE(0);
    result.seq = seq;
    if (remainingPayload.length >= 8) {
      payloadSize = remainingPayload.readUInt32BE(4);
      payloadMsg = remainingPayload.slice(8);
    }
  } else if (messageType === SERVER_ERROR_RESPONSE) {
    // Layout: [code:4][size:4][body]
    const code = remainingPayload.readUInt32BE(0);
    result.code = code;
    payloadSize = remainingPayload.readUInt32BE(4);
    payloadMsg = remainingPayload.slice(8);
  }
  // Frames with no body (e.g. bare ACKs) return header-level info only.
  if (!payloadMsg) {
    return result;
  }
  if (messageCompression === GZIP_COMPRESSION) {
    try {
      const decompressed = zlib.gunzipSync(payloadMsg);
      payloadMsg = decompressed;
    } catch (error) {
      // Decompression failure: fall through with the raw bytes.
      console.error('Error decompressing payload:', error);
    }
  }
  if (serializationMethod === JSON_SERIALIZATION) {
    try {
      payloadMsg = JSON.parse(payloadMsg.toString('utf-8'));
    } catch (error) {
      // Parse failure: payloadMsg stays as the (decompressed) bytes.
      console.error('Error parsing JSON payload:', error);
    }
  } else if (serializationMethod !== NO_SERIALIZATION) {
    payloadMsg = payloadMsg.toString('utf-8');
  }
  result.payloadMsg = payloadMsg;
  result.payloadSize = payloadSize;
  return result;
}
/**
 * Parse a WAV buffer: header fields plus the raw audio payload.
 * Assumes the canonical layout where 'fmt ' is the first chunk and is
 * 16 bytes long (channels @22, sample rate @24, bits-per-sample @34) —
 * TODO confirm for files with extra chunks before 'fmt '.
 * @throws on buffers that are too short or lack the RIFF/WAVE magic
 */
async function readWavInfo(data: Buffer): Promise<{
  channels: number;
  sampleWidth: number;
  sampleRate: number;
  frames: number;
  audioData: Buffer;
}> {
  // This is a simplified WAV parser - in production you should use a proper library
  if (data.length < 44) {
    throw new Error('Invalid WAV file: too short');
  }
  // Check WAV header magic
  if (data.slice(0, 4).toString() !== 'RIFF' || data.slice(8, 12).toString() !== 'WAVE') {
    throw new Error('Invalid WAV file: not a WAV format');
  }
  // Fixed-offset fmt fields (valid for the canonical 44-byte header)
  const channels = data.readUInt16LE(22);
  const sampleRate = data.readUInt32LE(24);
  const bitsPerSample = data.readUInt16LE(34);
  const sampleWidth = bitsPerSample / 8;
  // Scan chunks for the 'data' chunk
  let offset = 12; // Start after "WAVE"
  let dataSize = 0;
  let audioData: Buffer = Buffer.alloc(0);
  // fix: require a full 8-byte chunk header so truncated files cannot
  // trigger an out-of-range read.
  while (offset + 8 <= data.length) {
    const chunkType = data.slice(offset, offset + 4).toString();
    const chunkSize = data.readUInt32LE(offset + 4);
    if (chunkType === 'data') {
      dataSize = chunkSize;
      audioData = data.slice(offset + 8, offset + 8 + chunkSize);
      break;
    }
    // fix: RIFF chunks are word-aligned — odd-sized chunks are followed by
    // a pad byte that must be skipped when scanning.
    offset += 8 + chunkSize + (chunkSize % 2);
  }
  const frames = dataSize / (channels * sampleWidth);
  return {
    channels,
    sampleWidth,
    sampleRate,
    frames,
    audioData,
  };
}
/**
 * Check whether a buffer carries a plausible RIFF/WAVE header.
 */
function judgeWav(data: Buffer): boolean {
  const hasMinimumHeader = data.length >= 44;
  if (!hasMinimumHeader) {
    return false;
  }
  const riffTag = data.slice(0, 4).toString();
  const waveTag = data.slice(8, 12).toString();
  return riffTag === 'RIFF' && waveTag === 'WAVE';
}
/**
 * Yield successive fixed-size chunks of a buffer; the boolean flag marks
 * the final (possibly shorter) slice.
 */
function* sliceData(data: Buffer, chunkSize: number): Generator<[Buffer, boolean]> {
  let start = 0;
  const total = data.length;
  for (; start + chunkSize < total; start += chunkSize) {
    yield [data.slice(start, start + chunkSize), false];
  }
  yield [data.slice(start, total), true];
}
// Audio container formats accepted by AsrWsClient.
const format = ['wav', 'mp3', 'pcm'] as const;
// Union of the entries above: 'wav' | 'mp3' | 'pcm'.
type AsrClientFormat = (typeof format)[number];
/**
 * Construction options for the streaming "bigmodel" AsrWsClient.
 * Defaults noted below are the ones applied in the constructor.
 */
interface AsrClientOptions {
  segDuration?: number; // duration of one audio segment, in ms (default 200)
  wsUrl?: string; // WebSocket endpoint (defaults to the bytedance sauc bigmodel URL)
  uid?: string; // user id reported in the request (default 'test')
  format?: AsrClientFormat; // default 'wav'
  rate?: number; // sample rate in Hz (default 16000)
  bits?: number; // bits per sample (default 16)
  channel?: number; // channel count (default 1)
  codec?: string; // codec name reported to the server (default 'raw')
  authMethod?: string; // NOTE(review): not read by AsrWsClient — verify before relying on it
  hotWords?: string[]; // NOTE(review): stored but never sent in constructRequest — confirm intended
  streaming?: boolean; // NOTE(review): not read by AsrWsClient
  mp3SegSize?: number; // chunk size in bytes used for mp3 input (default 1000)
  resourceId?: string; // X-Api-Resource-Id header (default 'volc.bigasr.sauc.duration')
  token?: string; // sent as the X-Api-Access-Key header
  appid?: string; // sent as the X-Api-App-Key header
}
// A single audio job: identifier plus file path.
// NOTE(review): not referenced by AsrWsClient in this module chunk.
interface AudioItem {
  id: string | number;
  path: string;
}
/**
 * ASR WebSocket Client for the Volcengine "sauc bigmodel" streaming API (v3).
 *
 * Results arrive through `this.emitter`: 'message' fires for every parsed
 * server frame, 'error' when a frame carries an error payload, and 'end'
 * when the server marks the last package of a recognition.
 */
export class AsrWsClient extends VolcEngineBase {
  private successCode: number = 1000; // NOTE(review): never read in this class
  private segDuration: number; // duration of one audio segment, in ms
  private format: string; // 'wav' | 'mp3' | 'pcm'
  private rate: number; // sample rate in Hz
  private bits: number; // bits per sample
  private channel: number; // channel count
  private codec: string; // codec name reported to the server
  private hotWords: string[] | null; // NOTE(review): stored but never sent — confirm intended
  private mp3SegSize: number; // chunk size in bytes used for mp3 input
  private reqEvent: number = 1; // NOTE(review): never read in this class
  private uid: string; // user id reported in the request
  private seq: number = 1; // packet sequence number; negated on the final audio packet
  private hasSendFullClientRequest: boolean = false; // whether the initial config frame was sent
  constructor(options: AsrClientOptions = {}) {
    super({
      url: options.wsUrl || 'wss://openspeech.bytedance.com/api/v3/sauc/bigmodel',
      onConnect: () => this.onWsConnect(),
      wsOptions: {
        // Volcengine auth/selection headers for the bigmodel resource.
        headers: {
          'X-Api-Resource-Id': options.resourceId || 'volc.bigasr.sauc.duration',
          'X-Api-Access-Key': options.token || '',
          'X-Api-App-Key': options.appid || '',
          'X-Api-Request-Id': uuid(),
        },
      },
    });
    this.segDuration = options.segDuration || 200;
    this.uid = options.uid || 'test';
    this.format = options.format || 'wav';
    this.rate = options.rate || 16000;
    this.bits = options.bits || 16;
    this.channel = options.channel || 1;
    this.codec = options.codec || 'raw';
    this.hotWords = options.hotWords || null;
    this.mp3SegSize = options.mp3SegSize || 1000;
  }
  private onWsConnect() {
    console.log('ASR Big Model WebSocket connected');
  }
  /**
   * Construct request parameters for the initial full client request.
   * NOTE(review): `reqId` and `data` are accepted but not used — confirm intended.
   */
  private constructRequest(reqId: string, data?: any): any {
    return {
      user: {
        uid: this.uid,
      },
      audio: {
        format: this.format,
        sample_rate: this.rate,
        bits: this.bits,
        channel: this.channel,
        codec: this.codec,
      },
      request: {
        model_name: 'bigmodel',
        enable_punc: true,
        // result_type: 'single', // all, single
        result_type: 'all',
      },
    };
  }
  /**
   * Send the one-time "full client request" that configures the session.
   * Safe to call repeatedly; only the first call transmits.
   */
  async sendFullClientRequest() {
    if (this.hasSendFullClientRequest) {
      return;
    }
    this.seq = 1;
    const seq = this.seq;
    const reqId = nanoid();
    const requestParams = this.constructRequest(reqId);
    // Prepare and send initial request
    const payloadStr = JSON.stringify(requestParams);
    const compressedPayload = await gzipPromise(Buffer.from(payloadStr));
    // Frame layout: 4-byte header | 4-byte sequence | 4-byte payload size | gzip payload.
    const fullClientRequest = Buffer.concat([
      generateHeader(FULL_CLIENT_REQUEST, POS_SEQUENCE),
      generateBeforePayload(seq),
      Buffer.alloc(4),
      compressedPayload,
    ]);
    // Set payload size
    fullClientRequest.writeUInt32BE(compressedPayload.length, 8);
    // Send initial request
    (this as any).ws.send(fullClientRequest);
    this.hasSendFullClientRequest = true;
  }
  /**
   * Process audio data in segments: split `audioData` into `segmentSize`
   * byte chunks and stream each as an audio-only frame.
   */
  private async segmentDataProcessor(audioData: Buffer, segmentSize: number): Promise<any> {
    await this.sendFullClientRequest();
    const that = this;
    if (this.isError) {
      return;
    }
    const sendVoice = async (audioData: Buffer, segmentSize: number) => {
      that.setCanSend(false);
      for (const [chunk, last] of sliceData(audioData, segmentSize)) {
        that.seq += 1;
        const isEnd = that.isEnd && last; // the stream was marked ended AND this is the last chunk of this buffer
        console.log('chunkSize', Buffer.byteLength(chunk), segmentSize, 'last', last);
        if (isEnd) {
          // The protocol marks the final audio packet with a negative sequence number.
          that.seq = -that.seq;
        }
        const seq = that.seq;
        const compressedChunk = await gzipPromise(chunk);
        const messageType = AUDIO_ONLY_REQUEST;
        const flags = isEnd ? NEG_WITH_SEQUENCE : POS_SEQUENCE;
        const audioRequest = Buffer.concat([generateHeader(messageType, flags), generateBeforePayload(seq), Buffer.alloc(4), compressedChunk]);
        // Set payload size
        audioRequest.writeUInt32BE(compressedChunk.length, 8);
        // Send audio chunk
        this.ws.send(audioRequest);
        // TODO(review): untested — it may be necessary to await a per-chunk ack here
        // const chunkResponse = await new Promise<any>((resolveChunk) => {
        //   const onChunkMessage = (chunkEvent: MessageEvent) => {
        //     (this as any).ws.removeEventListener('message', onChunkMessage);
        //     const parsed = parseResponse(Buffer.from(chunkEvent.data as ArrayBuffer));
        //     resolveChunk(parsed);
        //   };
        //   (this as any).ws.addEventListener('message', onChunkMessage, { once: true });
        // });
        // if (last) {
        //   console.log('last', JSON.stringify(chunkResponse));
        //   break;
        // }
      }
      that.setCanSend(true);
    };
    // Wait for response
    await sendVoice(audioData, segmentSize);
  }
  /**
   * Parse each incoming binary frame and fan it out on the emitter
   * ('error' / 'message' / 'end').
   */
  async onMessage(event: MessageEvent) {
    try {
      const parsed = parseResponse(Buffer.from(event.data as ArrayBuffer));
      // console.log(`Seq ${parsed.payloadSequence} response:`, parsed);
      if (typeof event.data === 'string') {
        throw new Error('event.data is string: ' + event.data);
      }
      // console.log('parsed', parsed.payloadSequence, parsed.payloadMsg.result.text);
      if (parsed?.payloadMsg?.error) {
        this.emitter.emit('error', parsed);
        this.isError = true;
      }
      this.emitter.emit('message', parsed);
      if (parsed.isLastPackage) {
        this.emitter.emit('end', parsed);
      }
    } catch (error) {
      console.error('Error processing response:', error);
    }
  }
  /**
   * Send a complete audio file; the chunk size is derived from the
   * configured format (mp3 fixed size, wav from its header, pcm from rate).
   */
  public async sendVoiceFile(data: Buffer) {
    try {
      if (this.format === 'mp3') {
        const segmentSize = this.mp3SegSize;
        return await this.segmentDataProcessor(data, segmentSize);
      }
      if (this.format === 'wav') {
        const wavInfo = await readWavInfo(data);
        const sizePerSec = wavInfo.channels * wavInfo.sampleWidth * wavInfo.sampleRate;
        const segmentSize = Math.floor((sizePerSec * this.segDuration) / 1000);
        // 3200
        return await this.segmentDataProcessor(data, segmentSize);
      }
      if (this.format === 'pcm') {
        const segmentSize = Math.floor((this.rate * 2 * this.channel * this.segDuration) / 500);
        return await this.segmentDataProcessor(data, segmentSize);
      }
      throw new Error('Unsupported format');
    } catch (error) {
      console.error('Error executing ASR:', error);
      throw error;
    }
  }
  /**
   * Mark the stream finished and flush a blank packet so the final
   * (negative-sequence) frame is actually transmitted.
   */
  async setIsEnd(isEnd: boolean) {
    super.setIsEnd(isEnd);
    if (isEnd) {
      // Send a blank packet
      const emptyBuffer = Buffer.alloc(10000);
      this.sendVoiceStream(emptyBuffer);
    }
  }
  /**
   * Send one chunk of a live voice stream (minimum 10000 bytes, per the
   * blank-packet flush above — TODO confirm).
   * @param data raw audio bytes
   * @returns result of the segment processor
   */
  public async sendVoiceStream(data: Buffer) {
    let segmentSize = Buffer.byteLength(data);
    return await this.segmentDataProcessor(data, segmentSize);
  }
}

View File

@@ -0,0 +1,484 @@
import * as fs from 'fs/promises';
import * as zlib from 'zlib';
import { promisify } from 'util';
import { VolcEngineBase, uuid } from './base.ts';
// Promise-based wrappers around zlib's callback API.
const gzipPromise = promisify(zlib.gzip);
const gunzipPromise = promisify(zlib.gunzip); // NOTE(review): unused in this file
// Binary protocol constants: 4-bit fields packed into the 4-byte frame header
// (see generateHeader for the exact packing).
const PROTOCOL_VERSION = 0b0001;
const DEFAULT_HEADER_SIZE = 0b0001; // header size, in 4-byte words
// Message Type (high nibble of byte 1)
const CLIENT_FULL_REQUEST = 0b0001;
const CLIENT_AUDIO_ONLY_REQUEST = 0b0010;
const SERVER_FULL_RESPONSE = 0b1001;
const SERVER_ACK = 0b1011;
const SERVER_ERROR_RESPONSE = 0b1111;
// Message Type Specific Flags (low nibble of byte 1)
const NO_SEQUENCE = 0b0000; // no check sequence
const POS_SEQUENCE = 0b0001;
const NEG_SEQUENCE = 0b0010;
const NEG_SEQUENCE_1 = 0b0011;
// Message Serialization (high nibble of byte 2)
const NO_SERIALIZATION = 0b0000;
const JSON_SERIALIZATION = 0b0001;
const THRIFT = 0b0011;
const CUSTOM_TYPE = 0b1111;
// Message Compression (low nibble of byte 2)
const NO_COMPRESSION = 0b0000;
const GZIP = 0b0001;
const CUSTOM_COMPRESSION = 0b1111;
/**
 * Pack the 4-byte binary protocol header.
 * Byte 0: version | header size (in 4-byte words); byte 1: message type | flags;
 * byte 2: serialization | compression; byte 3: reserved.
 */
function generateHeader(
  version = PROTOCOL_VERSION,
  messageType = CLIENT_FULL_REQUEST,
  messageTypeSpecificFlags = NO_SEQUENCE,
  serialMethod = JSON_SERIALIZATION,
  compressionType = GZIP,
  reservedData = 0x00,
): Buffer {
  const headerSizeWords = 1;
  return Buffer.from([
    (version << 4) | headerSizeWords,
    (messageType << 4) | messageTypeSpecificFlags,
    (serialMethod << 4) | compressionType,
    reservedData,
  ]);
}
/**
 * Header for the initial full client request (all protocol defaults).
 */
function generateFullDefaultHeader(): Buffer {
  const header = generateHeader();
  return header;
}
/**
 * Header for an intermediate audio-only frame.
 */
function generateAudioDefaultHeader(): Buffer {
  const header = generateHeader(PROTOCOL_VERSION, CLIENT_AUDIO_ONLY_REQUEST);
  return header;
}
/**
 * Header for the final audio-only frame (negative-sequence flag).
 */
function generateLastAudioDefaultHeader(): Buffer {
  const header = generateHeader(PROTOCOL_VERSION, CLIENT_AUDIO_ONLY_REQUEST, NEG_SEQUENCE);
  return header;
}
/**
* Parse response from the WebSocket server
*/
function parseResponse(res: Buffer): any {
const protocolVersion = res[0] >> 4;
const headerSize = res[0] & 0x0f;
const messageType = res[1] >> 4;
const messageTypeSpecificFlags = res[1] & 0x0f;
const serializationMethod = res[2] >> 4;
const messageCompression = res[2] & 0x0f;
const reserved = res[3];
const headerExtensions = res.slice(4, headerSize * 4);
const payload = res.slice(headerSize * 4);
const result: any = {};
let payloadMsg = null;
let payloadSize = 0;
if (messageType === SERVER_FULL_RESPONSE) {
payloadSize = payload.readInt32BE(0);
payloadMsg = payload.slice(4);
} else if (messageType === SERVER_ACK) {
const seq = payload.readInt32BE(0);
result.seq = seq;
if (payload.length >= 8) {
payloadSize = payload.readUInt32BE(4);
payloadMsg = payload.slice(8);
}
} else if (messageType === SERVER_ERROR_RESPONSE) {
const code = payload.readUInt32BE(0);
result.code = code;
payloadSize = payload.readUInt32BE(4);
payloadMsg = payload.slice(8);
}
if (!payloadMsg) {
return result;
}
if (messageCompression === GZIP) {
try {
payloadMsg = zlib.gunzipSync(payloadMsg);
} catch (error) {
console.error('Error decompressing payload:', error);
}
}
if (serializationMethod === JSON_SERIALIZATION) {
try {
payloadMsg = JSON.parse(payloadMsg.toString('utf-8'));
} catch (error) {
console.error('Error parsing JSON payload:', error);
}
} else if (serializationMethod !== NO_SERIALIZATION) {
payloadMsg = payloadMsg.toString('utf-8');
}
result.payloadMsg = payloadMsg;
result.payloadSize = payloadSize;
return result;
}
/**
 * Parse a RIFF/WAVE buffer (v2 client variant).
 *
 * Reads the canonical fmt fields at fixed offsets, then walks the chunk
 * list to locate the `data` chunk.
 *
 * @param data complete WAV file contents
 * @returns channel count, bytes per sample, sample rate, frame count and the raw PCM bytes
 * @throws Error when the buffer is too short or lacks the RIFF/WAVE signature
 */
async function readWavInfo(data: Buffer): Promise<{
  channels: number;
  sampleWidth: number;
  sampleRate: number;
  frames: number;
  wavBytes: Buffer;
}> {
  // Simple WAV parser - in production you should use a proper library
  if (data.length < 44) {
    throw new Error('Invalid WAV file: too short');
  }
  // Check WAV header
  if (data.slice(0, 4).toString() !== 'RIFF' || data.slice(8, 12).toString() !== 'WAVE') {
    throw new Error('Invalid WAV file: not a WAV format');
  }
  // Fixed fmt-chunk offsets — assumes the canonical header layout (fmt chunk at offset 12).
  const channels = data.readUInt16LE(22);
  const sampleRate = data.readUInt32LE(24);
  const bitsPerSample = data.readUInt16LE(34);
  const sampleWidth = bitsPerSample / 8;
  // Walk the chunk list to find the data chunk.
  let offset = 12; // Start after "WAVE"
  let dataSize = 0;
  let wavBytes: Buffer = Buffer.alloc(0);
  // FIX: stop before reading a truncated chunk header past the end of the buffer.
  while (offset + 8 <= data.length) {
    const chunkType = data.slice(offset, offset + 4).toString();
    const chunkSize = data.readUInt32LE(offset + 4);
    if (chunkType === 'data') {
      dataSize = chunkSize;
      wavBytes = data.slice(offset + 8, offset + 8 + chunkSize);
      break;
    }
    // FIX: RIFF chunks are word-aligned — an odd-sized chunk is followed by a pad byte.
    offset += 8 + chunkSize + (chunkSize & 1);
  }
  const frames = dataSize / (channels * sampleWidth);
  return {
    channels,
    sampleWidth,
    sampleRate,
    frames,
    wavBytes,
  };
}
/**
 * Split a buffer into fixed-size chunks; the boolean marks the tail chunk.
 */
function* sliceData(data: Buffer, chunkSize: number): Generator<[Buffer, boolean]> {
  const total = data.length;
  let cursor = 0;
  while (true) {
    const next = cursor + chunkSize;
    if (next >= total) {
      yield [data.slice(cursor, total), true];
      return;
    }
    yield [data.slice(cursor, next), false];
    cursor = next;
  }
}
// Source type of the audio to recognize (v2 API).
enum AudioType {
  LOCAL = 1, // use a local audio file
}
/**
 * Construction options for the v2 streaming ASR client.
 * Defaults noted below are the ones applied in the constructor.
 */
interface AsrClientOptions {
  segDuration?: number; // segment duration in ms (default 15000)
  nbest?: number; // number of n-best candidate results requested (default 1)
  appid?: string; // application id sent in the request body
  token?: string; // access token; also sent as the `Bearer; <token>` Authorization header
  wsUrl?: string; // WebSocket endpoint (defaults to the v2 ASR URL)
  uid?: string; // user id (default 'test')
  workflow?: string; // server-side processing pipeline (default 'audio_in,resample,partition,vad,fe,decode,itn,nlu_punctuate')
  showLanguage?: boolean; // default false
  showUtterances?: boolean; // default false
  resultType?: string; // default 'full'
  format?: string; // default 'wav'
  sampleRate?: number; // default 16000
  language?: string; // default 'zh-CN'
  bits?: number; // default 16
  channel?: number; // default 1
  codec?: string; // default 'raw'
  audioType?: AudioType; // default AudioType.LOCAL
  mp3SegSize?: number; // chunk size in bytes for mp3 input (default 10000)
  cluster?: string; // NOTE(review): cluster is passed as a constructor argument, not read from options
}
// A single audio job: identifier plus file path.
interface AudioItem {
  id: string | number;
  path: string;
}
// Streaming speech recognition client for the Volcengine v2 ASR API.
export class AsrWsClient extends VolcEngineBase {
  private audioPath: string; // local file to recognize in execute()
  private cluster: string; // Volcengine cluster name sent in the request
  private successCode: number = 1000; // server code meaning "OK"; any other code aborts the loop
  private segDuration: number; // segment duration in ms
  private nbest: number; // number of n-best candidate results requested
  private appid: string;
  private token: string;
  private uid: string;
  private workflow: string; // server-side processing pipeline
  private showLanguage: boolean;
  private showUtterances: boolean;
  private resultType: string;
  private format: string; // 'wav' | 'mp3'
  private rate: number; // sample rate in Hz
  private language: string;
  private bits: number;
  private channel: number;
  private codec: string;
  private audioType: AudioType; // NOTE(review): stored but not read elsewhere in this class
  private mp3SegSize: number; // chunk size in bytes for mp3 input
  constructor(audioPath: string, cluster: string, options: AsrClientOptions = {}) {
    super({
      url: options.wsUrl || 'wss://openspeech.bytedance.com/api/v2/asr',
      onConnect: () => this.onWsConnect(),
      wsOptions: {
        // v2 API authenticates via this Authorization header format.
        headers: {
          Authorization: `Bearer; ${options.token}`,
        },
      },
    });
    this.audioPath = audioPath;
    this.cluster = cluster;
    this.segDuration = options.segDuration || 15000;
    this.nbest = options.nbest || 1;
    this.appid = options.appid || '';
    this.token = options.token || '';
    this.uid = options.uid || 'test';
    this.workflow = options.workflow || 'audio_in,resample,partition,vad,fe,decode,itn,nlu_punctuate';
    this.showLanguage = options.showLanguage || false;
    this.showUtterances = options.showUtterances || false;
    this.resultType = options.resultType || 'full';
    this.format = options.format || 'wav';
    this.rate = options.sampleRate || 16000;
    this.language = options.language || 'zh-CN';
    this.bits = options.bits || 16;
    this.channel = options.channel || 1;
    this.codec = options.codec || 'raw';
    this.audioType = options.audioType || AudioType.LOCAL;
    this.mp3SegSize = options.mp3SegSize || 10000;
  }
  private onWsConnect() {
    console.log('ASR WebSocket connected');
  }
  /**
   * Construct request parameters for the initial full client request.
   */
  private constructRequest(reqId: string): any {
    return {
      app: {
        appid: this.appid,
        cluster: this.cluster,
        token: this.token,
      },
      user: {
        uid: this.uid,
      },
      request: {
        reqid: reqId,
        nbest: this.nbest,
        workflow: this.workflow,
        show_language: this.showLanguage,
        show_utterances: this.showUtterances,
        result_type: this.resultType,
        sequence: 1,
      },
      audio: {
        format: this.format,
        rate: this.rate,
        language: this.language,
        bits: this.bits,
        channel: this.channel,
        codec: this.codec,
      },
    };
  }
  /**
   * Process audio data in segments: send the config frame, then stream the
   * audio chunk-by-chunk, awaiting one server reply per chunk. Resolves with
   * the last server message, or with the first non-success response.
   */
  async segmentDataProcessor(wavData: Buffer, segmentSize: number): Promise<any> {
    const reqId = uuid();
    // Construct full client request and compress
    const requestParams = this.constructRequest(reqId);
    const payloadBytes = Buffer.from(JSON.stringify(requestParams));
    const compressedPayload = await gzipPromise(payloadBytes);
    // Create full client request
    const fullClientRequest = Buffer.concat([
      generateFullDefaultHeader(),
      Buffer.alloc(4), // payload size placeholder
      compressedPayload,
    ]);
    // Set payload size
    fullClientRequest.writeInt32BE(compressedPayload.length, 4);
    return new Promise(async (resolve, reject) => {
      try {
        this.ws.send(fullClientRequest);
        // { once: true } below: this outer handler consumes only the reply to
        // the config frame; per-chunk replies are awaited inside the loop.
        const onMessage = async (event: MessageEvent) => {
          const res = parseResponse(Buffer.from(event.data as ArrayBuffer));
          if ('payloadMsg' in res && res.payloadMsg?.code !== this.successCode) {
            resolve(res);
            return;
          }
          let seq = 1; // NOTE(review): incremented below but never transmitted — confirm intended
          let lastMessage = null;
          for (const [chunk, last] of sliceData(wavData, segmentSize)) {
            // Compress chunk if needed
            const compressedChunk = await gzipPromise(chunk);
            // Create audio-only request
            const audioOnlyHeader = last ? generateLastAudioDefaultHeader() : generateAudioDefaultHeader();
            const audioOnlyRequest = Buffer.concat([
              audioOnlyHeader,
              Buffer.alloc(4), // payload size placeholder
              compressedChunk,
            ]);
            // Set payload size
            audioOnlyRequest.writeInt32BE(compressedChunk.length, 4);
            // Send audio data
            this.ws.send(audioOnlyRequest);
            // Wait for response
            const response = await new Promise<any>((resolveChunk) => {
              const messageHandler = (messageEvent: MessageEvent) => {
                const result = parseResponse(Buffer.from(messageEvent.data as ArrayBuffer));
                this.ws.removeEventListener('message', messageHandler);
                resolveChunk(result);
              };
              this.ws.addEventListener('message', messageHandler);
            });
            if ('payloadMsg' in response && response.payloadMsg?.code !== this.successCode) {
              resolve(response);
              return;
            }
            lastMessage = response;
            if (last) {
              break;
            }
            seq++;
          }
          resolve(lastMessage);
        };
        this.ws.addEventListener('message', onMessage, { once: true });
      } catch (error) {
        reject(error);
      }
    });
  }
  /**
   * Execute ASR on the audio file configured at construction time.
   * The chunk size is fixed for mp3 and derived from the header for wav.
   */
  async execute(): Promise<any> {
    try {
      const data = await fs.readFile(this.audioPath);
      if (this.format === 'mp3') {
        return await this.segmentDataProcessor(data, this.mp3SegSize);
      }
      if (this.format !== 'wav') {
        throw new Error('Format should be wav or mp3');
      }
      const wavInfo = await readWavInfo(data);
      const sizePerSec = wavInfo.channels * wavInfo.sampleWidth * wavInfo.sampleRate;
      const segmentSize = Math.floor((sizePerSec * this.segDuration) / 1000);
      return await this.segmentDataProcessor(data, segmentSize);
    } catch (error) {
      console.error('Error executing ASR:', error);
      throw error;
    }
  }
}
/**
* Execute ASR on a single audio file
*/
export async function executeOne(audioItem: AudioItem, cluster: string, options: AsrClientOptions = {}): Promise<any> {
if (!('id' in audioItem) || !('path' in audioItem)) {
throw new Error('Audio item must have id and path properties');
}
const audioId = audioItem.id;
const audioPath = audioItem.path;
const audioType = AudioType.LOCAL;
const asrClient = new AsrWsClient(audioPath, cluster, {
...options,
audioType,
});
const result = await asrClient.execute();
return { id: audioId, path: audioPath, result };
}
/**
 * Convenience test entry: recognize a single local file and print the result.
 */
export async function testOne(audioPath: string, cluster: string, appid: string, token: string, audioFormat: string): Promise<void> {
  const item = { id: 1, path: audioPath };
  const options = { appid, token, format: audioFormat };
  const result = await executeOne(item, cluster, options);
  console.log(result);
}

View File

@@ -0,0 +1,136 @@
// https://git.xiongxiao.me/kevisual/video-tools/raw/branch/main/src/asr/provider/volcengine/auc.ts
import { nanoid } from "nanoid"
// Flash (synchronous) recognition endpoint.
export const FlashURL = "https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash"
// Asynchronous submit endpoint used by the non-flash modes.
export const AsrBaseURL = 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/submit'
// X-Api-Resource-Id values for the two model tiers.
export const AsrBase = 'volc.bigasr.auc'
export const AsrTurbo = 'volc.bigasr.auc_turbo'
// Per-request id generator.
const uuid = () => nanoid()
// Constructor options for Asr.
type AsrOptions = {
  url?: string // NOTE(review): accepted but not read — the constructor derives url from `type`
  appid?: string
  token?: string
  type?: AsrType
}
// 'flash' uses the synchronous flash endpoint; the others use the submit endpoint.
type AsrType = 'flash' | 'standard' | 'turbo'
/**
 * Volcengine big-model file ASR client (flash or submit endpoint).
 * Throws at construction when appid/token are missing.
 */
export class Asr {
  url: string = FlashURL
  appid: string = ""
  token: string = ""
  type: AsrType = 'flash'
  constructor(options: AsrOptions = {}) {
    this.appid = options.appid || ""
    this.token = options.token || ""
    this.type = options.type || 'flash'
    const usesFlashEndpoint = this.type === 'flash'
    if (!usesFlashEndpoint) {
      this.url = AsrBaseURL
    }
    const missingCredentials = !this.appid || !this.token
    if (missingCredentials) {
      throw new Error("VOLCENGINE_Asr_APPID or VOLCENGINE_Asr_TOKEN is not set")
    }
  }
  /** Build per-request auth headers; flash maps to the turbo resource id. */
  header() {
    const resourceId = this.type === 'flash' ? AsrTurbo : AsrBase
    return {
      "X-Api-App-Key": this.appid,
      "X-Api-Access-Key": this.token,
      "X-Api-Resource-Id": resourceId,
      "X-Api-Request-Id": uuid(),
      "X-Api-Sequence": "-1",
    }
  }
  /** POST the recognition request; requires audio.url or audio.data. */
  submit(body: AsrRequest) {
    const audio = body.audio
    if (!audio || (!audio.url && !audio.data)) {
      throw new Error("audio.url or audio.data is required")
    }
    const payload: AsrRequest = {
      ...body,
    }
    return fetch(this.url, { method: "POST", headers: this.header(), body: JSON.stringify(payload) })
  }
  /** Submit and parse the JSON response body. */
  async getText(body: AsrRequest) {
    const response = await this.submit(body)
    return response.json()
  }
}
/** Response shape of the big-model file ASR API. */
export type AsrResponse = {
  audio_info: {
    /**
     * Audio duration, in ms
     */
    duration: number;
  };
  result: {
    additions: {
      duration: string;
    };
    text: string;
    utterances: Array<{
      end_time: number;
      start_time: number;
      text: string;
      words: Array<{
        confidence: number;
        end_time: number;
        start_time: number;
        text: string;
      }>;
    }>;
  };
}
/** Request payload for the submit/flash recognition endpoints. */
export interface AsrRequest {
  user?: {
    uid: string;
  };
  audio: {
    url?: string;
    data?: string; // base64-encoded audio bytes (alternative to url)
    format?: 'wav' | 'pcm' | 'mp3' | 'ogg';
    codec?: 'raw' | 'opus'; // raw / opus; defaults to raw (pcm).
    rate?: 8000 | 16000; // Sample rate: 8000 or 16000; defaults to 16000.
    channel?: 1 | 2; // Channel count: 1 or 2; defaults to 1.
  };
  request?: {
    model_name?: string; // Recognition model name, e.g. "bigmodel"
    enable_words?: boolean; // Enable word-level timestamps; defaults to false.
    enable_sentence_info?: boolean; // Enable sentence-level timestamps; defaults to false.
    enable_utterance_info?: boolean; // Enable utterance-level timestamps; defaults to true.
    enable_punctuation_prediction?: boolean; // Enable punctuation prediction; defaults to true.
    enable_inverse_text_normalization?: boolean; // Enable inverse text normalization; defaults to true.
    enable_separate_recognition_per_channel?: boolean; // Recognize each channel separately; defaults to false.
    audio_channel_count?: 1 | 2; // Channel count, only effective when enable_separate_recognition_per_channel is on; 1 or 2, defaults to 1.
    max_sentence_silence?: number; // Max in-sentence silence, only effective when enable_sentence_info is on; in ms, defaults to 800.
    custom_words?: string[];
    enable_channel_split?: boolean; // Enable channel splitting
    enable_ddc?: boolean; // Enable DDC (source comment says dual-channel denoising — verify against API docs)
    enable_speaker_info?: boolean; // Enable speaker separation
    enable_punc?: boolean; // Enable punctuation prediction (short form)
    enable_itn?: boolean; // Enable inverse text normalization (short form)
    vad_segment?: boolean; // Enable VAD-based segmentation
    show_utterances?: boolean; // Return utterance-level results
    corpus?: {
      boosting_table_name?: string;
      correct_table_name?: string;
      context?: string;
    };
  };
}
// const main = async () => {
// const base64Audio = wavToBase64(audioPath);
// const auc = new Asr({
// appid: config.VOLCENGINE_AUC_APPID,
// token: config.VOLCENGINE_AUC_TOKEN,
// });
// const result = await auc.getText({ audio: { data: base64Audio } });
// console.log(util.inspect(result, { showHidden: false, depth: null, colors: true }))
// }
// main();

View File

@@ -0,0 +1,57 @@
import { WSServer } from '../../provider/ws-server.ts';
import { nanoid } from 'nanoid';
// 16-character request id generator.
export const uuid = () => nanoid(16);
/** Options accepted by VolcEngineBase and forwarded to WSServer. */
type VolcEngineBaseOptions = {
  url?: string; // WebSocket endpoint
  ws?: WebSocket; // reuse an existing socket
  onConnect?: () => void; // invoked once the connection opens
  wsOptions?: {
    // Volcengine auth headers (v3 APIs) or Authorization (v2 API).
    headers?: {
      'X-Api-Resource-Id'?: string;
      'X-Api-Access-Key'?: string;
      'X-Api-App-Key'?: string;
      'X-Api-Request-Id'?: string;
      Authorization?: string;
    };
  };
};
/**
 * Shared WebSocket base for the Volcengine ASR clients: adds send-gating
 * (`canSend`), end-of-stream (`isEnd`) and error state on top of WSServer.
 */
export class VolcEngineBase extends WSServer {
  canSend = false; // whether callers may currently stream audio
  isEnd: boolean = false; // the audio stream has been marked finished
  isError: boolean = false; // a server-side error was observed
  constructor(opts: VolcEngineBaseOptions) {
    super({
      url: opts.url,
      ws: opts.ws,
      onConnect: opts.onConnect,
      wsOptions: opts.wsOptions,
    });
  }
  async onOpen() {
    console.log('VolcEngineBase onOpen');
    // BUG FIX: delegate to WSServer.onOpen. The previous override only
    // emitted 'open' itself and never set `connected = true` nor invoked
    // `onConnect`, so isConnected() calls made after the event had fired
    // would wait forever.
    await super.onOpen();
  }
  /**
   * Resolve once sending is permitted (immediately when already allowed).
   */
  async isCanSend() {
    if (this.canSend) {
      return true;
    }
    return new Promise((resolve) => {
      this.emitter.once('canSend', () => {
        resolve(true);
      });
    });
  }
  /** Toggle the send gate; wakes isCanSend() waiters when enabling. */
  async setCanSend(canSend: boolean) {
    this.canSend = canSend;
    if (canSend) {
      this.emitter.emit('canSend', canSend);
    }
  }
  /** Mark the audio stream as finished (or not). */
  async setIsEnd(isEnd: boolean) {
    this.isEnd = isEnd;
  }
}

View File

@@ -0,0 +1,28 @@
import { AsrWsClient } from '../asr-ws-big-model-client.ts';
import { audioPath, audioPath2, blankAudioPath, config } from './common.ts';
import fs from 'fs';
// const asr = new AsrWsClient('videos/asr_example.wav');
// tsx src/asr/provider/volcengine/test/asr-bigmodel.ts
// Demo: stream a local wav file through the bigmodel ASR client and log results.
const main = async () => {
  const audioId = '123'; // NOTE(review): unused
  const asrClient = new AsrWsClient({
    appid: config.VOLCENGINE_ASR_MODEL_APPID,
    token: config.VOLCENGINE_ASR_MODEL_TOKEN,
  });
  asrClient.emitter.on('message', (result) => {
    console.log('识别结果', JSON.stringify(result, null, 2));
  })
  asrClient.emitter.on('end', (result) => {
    console.log('识别结束', JSON.stringify(result, null, 2));
  })
  // Give the WebSocket time to connect before streaming audio.
  await new Promise((resolve) => setTimeout(resolve, 2000));
  const data = fs.readFileSync(audioPath);
  await asrClient.sendVoiceFile(data);
  // await asrClient.sendVoiceFile(fs.readFileSync(blankAudioPath));
  asrClient.setIsEnd(true);
  // await asrClient.sendVoiceFile(fs.readFileSync(audioPath2));
};
main();

View File

@@ -0,0 +1,16 @@
import { AsrWsClient } from '../asr-ws-client.ts';
import { audioPath, config, sleep } from './common.ts';
// Demo: v2 streaming ASR against the 'volcengine_input_common' cluster.
const asr = new AsrWsClient(audioPath, 'volcengine_input_common', {
  appid: config.APP_ID,
  token: config.TOKEN,
});
// tsx src/asr/provider/volcengine/test/asr.ts
const main = async () => {
  // Give the WebSocket time to connect before sending audio.
  await sleep(1000);
  const result = await asr.execute();
  console.log('result', JSON.stringify(result, null, 2));
};
main();

View File

@@ -0,0 +1,21 @@
import { audioPath, config, sleep } from './common.ts';
import { Asr } from '../auc.ts';
import fs from 'fs';
import util from 'node:util';
// Read a file from disk and return its bytes as a base64 string.
const wavToBase64 = (filePath: string) => {
  const bytes = fs.readFileSync(filePath);
  return bytes.toString('base64');
};
// Demo: send a local wav (as base64) to the flash recognition API and log the result.
const main = async () => {
  const base64Audio = wavToBase64(audioPath);
  const auc = new Asr({
    appid: config.VOLCENGINE_AUC_APPID,
    token: config.VOLCENGINE_AUC_TOKEN,
  });
  const result = await auc.getText({ audio: { data: base64Audio } });
  // depth: null prints the full nested result object.
  console.log(util.inspect(result, { showHidden: false, depth: null, colors: true }))
}
main();

View File

@@ -0,0 +1,12 @@
import path from 'path';
import dotenv from 'dotenv';
// Environment variables parsed from the project-root .env file
// (undefined when the file is missing).
export const config = dotenv.config({
  path: path.join(process.cwd(), '.env'),
}).parsed;
// Shared audio fixtures for the test scripts.
export const audioPath = path.join(process.cwd(), 'videos/asr_example.wav');
export const audioPath2 = path.join(process.cwd(), 'videos/asr_example2.wav');
export const blankAudioPath = path.join(process.cwd(), 'videos/blank.wav');
// Promise-based delay helper.
export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

View File

@@ -0,0 +1,39 @@
import { AsrWsClient } from '../asr-ws-big-model-client.ts';
import { audioPath, config, sleep } from '../test/common.ts';
import net from 'net';
import { Recording } from '../../../../recorder/index.ts';
import Stream from 'stream';
// Live-microphone demo: batch recorder output and stream it to the bigmodel ASR.
const recorder = new Recording();
const asrClient = new AsrWsClient({
  appid: config.APP_ID,
  token: config.TOKEN,
});
// tsx src/asr/provider/volcengine/test/recorder.ts
const main = async () => {
  // await asrClient.sendVoiceFile(fs.readFileSync(audioPath));
  const send = (data: Buffer) => {
    asrClient.sendVoiceStream(data);
  };
  let chunks: Buffer = Buffer.alloc(0); // accumulator for raw recorder output
  var chunk_size = 960; // for asr chunk_size [5, 10, 5]
  let totalsend = 0; // running byte count of audio forwarded so far
  recorder.stream().on('data', (chunk) => {
    // Accumulate until more than chunk_size bytes are buffered, then flush.
    chunks = Buffer.concat([chunks, chunk]);
    const chunkSize = Buffer.byteLength(chunks);
    if (chunkSize > chunk_size) {
      send(chunks);
      totalsend += chunks.length;
      chunks = Buffer.alloc(0);
    }
  });
  // setTimeout(() => {
  //   recorder.stop();
  // }, 10000);
};
main();

View File

@@ -0,0 +1,98 @@
import { EventEmitter } from 'eventemitter3';
import { initWs } from '../../ws-adapter/index.ts';
import type { ClientOptions } from 'ws';
/** Options for WSServer. */
export type WSSOptions = {
  url: string; // WebSocket endpoint to dial
  ws?: WebSocket; // reuse an already-created socket instead of dialing
  onConnect?: () => void; // invoked once the connection opens
  wsOptions?: ClientOptions; // options forwarded to the underlying `ws` client
  emitter?: EventEmitter; // external emitter to reuse; a fresh one is created otherwise
};
/**
 * Thin wrapper around a WebSocket: binds the lifecycle handlers, re-emits
 * them on `emitter` ('open' | 'message' | 'error' | 'close') and tracks
 * connection state.
 */
export class WSServer {
  ws: WebSocket;
  onConnect?: () => void;
  connected: boolean;
  emitter: EventEmitter;
  url: string;
  wsOptions?: ClientOptions;
  constructor(opts: WSSOptions) {
    this.connected = false;
    this.url = opts.url;
    this.wsOptions = opts.wsOptions;
    this.initWs(opts);
  }
  async initWs(opts: WSSOptions) {
    this.emitter = opts.emitter || new EventEmitter();
    this.onConnect = opts?.onConnect || (() => {});
    // BUG FIX: a caller-supplied socket was previously assigned and then
    // unconditionally overwritten by a freshly dialed one; reuse it instead.
    this.ws = opts.ws ?? (await initWs(opts.url, opts.wsOptions));
    this.bindSocketEvents();
  }
  async reconnect() {
    this.ws = await initWs(this.url, this.wsOptions);
    this.bindSocketEvents();
  }
  /** Attach the four lifecycle handlers to the current socket. */
  private bindSocketEvents() {
    this.ws.onopen = this.onOpen.bind(this);
    this.ws.onmessage = this.onMessage.bind(this);
    this.ws.onerror = this.onError.bind(this);
    this.ws.onclose = this.onClose.bind(this);
  }
  /**
   * WebSocket open handler: marks connected and notifies listeners.
   */
  async onOpen() {
    this.connected = true;
    this?.onConnect?.();
    this.emitter.emit('open');
  }
  /**
   * Resolve once the socket is connected (immediately when it already is).
   * @returns
   */
  async isConnected() {
    if (this.connected) {
      return true;
    }
    return new Promise((resolve) => {
      this.emitter.once('open', () => {
        resolve(true);
      });
    });
  }
  /**
   * WebSocket message handler: re-emits the raw event.
   * @param event
   */
  async onMessage(event: MessageEvent) {
    this.emitter.emit('message', event);
  }
  /**
   * WebSocket error handler.
   * @param event
   */
  async onError(event: Event) {
    console.error('WSS onError');
    this.emitter.emit('error', event);
  }
  /**
   * WebSocket close handler: clears the connected flag.
   * @param event
   */
  async onClose(event: CloseEvent) {
    console.error('WSS onClose');
    this.emitter.emit('close', event);
    this.connected = false;
  }
  /**
   * Close the connection when it is currently open.
   */
  async close() {
    if (this.ws.readyState === WebSocket.OPEN) {
      this.ws.close();
    }
  }
}

View File

@@ -14,7 +14,7 @@ const record = new Recording({
}); });
record.stream().pipe(file); record.stream().pipe(file);
setTimeout(() => { // setTimeout(() => {
record.stop(); // record.stop();
process.exit(0); // process.exit(0);
}, 5000); // }, 5000);

View File

@@ -1,37 +1,6 @@
import { pino } from 'pino'; import { Logger } from '@kevisual/logger/node';
import { useConfig } from '@kevisual/use-config/env';
const config = useConfig(); const level = process.env.LOG_LEVEL || 'info';
export const logger = new Logger({
export const logger = pino({ level: level as any,
level: config.LOG_LEVEL || 'info',
transport: {
target: 'pino-pretty',
options: {
colorize: true,
translateTime: 'SYS:standard',
ignore: 'pid,hostname',
},
},
serializers: {
error: pino.stdSerializers.err,
req: pino.stdSerializers.req,
res: pino.stdSerializers.res,
},
// base: {
// app: 'ai-videos',
// env: process.env.NODE_ENV || 'development',
// },
}); });
export const logError = (message: string, data?: any) => logger.error({ data }, message);
export const logWarning = (message: string, data?: any) => logger.warn({ data }, message);
export const logInfo = (message: string, data?: any) => logger.info({ data }, message);
export const logDebug = (message: string, data?: any) => logger.debug({ data }, message);
export const log = {
error: logError,
warn: logWarning,
info: logInfo,
debug: logDebug,
};

View File

@@ -1,9 +1,8 @@
import assert from 'assert'; import assert from 'assert';
import { logDebug, logInfo } from '../logger/index.ts'; import { logger } from '../logger/index.ts';
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'; import { ChildProcessWithoutNullStreams, spawn } from 'child_process';
import recorders from './recorders/index.ts'; import recorders from '../recorder/recorders/index.ts';
import Stream from 'stream'; import Stream from 'stream';
export type RecordingOptions = { export type RecordingOptions = {
/* 采样率默认为16000 */ /* 采样率默认为16000 */
sampleRate?: number; sampleRate?: number;
@@ -64,9 +63,9 @@ export class Recording {
this.args = args; this.args = args;
this.cmdOptions = Object.assign({ encoding: 'binary', stdio: 'pipe' }, spawnOptions); this.cmdOptions = Object.assign({ encoding: 'binary', stdio: 'pipe' }, spawnOptions);
logDebug(`Started recording`); logger.debug(`Started recording`);
logDebug('options', this.options); logger.debug('options', this.options);
logDebug(` ${this.cmd} ${this.args.join(' ')}`); logger.debug(` ${this.cmd} ${this.args.join(' ')}`);
return this.start(); return this.start();
} }
@@ -92,15 +91,15 @@ Enable debugging with the environment variable DEBUG=record.`,
}); });
err.on('data', (chunk) => { err.on('data', (chunk) => {
logDebug(`STDERR: ${chunk}`); logger.debug(`STDERR: ${chunk}`);
}); });
rec.on('data', (chunk) => { rec.on('data', (chunk) => {
logDebug(`Recording ${chunk.length} bytes`); logger.debug(`Recording ${chunk.length} bytes`);
}); });
rec.on('end', () => { rec.on('end', () => {
logDebug('Recording ended'); logger.debug('Recording ended');
}); });
return this; return this;
@@ -117,7 +116,7 @@ Enable debugging with the environment variable DEBUG=record.`,
this.process.kill('SIGSTOP'); this.process.kill('SIGSTOP');
this._stream.pause(); this._stream.pause();
logDebug('Paused recording'); logger.debug('Paused recording');
} }
resume() { resume() {
@@ -125,7 +124,7 @@ Enable debugging with the environment variable DEBUG=record.`,
this.process.kill('SIGCONT'); this.process.kill('SIGCONT');
this._stream.resume(); this._stream.resume();
logDebug('Resumed recording'); logger.debug('Resumed recording');
} }
isPaused() { isPaused() {

View File

@@ -0,0 +1,30 @@
// One-off integration script: sends a markdown file's text plus a reference
// voice sample to a locally hosted CosyVoice Gradio server and prints the
// prediction result. Run manually; requires the server at the URL below.
import { Client } from '@gradio/client';
import path from 'node:path';
import fs from 'node:fs';
// Earlier fixture audio candidates, kept for quick switching:
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
const name = 'output-1746007775571.mp3';
// NOTE(review): videoTestPath2 is computed but never used below.
const videoTestPath2 = path.join(process.cwd(), 'build', name);
const textPath = path.join(process.cwd(), 'build', '01-kevisual.md');
// Reference audio for the fast-clone mode (prompt_wav_upload).
const exampleAudio = fs.readFileSync(videoTestPath);
// const exampleAudio = await response_0.blob();
const text = fs.readFileSync(textPath, 'utf-8');
const client = await Client.connect('http://192.168.31.220:50000/');
const result = await client.predict('/generate_audio', {
  // tts_text: '梦里的一天结束了一个新的轮m,回梦里的一天结束了一个新的轮回梦里的一,,天结束了一个新的轮回,梦里的一天结束了一个新的轮回',
  tts_text: text,
  mode_checkbox_group: '3s极速复刻',
  sft_dropdown: '',
  prompt_text: '在一无所知中,梦里的一天结束了一个新的轮回,便会开始。',
  prompt_wav_upload: exampleAudio,
  prompt_wav_record: null,
  instruct_text: '',
  seed: 3,
  stream: false,
  speed: 1,
});
console.log(result.data);

View File

@@ -0,0 +1,53 @@
import { Client } from '@gradio/client';

/** Constructor options for {@link CosyVoiceTTS}. */
type CosyVoiceTTSOptions = {
  url: string;
};

/** Payload accepted by the Gradio `/generate_audio` endpoint. */
type AudioOptions = {
  tts_text: string;
  mode_checkbox_group: string;
  sft_dropdown: string;
  prompt_text: string;
  prompt_wav_upload?: any;
  prompt_wav_record: any | null;
  instruct_text: string;
  seed: number;
  stream: boolean;
  speed: number;
};

/**
 * Minimal client for a CosyVoice Gradio server.
 * Connects lazily on first use; `generateAudio` merges caller overrides
 * over demo defaults and forwards them to `/generate_audio`.
 */
export class CosyVoiceTTS {
  private client: Client;
  private url: string;
  isInit = false;
  constructor(opts?: CosyVoiceTTSOptions) {
    this.url = opts?.url || 'http://localhost:50000/';
  }
  /** Connect to the Gradio server; called automatically if needed. */
  async init() {
    this.client = await Client.connect(this.url);
    this.isInit = true;
    return true;
  }
  /** Run TTS; any field in `opts` overrides the demo defaults below. */
  generateAudio = async (opts?: Partial<AudioOptions>) => {
    if (!this.isInit) {
      await this.init();
    }
    const defaults: AudioOptions = {
      tts_text: '梦里的一天结束了一个新的轮m,回梦里的一天结束了一个新的轮回梦里的一,,天结束了一个新的轮回,梦里的一天结束了一个新的轮回',
      mode_checkbox_group: '3s极速复刻',
      sft_dropdown: '',
      prompt_text: '在一无所知中,梦里的一天结束了一个新的轮回,便会开始。',
      // prompt_wav_upload: exampleAudio,
      prompt_wav_record: null,
      instruct_text: '',
      seed: 3,
      stream: false,
      speed: 1,
    };
    const payload: AudioOptions = { ...defaults, ...opts };
    return await this.client.predict('/generate_audio', payload);
  };
}

View File

@@ -0,0 +1,12 @@
// Shared fixtures for the provider test scripts.
import path from 'path';
import dotenv from 'dotenv';
// Parsed key/value pairs from the project-root .env file (undefined if missing).
export const config = dotenv.config({
  path: path.join(process.cwd(), '.env'),
}).parsed;
// Sample audio fixtures under videos/.
export const audioPath = path.join(process.cwd(), 'videos/asr_example.wav');
export const audioPath2 = path.join(process.cwd(), 'videos/asr_example2.wav');
export const blankAudioPath = path.join(process.cwd(), 'videos/blank.wav');
// Promise-based delay helper.
export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

View File

@@ -0,0 +1,56 @@
// Manual test: stream text chunks into a live bidirectional TTS session.
// tsx src/tts/provider/volcengine/test/mix.ts
import { config } from './common.ts';
import { runDemo, TtsMix } from '../tts-mix.ts';
const appId = config.APP_ID;
const token = config.TOKEN;
// const speaker = 'zh_female_shuangkuaisisi_moon_bigtts';
const speaker = 'zh_female_roumeinvyou_emo_v2_mars_bigtts';
const text = '明朝开国皇帝朱元璋也称这本书为,万物之根';
const outputPath = 'videos/tts_mix.wav';
const text2 =
  '明朝开国皇帝朱元璋曾盛赞《道德经》为"万物之根",认为这部道家经典蕴含着治国安邦的至理。这位出身寒微的帝王在建立大明王朝后,深刻体会到老子"无为而治"的智慧,将其奉为治国圭臬。朱元璋不仅亲自批注《道德经》,更命翰林学士编修《御注道德经》,将其中"治大国若烹小鲜"等思想运用于轻徭薄赋的惠民政策中';
// Split the text into 2-character chunks to simulate streaming input.
const CHUNK_SIZE = 2;
const text2Arr: string[] = [];
for (let i = 0; i < text2.length; i += CHUNK_SIZE) {
  text2Arr.push(text2.slice(i, i + CHUNK_SIZE));
}
const sleep = (ms = 1000) =>
  new Promise<boolean>((resolve) => {
    setTimeout(() => resolve(true), ms);
  });
/**
 * Feed the chunks into the session one by one.
 * Uses TtsMix.sendText / sendTextEnd so each chunk waits for its ack;
 * the previous version emitted raw strings on the 'text' channel (whose
 * listener destructures `{ text, id }`) and wrapped the loop in a Promise
 * that never resolved, so the trailing `.then` never fired.
 */
const mockSendText = async (ttsMix: TtsMix) => {
  for (let i = 0; i < text2Arr.length; i++) {
    const chunk = text2Arr[i];
    console.log('开始', i, chunk);
    await ttsMix.sendText(chunk);
    await sleep(10);
  }
  await ttsMix.sendTextEnd();
};
// tsx src/tts/provider/volcengine/test/mix.ts
const main = async () => {
  try {
    console.log('开始', appId, token);
    // await runDemo(appId, token, speaker, text, outputPath);
    const ttsMix = new TtsMix(appId, token);
    // Give the websocket session time to come up before streaming text.
    setTimeout(() => {
      mockSendText(ttsMix).then(() => {
        console.log('完成');
      });
    }, 10000);
    // await ttsMix.getVoiceDemo(speaker, text, outputPath);
    await ttsMix.getVoiceDemo(speaker, '', outputPath, false);
    console.log('完成');
  } catch (err) {
    console.log(err);
  }
};
main();

View File

@@ -0,0 +1,536 @@
// Wire-protocol constants for Volcengine bidirectional TTS.
// Format reference:
// https://www.volcengine.com/docs/6561/1329505#%E7%A4%BA%E4%BE%8Bsamples
import { WebSocket } from 'ws';
import { EventEmitter } from 'eventemitter3';
import fs from 'fs/promises';
import { nanoid } from 'nanoid';
const uuidv4 = nanoid;
const PROTOCOL_VERSION = 0b0001;
const DEFAULT_HEADER_SIZE = 0b0001;
// Message Type:
const FULL_CLIENT_REQUEST = 0b0001;
const AUDIO_ONLY_RESPONSE = 0b1011;
const FULL_SERVER_RESPONSE = 0b1001;
const ERROR_INFORMATION = 0b1111;
// Message Type Specific Flags
const MsgTypeFlagNoSeq = 0b0000; // Non-terminal packet with no sequence
const MsgTypeFlagPositiveSeq = 0b1; // Non-terminal packet with sequence > 0
const MsgTypeFlagLastNoSeq = 0b10; // last packet with no sequence
const MsgTypeFlagNegativeSeq = 0b11; // Payload contains event number (int32)
const MsgTypeFlagWithEvent = 0b100;
// Message Serialization
const NO_SERIALIZATION = 0b0000;
const JSON_TYPE = 0b0001;
// Message Compression
const COMPRESSION_NO = 0b0000;
const COMPRESSION_GZIP = 0b0001;
// Event codes. Client -> server events are "upstream", server -> client "downstream".
const EVENT_NONE = 0;
const EVENT_Start_Connection = 1;
const EVENT_FinishConnection = 2;
const EVENT_ConnectionStarted = 50; // connection established successfully
const EVENT_ConnectionFailed = 51; // connection failed (possibly auth rejection)
const EVENT_ConnectionFinished = 52; // connection closed
// Upstream session events
const EVENT_StartSession = 100;
const EVENT_FinishSession = 102;
// Downstream session events
const EVENT_SessionStarted = 150;
const EVENT_SessionFinished = 152;
const EVENT_SessionFailed = 153;
// Upstream generic events
const EVENT_TaskRequest = 200;
// Downstream TTS events
const EVENT_TTSSentenceStart = 350;
const EVENT_TTSSentenceEnd = 351;
const EVENT_TTSResponse = 352;
/**
 * Binary protocol header (4 bytes):
 *   byte 0: protocol version (high nibble) | header size (low nibble)
 *   byte 1: message type (high nibble) | type-specific flags (low nibble)
 *   byte 2: serialization method (high nibble) | compression (low nibble)
 *   byte 3: reserved
 */
class Header {
  headerSize: number;
  protocolVersion: number;
  messageType: number;
  messageTypeSpecificFlags: number;
  serialMethod: number;
  compressionType: number;
  reservedData: number;
  constructor(
    protocolVersion: number = PROTOCOL_VERSION,
    headerSize: number = DEFAULT_HEADER_SIZE,
    messageType: number = 0,
    messageTypeSpecificFlags: number = 0,
    serialMethod: number = NO_SERIALIZATION,
    compressionType: number = COMPRESSION_NO,
    reservedData: number = 0,
  ) {
    this.protocolVersion = protocolVersion;
    this.headerSize = headerSize;
    this.messageType = messageType;
    this.messageTypeSpecificFlags = messageTypeSpecificFlags;
    this.serialMethod = serialMethod;
    this.compressionType = compressionType;
    this.reservedData = reservedData;
  }
  /** Serialize to the 4-byte wire representation. */
  asBytes(): Buffer {
    const versionAndSize = (this.protocolVersion << 4) | this.headerSize;
    const typeAndFlags = (this.messageType << 4) | this.messageTypeSpecificFlags;
    const serialAndCompression = (this.serialMethod << 4) | this.compressionType;
    return Buffer.from([versionAndSize, typeAndFlags, serialAndCompression, this.reservedData]);
  }
}
/**
 * Optional section of a protocol frame: event code, session id, and/or
 * sequence number — each serialized only when present.
 */
class Optional {
  event: number;
  sessionId: string | null;
  errorCode: number;
  connectionId: string | null;
  responseMetaJson: string | null;
  sequence: number | null;
  constructor(event: number = EVENT_NONE, sessionId: string | null = null, sequence: number | null = null) {
    this.event = event;
    this.sessionId = sessionId;
    this.sequence = sequence;
    this.errorCode = 0;
    this.connectionId = null;
    this.responseMetaJson = null;
  }
  /**
   * Serialize to wire bytes:
   * [event:int32BE]? [len:int32BE sessionId]? [sequence:int32BE]?
   */
  asBytes(): Buffer {
    const parts: Buffer[] = [];
    if (this.event !== EVENT_NONE) {
      const eventBuf = Buffer.alloc(4);
      eventBuf.writeInt32BE(this.event);
      parts.push(eventBuf);
    }
    if (this.sessionId !== null) {
      const idBytes = Buffer.from(this.sessionId);
      const lenBuf = Buffer.alloc(4);
      lenBuf.writeInt32BE(idBytes.length);
      parts.push(lenBuf, idBytes);
    }
    if (this.sequence !== null) {
      const seqBuf = Buffer.alloc(4);
      seqBuf.writeInt32BE(this.sequence);
      parts.push(seqBuf);
    }
    return Buffer.concat(parts);
  }
}
/** Parsed server frame: header, optional section, and raw payload (if any). */
class Response {
  optional: Optional;
  header: Header;
  payload: Buffer | null;
  constructor(header: Header, optional: Optional) {
    this.header = header;
    this.optional = optional;
    this.payload = null;
  }
  /** Payload rendered as a UTF-8 string; empty string when absent. */
  toString(): string {
    if (this.payload === null) {
      return '';
    }
    return this.payload.toString() || '';
  }
}
// Send one protocol frame: header [+ optional] [+ payloadSize + payload].
// Resolves once ws.send's callback fires; rejects on a send error.
async function sendEvent(ws: WebSocket, header: Buffer, optional: Buffer | null = null, payload: Buffer | null = null): Promise<void> {
  return new Promise((resolve, reject) => {
    const fullClientRequest: Buffer[] = [header];
    if (optional !== null) {
      fullClientRequest.push(optional);
    }
    if (payload !== null) {
      // Payload is length-prefixed with a 4-byte big-endian size.
      const payloadSizeBuffer = Buffer.alloc(4);
      payloadSizeBuffer.writeInt32BE(payload.length);
      fullClientRequest.push(payloadSizeBuffer, payload);
    }
    ws.send(Buffer.concat(fullClientRequest), (err) => {
      if (err) reject(err);
      else resolve();
    });
  });
}
/**
 * Read a length-prefixed string from `res` at `offset`
 * (4-byte big-endian size followed by that many UTF-8 bytes).
 * @returns [content, newOffset]
 */
function readResContent(res: Buffer, offset: number): [string, number] {
  const contentSize = res.readInt32BE(offset);
  offset += 4;
  // subarray() replaces the deprecated Buffer#slice (same zero-copy view).
  const content = res.subarray(offset, offset + contentSize).toString();
  offset += contentSize;
  return [content, offset];
}
/**
 * Read a length-prefixed binary payload from `res` at `offset`
 * (4-byte big-endian size followed by that many raw bytes).
 * @returns [payload, newOffset]
 */
function readResPayload(res: Buffer, offset: number): [Buffer, number] {
  const payloadSize = res.readInt32BE(offset);
  offset += 4;
  // subarray() replaces the deprecated Buffer#slice (same zero-copy view).
  const payload = res.subarray(offset, offset + payloadSize);
  offset += payloadSize;
  return [payload, offset];
}
/**
 * Parse a raw server frame into a {@link Response}.
 * Layout: 4 packed header bytes, then (depending on message type) an event
 * code, session/connection info, and a length-prefixed payload.
 * @throws when the websocket delivered a text frame (unexpected here)
 */
function parserResponse(res: Buffer | string): Response {
  if (typeof res === 'string') {
    throw new Error(res);
  }
  const response = new Response(new Header(), new Optional());
  // --- header: unpack the four nibble-packed bytes ---
  const header = response.header;
  const num = 0b00001111; // low-nibble mask
  header.protocolVersion = (res[0] >> 4) & num;
  header.headerSize = res[0] & 0x0f;
  header.messageType = (res[1] >> 4) & num;
  header.messageTypeSpecificFlags = res[1] & 0x0f;
  header.serialMethod = (res[2] >> 4) & num;
  header.compressionType = res[2] & 0x0f;
  header.reservedData = res[3];
  let offset = 4;
  const optional = response.optional;
  if (header.messageType === FULL_SERVER_RESPONSE || header.messageType === AUDIO_ONLY_RESPONSE) {
    // read event
    if (header.messageTypeSpecificFlags === MsgTypeFlagWithEvent) {
      optional.event = res.readInt32BE(offset);
      offset += 4;
      if (optional.event === EVENT_NONE) {
        return response;
      }
      // read connectionId
      else if (optional.event === EVENT_ConnectionStarted) {
        [optional.connectionId, offset] = readResContent(res, offset);
      } else if (optional.event === EVENT_ConnectionFailed) {
        [optional.responseMetaJson, offset] = readResContent(res, offset);
      } else if (optional.event === EVENT_SessionStarted || optional.event === EVENT_SessionFailed || optional.event === EVENT_SessionFinished) {
        // Session events carry both session id and metadata JSON.
        [optional.sessionId, offset] = readResContent(res, offset);
        [optional.responseMetaJson, offset] = readResContent(res, offset);
      } else {
        // All other events: session id followed by the audio/JSON payload.
        [optional.sessionId, offset] = readResContent(res, offset);
        [response.payload, offset] = readResPayload(res, offset);
      }
    }
  } else if (header.messageType === ERROR_INFORMATION) {
    // Error frames: 4-byte error code then a length-prefixed payload.
    optional.errorCode = res.readInt32BE(offset);
    offset += 4;
    [response.payload, offset] = readResPayload(res, offset);
  }
  return response;
}
// Debug hook for inspecting parsed frames; logging intentionally disabled.
function printResponse(res: Response, tag: string): void {
  // console.log(`===>${tag} header:`, res.header, res.optional.event);
  // console.log(`===>${tag} optional:`, res.optional);
}
/**
 * Build the JSON request payload for the BidirectionalTTS namespace.
 * Only `text`/`speaker` and the audio format fields vary between events.
 */
function getPayloadBytes(
  uid: string = '1234',
  event: number = EVENT_NONE,
  text: string = '',
  speaker: string = '',
  audioFormat: string = 'mp3',
  audioSampleRate: number = 24000,
): Buffer {
  const payload = {
    user: { uid },
    event,
    namespace: 'BidirectionalTTS',
    req_params: {
      text,
      speaker,
      audio_params: {
        format: audioFormat,
        sample_rate: audioSampleRate,
      },
    },
  };
  return Buffer.from(JSON.stringify(payload));
}
// Open the logical connection (event 1) once the websocket is established.
async function startConnection(websocket: WebSocket): Promise<void> {
  const header = new Header(PROTOCOL_VERSION, DEFAULT_HEADER_SIZE, FULL_CLIENT_REQUEST, MsgTypeFlagWithEvent).asBytes();
  const optional = new Optional(EVENT_Start_Connection).asBytes();
  const payload = Buffer.from('{}');
  return await sendEvent(websocket, header, optional, payload);
}
// Start a TTS session (event 100) for the given speaker.
async function startSession(websocket: WebSocket, speaker: string, sessionId: string): Promise<void> {
  const header = new Header(PROTOCOL_VERSION, DEFAULT_HEADER_SIZE, FULL_CLIENT_REQUEST, MsgTypeFlagWithEvent, JSON_TYPE).asBytes();
  const optional = new Optional(EVENT_StartSession, sessionId).asBytes();
  const payload = getPayloadBytes('1234', EVENT_StartSession, '', speaker);
  return await sendEvent(websocket, header, optional, payload);
}
// Push one chunk of text into the running session (event 200).
async function sendText(ws: WebSocket, speaker: string, text: string, sessionId: string): Promise<void> {
  const header = new Header(PROTOCOL_VERSION, DEFAULT_HEADER_SIZE, FULL_CLIENT_REQUEST, MsgTypeFlagWithEvent, JSON_TYPE).asBytes();
  console.log('sendText=========', text);
  const optional = new Optional(EVENT_TaskRequest, sessionId).asBytes();
  const payload = getPayloadBytes('1234', EVENT_TaskRequest, text, speaker);
  return await sendEvent(ws, header, optional, payload);
}
// Close the session (event 102).
async function finishSession(ws: WebSocket, sessionId: string): Promise<void> {
  const header = new Header(PROTOCOL_VERSION, DEFAULT_HEADER_SIZE, FULL_CLIENT_REQUEST, MsgTypeFlagWithEvent, JSON_TYPE).asBytes();
  const optional = new Optional(EVENT_FinishSession, sessionId).asBytes();
  const payload = Buffer.from('{}');
  return await sendEvent(ws, header, optional, payload);
}
// Close the logical connection (event 2).
async function finishConnection(ws: WebSocket): Promise<void> {
  const header = new Header(PROTOCOL_VERSION, DEFAULT_HEADER_SIZE, FULL_CLIENT_REQUEST, MsgTypeFlagWithEvent, JSON_TYPE).asBytes();
  const optional = new Optional(EVENT_FinishConnection).asBytes();
  const payload = Buffer.from('{}');
  return await sendEvent(ws, header, optional, payload);
}
type RunOptions = {
  id?: string; // listener id hint — NOTE(review): not consumed inside runDemo currently
  autoEnd?: boolean; // when true, finish the session right after the initial text
};
/**
 * Drive one bidirectional TTS exchange over a websocket.
 * Audio chunks are appended to `outputPath` and, when `emitter` is given,
 * mirrored on its 'writeFile' channel; 'text'/'textEnd' events on the
 * emitter feed additional text into the live session.
 * Resolves when the server confirms the connection has finished.
 */
export async function runDemo(
  appId: string,
  token: string,
  speaker: string,
  text: string,
  outputPath: string,
  emitter?: EventEmitter,
  opts: RunOptions = {},
): Promise<void> {
  const autoEnd = opts.autoEnd ?? true;
  return new Promise((resolve, reject) => {
    const wsHeader = {
      'X-Api-App-Key': appId,
      'X-Api-Access-Key': token,
      'X-Api-Resource-Id': 'volc.service_type.10029',
      'X-Api-Connect-Id': uuidv4(),
    };
    const url = 'wss://openspeech.bytedance.com/api/v3/tts/bidirection';
    const ws = new WebSocket(url, { headers: wsHeader });
    const filename = outputPath.split('/').pop() || '';
    // First-audio-chunk marker for 'writeFile' listeners.
    let isBegin = true;
    const writeFileEmitter = (data: Buffer) => {
      const value: TTSWriteType = {
        type: 'tts-mix',
        filename,
        data,
      };
      if (isBegin) {
        value.isBegin = true;
        isBegin = false;
      }
      emitter?.emit?.('writeFile', value);
    };
    // Emit the terminal (empty) chunk so buffered listeners can flush.
    const finishEmitter = () => {
      emitter?.emit?.('writeFile', {
        type: 'tts-mix',
        isEnd: true,
        data: Buffer.from(''),
        filename,
      });
    };
    ws.on('open', async () => {
      try {
        await startConnection(ws);
        let fileHandle: fs.FileHandle | null = null;
        let sessionId: string = '';
        let isFirstResponse = true;
        let cacheText = ''; // NOTE(review): never written — apparent leftover
        // Ack an emitter-driven request by its correlation id.
        const emitOk = (id: string, code = 200) => {
          emitter.emit(id, { code, msg: 'ok' });
        };
        // NOTE(review): these handlers assume `emitter` is provided when
        // streaming text; only the file-write helpers use emitter?. guards.
        emitter.on('text', async ({ text, id }) => {
          await sendText(ws, speaker, text, sessionId);
          emitOk(id);
        });
        emitter.on('textEnd', async ({ id }) => {
          console.log('text end');
          await finishSession(ws, sessionId);
          emitOk(id);
        });
        ws.on('message', async (data) => {
          try {
            const res = parserResponse(data as Buffer);
            printResponse(res, 'message res:');
            if (res.optional.event === EVENT_ConnectionStarted) {
              // Connection is up: open a fresh session with a dash-free id.
              sessionId = uuidv4().replace(/-/g, '');
              await startSession(ws, speaker, sessionId);
              return;
            } else if (res.optional.event === EVENT_ConnectionFinished) {
              ws.close();
              resolve();
              return;
            }
            if (res.optional.event === EVENT_SessionStarted && isFirstResponse) {
              isFirstResponse = false;
              console.log('start session', sessionId, autoEnd);
              emitter.emit('isConnect', sessionId);
              text && (await sendText(ws, speaker, text, sessionId));
              autoEnd && (await finishSession(ws, sessionId));
              fileHandle = await fs.open(outputPath, 'w');
            } else if (!isFirstResponse) {
              if (res.optional.event === EVENT_TTSResponse && res.header.messageType === AUDIO_ONLY_RESPONSE && res.payload && fileHandle) {
                // Audio chunk: append to the file and mirror to listeners.
                await fileHandle.write(res.payload);
                writeFileEmitter(res.payload);
              } else if (res.optional.event === EVENT_TTSSentenceStart || res.optional.event === EVENT_TTSSentenceEnd) {
                // continue
              } else {
                // Any other event (e.g. 152 session finished): close out.
                if (fileHandle) {
                  await fileHandle.close();
                  fileHandle = null;
                }
                await finishConnection(ws);
                finishEmitter();
              }
            }
          } catch (err) {
            ws.close();
            reject(err);
          }
        });
      } catch (err) {
        ws.close();
        reject(err);
      }
    });
    ws.on('error', (err) => {
      reject(err);
    });
  });
}
/** Chunk descriptor emitted on the 'writeFile' channel while audio streams in. */
type TTSWriteType = {
  type: 'tts-mix';
  filename: string;
  data?: Buffer;
  isBegin?: boolean; // set on the first audio chunk of a stream
  isEnd?: boolean; // set on the final (possibly empty) chunk
  index?: number; // running chunk counter (buffered listener only)
};
/**
 * High-level wrapper around {@link runDemo} that exposes the streaming
 * session through an EventEmitter: push text with `sendText`, close the
 * session with `sendTextEnd`, and subscribe to audio chunks with
 * `onWriteFile` / `onWriteFileBuffer`.
 */
export class TtsMix {
  appId: string;
  token: string;
  emitter: EventEmitter;
  isStart = false;
  constructor(appId: string, token: string) {
    this.appId = appId;
    this.token = token;
    this.emitter = new EventEmitter();
    this.emitter.on('isConnect', () => {
      this.isStart = true;
    });
  }
  /**
   * Start a TTS session that streams audio into a file.
   * @param speaker voice id
   * @param text initial text (may be empty when streaming via sendText)
   * @param outputPath destination audio file path
   * @param autoEnd close the session right after the initial text (default true)
   */
  async getVoiceDemo(speaker: string, text: string, outputPath: string, autoEnd = true): Promise<void> {
    const id = nanoid();
    const listenId = 'text' + id;
    return runDemo(this.appId, this.token, speaker, text, outputPath, this.emitter, { autoEnd, id: listenId });
  }
  /** Resolve once the session is connected (immediately if already up). */
  async isConnect() {
    if (this.isStart) {
      return Promise.resolve(true);
    }
    return new Promise((resolve) => {
      this.emitter.once('isConnect', resolve);
    });
  }
  /** Send one text chunk; resolves with the session ack ({code, msg}). */
  async sendText(text: string): Promise<{ code?: number; msg?: string }> {
    const id = nanoid();
    return new Promise((resolve) => {
      this.emitter.once(id, resolve);
      this.emitter.emit('text', { text, id });
    });
  }
  /** Signal that no more text will be sent; resolves with the ack. */
  async sendTextEnd() {
    const id = nanoid();
    return new Promise((resolve) => {
      this.emitter.once(id, resolve);
      this.emitter.emit('textEnd', { id });
    });
  }
  /**
   * Subscribe to raw audio chunks as they are written to the output file.
   * @returns unsubscribe function
   */
  onWriteFile(callback: (data: TTSWriteType) => void) {
    this.emitter.on('writeFile', callback);
    return () => {
      this.emitter.off?.('writeFile', callback);
    };
  }
  /**
   * Buffered variant of {@link onWriteFile}: accumulates chunks and flushes
   * once they exceed `chunkSize`, or unconditionally on the final chunk.
   * @TIPS very small chunks stutter during front-end playback, hence buffering
   * @returns unsubscribe function
   */
  onWriteFileBuffer(callback: (data: TTSWriteType) => void, opts?: { chunkSize?: number }) {
    let index = 0;
    let sendBuffer: Buffer = Buffer.alloc(0);
    const chunkSize = opts?.chunkSize || 1024 * 50; // 50kb
    const callbackBuff = (data: TTSWriteType) => {
      index++;
      if (data.isBegin) {
        // The begin marker carries no audio worth buffering; forward as-is.
        callback(data);
        return;
      }
      const { data: videoBuffer } = data;
      const sendValue = {
        ...data,
        index,
      };
      sendBuffer = Buffer.concat([sendBuffer, videoBuffer]);
      // (was `sendBufferLenght` — typo fixed; the two flush branches were
      // identical and are merged into one condition)
      const sendBufferLength = Buffer.byteLength(sendBuffer);
      if (sendBufferLength > chunkSize || data.isEnd) {
        sendValue.data = sendBuffer;
        callback(sendValue);
        sendBuffer = Buffer.alloc(0);
      }
    };
    this.emitter.on('writeFile', callbackBuff);
    return () => {
      this.emitter.off?.('writeFile', callbackBuff);
    };
  }
}

82
src/utils/convert.ts Normal file
View File

@@ -0,0 +1,82 @@
type EncodeWavOptions = {
  numChannels?: number;
  sampleRate?: number;
  byteRate?: number;
};
/**
 * Wrap raw 16-bit PCM samples in a canonical 44-byte WAV (RIFF) header.
 *
 * Fixes over the previous version:
 * - RIFF chunk size (offset 4) is 36 + dataLen (was dataLen).
 * - data chunk size (offset 40) is dataLen (was dataLen + 36).
 * - byteRate defaults to sampleRate * channels * 2 (was a literal 16).
 * - block align is channels * 2 (was hard-coded 4), and 16-bit header
 *   fields use writeUInt16LE instead of writeUInt8.
 * @param rawPCM raw PCM samples (16-bit little-endian assumed)
 * @param options channel count / sample rate / byte rate overrides
 * @returns Buffer containing header + samples
 * @throws TypeError when rawPCM is neither Buffer nor string
 */
export function encodeWav(rawPCM: Buffer | string, options?: EncodeWavOptions) {
  if (typeof rawPCM === 'string') {
    rawPCM = Buffer.from(rawPCM, 'binary');
  }
  if (!Buffer.isBuffer(rawPCM)) {
    throw new TypeError('pcm data must be Buffer or string');
  }
  const opt = options || {};
  const sampleRate = opt.sampleRate || 16000;
  const numChannels = opt.numChannels || 1;
  const bytesPerSample = 2; // 16-bit PCM
  const blockAlign = numChannels * bytesPerSample;
  const byteRate = opt.byteRate || sampleRate * blockAlign;
  const buf = rawPCM;
  const header = Buffer.alloc(44);
  header.write('RIFF', 0);
  header.writeUInt32LE(36 + buf.length, 4); // RIFF chunk size
  header.write('WAVE', 8);
  header.write('fmt ', 12);
  header.writeUInt32LE(16, 16); // fmt subchunk size (PCM)
  header.writeUInt16LE(1, 20); // audio format: 1 = PCM
  header.writeUInt16LE(numChannels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(16, 34); // bits per sample
  header.write('data', 36);
  header.writeUInt32LE(buf.length, 40); // data chunk size
  return Buffer.concat([header, buf]);
}
/**
 * Strip the 44-byte WAV header, returning the raw PCM samples.
 * Accepts either a Buffer or a 'binary'-encoded string.
 * @throws TypeError when the input is neither Buffer nor string
 */
export function decodeWav(rawWav: Buffer | string) {
  const wav = typeof rawWav === 'string' ? Buffer.from(rawWav, 'binary') : rawWav;
  if (!Buffer.isBuffer(wav)) {
    throw new TypeError('pcm data must be Buffer or string');
  }
  // Drop the fixed-size RIFF/WAVE header; the remainder is sample data.
  return wav.subarray(44);
}
// Convenience namespace bundling both WAV converters.
export const converter = {
  encodeWav,
  decodeWav,
};
/**
 * Generate a buffer of silent 16-bit PCM audio.
 * @param duration length in seconds
 * @param sampleRate samples per second
 * @param channels channel count
 * @returns zero-filled PCM buffer (2 bytes per sample)
 */
export const generateSilent = (duration: number = 2, sampleRate: number = 16000, channels: number = 1) => {
  const bytesPerSample = 2;
  const totalBytes = Math.floor(duration * sampleRate * channels * bytesPerSample);
  return Buffer.alloc(totalBytes); // Buffer.alloc zero-fills, i.e. silence
};

57
src/wake/test/build.ts Normal file
View File

@@ -0,0 +1,57 @@
// Manual experiment: offline wake-word detection over a WAV fixture with Vosk.
import vosk from 'vosk';
import { Recording } from '../../recorder/index.ts';
import fs from 'fs';
import path from 'path';
import { audioPath, sleep, mySpeechText } from './common.ts';
import { encodeWav, decodeWav } from '../../utils/convert.ts';
// A Vosk model must be downloaded first.
// const MODEL_PATH = 'vosk-model-small-en-us-0.15';
const MODEL_PATH = path.join(process.cwd(), 'models/vosk-model-small-cn-0.22');
// Feed an entire WAV file to the recognizer in one call and log the result.
// NOTE(review): only the "model missing" path returns a value; the success
// path falls through and resolves undefined.
async function detectWithVosk(audioFilePath) {
  if (!fs.existsSync(MODEL_PATH)) {
    console.error('请先下载Vosk模型');
    return false;
  }
  const model = new vosk.Model(MODEL_PATH);
  const rec = new vosk.Recognizer({ model: model, sampleRate: 16000 });
  // const wakeWords = ['hey computer', 'okay jarvis']; // custom wake-word list
  const wakeWords = ['欢迎']; // custom wake-word list
  const audioBuffer = fs.readFileSync(audioFilePath);
  const pcmBuffer = decodeWav(audioBuffer);
  const result = rec.acceptWaveform(pcmBuffer);
  console.log('result', result, rec.result());
  // Earlier chunked variant, kept for reference:
  // const result = await rec.acceptWaveformAsync(pcmBuffer);
  // console.log('result', result, rec.result());
  // return new Promise((resolve) => {
  //   const pcmBufferLength = Buffer.byteLength(pcmBuffer);
  //   console.log('pcmBufferLength', pcmBufferLength);
  //   const bufferLength = 1024 * 8;
  //   let index = 0;
  //   for (let i = 0; i < pcmBufferLength; i += bufferLength) {
  //     const chunk = pcmBuffer.subarray(i, i + bufferLength);
  //     index++;
  //     if (rec.acceptWaveform(chunk)) {
  //       const result = rec.result();
  //       console.log('=========result', result, index);
  //       const text = result.text.toLowerCase();
  //       if (wakeWords.some((word) => text.includes(word))) {
  //         resolve(true);
  //       }
  //     }
  //   }
  //   resolve(false);
  // });
}
// detectWithVosk(audioPath).then((result) => {
//   console.log('result', result);
// });
detectWithVosk(mySpeechText).then((result) => {
  console.log('result', result);
});

18
src/wake/test/common.ts Normal file
View File

@@ -0,0 +1,18 @@
// Shared fixtures and model paths for the wake-word test scripts.
import path from 'path';
import dotenv from 'dotenv';
// Parsed .env values from the project root (undefined if the file is missing).
export const config = dotenv.config({
  path: path.join(process.cwd(), '.env'),
}).parsed;
// WAV fixtures under videos/.
export const audioPath = path.join(process.cwd(), 'videos/asr_example.wav');
export const mySpeechText = path.join(process.cwd(), 'videos/my_speech_text.wav');
export const audioPath2 = path.join(process.cwd(), 'videos/asr_example2.wav');
export const blankAudioPath = path.join(process.cwd(), 'videos/blank.wav');
// Promise-based delay helper.
export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
// Vosk model locations; the small variant is the default.
const model_all = 'models/vosk-model-cn-0.22';
const model_small = 'models/vosk-model-small-cn-0.22';
export const MODEL_PATH = path.join(process.cwd(), model_small);
// export const MODEL_PATH = path.join(process.cwd(), model_all);

178
src/wake/test/stream.ts Normal file
View File

@@ -0,0 +1,178 @@
// Streaming wake-word experiments: feed WAV fixtures or live microphone
// audio into a Vosk recognizer.
import vosk from 'vosk';
import { Recording } from '../../recorder/index.ts';
import fs, { WriteStream } from 'fs';
import path from 'path';
import { audioPath, sleep, mySpeechText, MODEL_PATH } from './common.ts';
import { encodeWav, decodeWav } from '../../utils/convert.ts';
// Replay a WAV file through the recognizer in 1 KiB slices, logging
// partial and final results as they arrive. Returns false if no model.
const streamText = async (audioFilePath: string) => {
  if (!fs.existsSync(MODEL_PATH)) {
    console.error('请先下载Vosk模型');
    return false;
  }
  const model = new vosk.Model(MODEL_PATH);
  const rec = new vosk.Recognizer({ model: model, sampleRate: 16000 });
  const audioBuffer = fs.readFileSync(audioFilePath);
  const pcmBuffer = decodeWav(audioBuffer);
  for (let i = 0; i < pcmBuffer.length; i += 1024) {
    const chunk = pcmBuffer.subarray(i, i + 1024);
    if (rec.acceptWaveform(chunk)) {
      const result = rec.result();
      console.log('Streamed Result:', result);
    } else {
      const partialResult = rec.partialResult();
      console.log('Partial Result:', partialResult);
    }
    // await sleep(100); // simulate streaming latency
  }
  return true;
};
// 测试流式处理
// streamText(mySpeechText)
// .then((result) => {
// console.log('Final Result:', result);
// })
// .catch((error) => {
// console.error('Error during streaming:', error);
// });
// Capture live microphone audio and run wake-word detection over it.
// Returns a handle whose stop() ends the recording.
const record = async () => {
  const recording = new Recording({
    sampleRate: 16000,
    channels: 1,
  });
  recording.start();
  const stream = recording.stream();
  console.log('Recording started...', stream);
  const model = new vosk.Model(MODEL_PATH);
  const rec = new vosk.Recognizer({
    model: model,
    sampleRate: 16000,
    grammar: ['你', '好', '小', '嗨', '秀'], // wake-word characters
  });
  console.log('Vosk Recognizer initialized...');
  // Accumulate small capture chunks into larger batches before recognition.
  let accumulatedBuffer = Buffer.alloc(0);
  const PROCESS_SIZE = 4 * 8192; // merge roughly four 8192-byte chunks (tune as needed)
  stream.on('data', (data: Buffer) => {
    // const pcmBuffer = decodeWav(data); // 8192 bytes per chunk
    const pcmBuffer = data; // assumes the stream already yields raw PCM — TODO confirm
    // Append the new capture data to the accumulation buffer.
    accumulatedBuffer = Buffer.concat([accumulatedBuffer, pcmBuffer]);
    // Process once enough audio has accumulated.
    if (accumulatedBuffer.length >= PROCESS_SIZE) {
      if (rec.acceptWaveform(accumulatedBuffer)) {
        const result = rec.result();
        console.log('Recorded Result:', result);
        // Check whether the recognized text contains a wake word.
        if (result.text) {
          const detect = detectWakeWord(result.text);
          if (detect.detected) {
            console.log(`检测到唤醒词: "${detect.word}",置信度: ${detect.confidence}`);
          }
          // post-wake actions would go here
        }
      } else {
        const partialResult = rec.partialResult();
        console.log('Partial Result:', partialResult);
      }
      // Reset the accumulation buffer.
      accumulatedBuffer = Buffer.alloc(0);
    }
  });
  // Flush and clean up when the capture stream ends.
  stream.on('end', () => {
    // Process whatever is left in the buffer.
    if (accumulatedBuffer.length > 0) {
      if (rec.acceptWaveform(accumulatedBuffer)) {
        const result = rec.result();
        console.log('Final Recorded Result:', result);
      }
    }
    // Fetch the recognizer's final result.
    const finalResult = rec.finalResult();
    console.log('Final Complete Result:', finalResult);
    // Release native resources.
    rec.free();
    model.free();
  });
  // Hand back a stop() control for the caller.
  return {
    stop: () => {
      recording.stop();
    },
  };
};
// Wake-word matching configuration.
const wakeConfig = {
  words: ['你好小小', '嗨小小', '小小', '秀秀'],
  threshold: 0.75, // minimum similarity to count as a match
  minWordCount: 2, // minimum recognized text length in characters
};
// Pick the best-matching wake word above the threshold, if any.
function detectWakeWord(text: string): { detected: boolean; confidence: number; word: string } {
  if (!text || text.length < wakeConfig.minWordCount) return { detected: false, confidence: 0, word: '' };
  let bestMatch = { detected: false, confidence: 0, word: '' };
  for (const wakeWord of wakeConfig.words) {
    // Score the recognized text against this wake word.
    const confidence = calculateSimilarity(text.toLowerCase(), wakeWord.toLowerCase());
    console.log(`检测到唤醒词 "${wakeWord}" 的相似度: ${confidence}`);
    if (confidence > wakeConfig.threshold && confidence > bestMatch.confidence) {
      bestMatch = { detected: true, confidence, word: wakeWord };
    }
  }
  return bestMatch;
}
// Rough string-similarity score in [0, 1].
// Substring containment short-circuits to 1.0; otherwise the best
// positional character overlap of the shorter string slid across the
// longer one is returned. (Despite the original comment, this is NOT
// Levenshtein distance.)
function calculateSimilarity(str1: string, str2: string): number {
  if (str1.includes(str2)) return 1.0;
  const [longer, shorter] = str1.length > str2.length ? [str1, str2] : [str2, str1];
  if (shorter.length === 0) return 0;
  let bestOverlap = 0;
  for (let start = 0; start <= longer.length - shorter.length; start++) {
    const window = longer.substring(start, start + shorter.length);
    let overlap = 0;
    for (let k = 0; k < shorter.length; k++) {
      if (window[k] === shorter[k]) overlap++;
    }
    bestOverlap = Math.max(bestOverlap, overlap);
  }
  return bestOverlap / shorter.length;
}
// Start recording; stop automatically after 10 * 30 s (5 minutes).
(async () => {
  const recorder = await record();
  setTimeout(() => {
    console.log('Stopping recording...');
    recorder.stop();
  }, 10 * 30 * 1000);
})();

18
src/ws-adapter/browser.ts Normal file
View File

@@ -0,0 +1,18 @@
// @ts-nocheck
// Resolve the native WebSocket constructor across environments
// (browser, legacy Firefox MozWebSocket, node globals, web workers).
// Based on:
// https://github.com/maxogden/websocket-stream/blob/48dc3ddf943e5ada668c31ccd94e9186f02fafbd/ws-fallback.js
let ws: typeof WebSocket;
if (typeof WebSocket !== 'undefined') {
  ws = WebSocket;
} else if (typeof MozWebSocket !== 'undefined') {
  ws = MozWebSocket;
} else if (typeof global !== 'undefined') {
  // node / bundler globals; may still be undefined if no ws global exists
  ws = global.WebSocket || global.MozWebSocket;
} else if (typeof window !== 'undefined') {
  ws = window.WebSocket || window.MozWebSocket;
} else if (typeof self !== 'undefined') {
  ws = self.WebSocket || self.MozWebSocket;
}
export default ws;

42
src/ws-adapter/index.ts Normal file
View File

@@ -0,0 +1,42 @@
// Heuristic environment detection: treat as browser when `process` is
// absent, a DOM `window.document` exists, or BROWSER=true is set.
const isBrowser = (typeof process === 'undefined') ||
  (typeof window !== 'undefined' && typeof window.document !== 'undefined') ||
  (typeof process !== 'undefined' && process?.env?.BROWSER === 'true');
// Normalize an http(s):// URL to its ws(s):// equivalent.
// URLs with any other scheme pass through unchanged.
const chantHttpToWs = (url: string) => {
  if (url.startsWith('https://')) {
    return url.replace('https://', 'wss://');
  }
  if (url.startsWith('http://')) {
    return url.replace('http://', 'ws://');
  }
  return url;
};
type WebSocketOptions = {
  /**
   * Whether to reject invalid/self-signed TLS certificates, in node only
   */
  rejectUnauthorized?: boolean;
  headers?: Record<string, string>;
  [key: string]: any;
};
/**
 * Create a WebSocket that works in both browser and node.
 * In the browser the native constructor is used (options are ignored —
 * the browser API accepts none of them); in node the 'ws' package is
 * imported dynamically and the options are forwarded.
 */
export const initWs = async (url: string, options?: WebSocketOptions) => {
  let ws: WebSocket;
  url = chantHttpToWs(url); // http(s) URLs are normalized to ws(s)
  if (isBrowser) {
    ws = new WebSocket(url);
  } else {
    const WebSocket = await import('ws').then((module) => module.default);
    const { rejectUnauthorized, headers, ...rest } = options || {};
    ws = new WebSocket(url, {
      rejectUnauthorized: rejectUnauthorized ?? true,
      headers: headers,
      ...rest,
    }) as any;
  }
  return ws;
};
// NOTE(review): unused here — likely a leftover from an emitter abstraction.
interface EventEmitterOptions {
  /**
   * Enables automatic capturing of promise rejection.
   */
  captureRejections?: boolean | undefined;
}

3
src/ws-adapter/node.ts Normal file
View File

@@ -0,0 +1,3 @@
import ws from 'ws';
export default ws;

BIN
test.wav

Binary file not shown.

View File

@@ -1,20 +0,0 @@
// tsup build configuration: single-entry ESM bundle emitted to dist/.
import { defineConfig } from 'tsup';
// import glob from 'fast-glob';
// const services = glob.sync('src/services/*.ts');
const entrys = ['src/index.ts'];
export default defineConfig({
  entry: entrys,
  // Force .mjs for the esm output so node treats it as a module.
  outExtension: ({ format }) => ({
    js: format === 'esm' ? '.mjs' : '.js',
  }),
  splitting: false,
  sourcemap: false,
  clean: true,
  format: 'esm',
  external: ['dotenv'], // keep dotenv as a runtime dependency, not bundled
  dts: true,
  outDir: 'dist',
  tsconfig: 'tsconfig.json',
});

BIN
videos/asr_example2.wav Normal file

Binary file not shown.

BIN
videos/blank.wav Normal file

Binary file not shown.

View File

@@ -0,0 +1 @@
在一无所知中,梦里的一天结束了一个新的轮回,便会开始。

BIN
videos/my_speech_text.wav Normal file

Binary file not shown.