update

更新 src/asr/provider/volcengine/auc.ts
update
2025-10-14 23:04:59 +08:00 · 2025-10-14 22:52:24 +08:00 · 2025-10-13 22:13:19 +08:00 · 2025-10-03 18:43:57 +08:00 · 2025-08-23 22:34:36 +08:00 · 2025-06-23 18:34:54 +08:00
30 changed files with 1617 additions and 1787 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,21 +1,69 @@
 node_modules

-dist
-
-app.config.json5
-
-apps.config.json
-
-deploy.tar.gz
-cache-file
-
-/apps
-
-logs
+# mac
+.DS_Store

 .env*
-!.env.example
+!.env*example

+dist
+build
+logs
+
+.turbo
+
+pack-dist
+
+# astro
+.astro
+
+# next
+.next
+
+# nuxt
+.nuxt
+
+# vercel
+.vercel
+
+# vuepress
+.vuepress/dist
+
+# coverage
+coverage/
+
+# typescript
+*.tsbuildinfo
+
+# debug logs
+*.log
+*.tmp
+
+# vscode
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+
+# idea
+.idea
+
+# system
+Thumbs.db
+ehthumbs.db
+Desktop.ini
+
+# temp files
+*.tmp
+*.temp
+
+# local development
+*.local
+
+public/r
+
+.pnpm-store
 models

-videos/tts_mix.wav
+videos/tts_mix.mp3
--- a/examples/batch-send-files.ts
+++ b/examples/batch-send-files.ts
@@ -0,0 +1,100 @@
+import { EventEmitter } from 'eventemitter3';
+import { VideoWS, VideoWsResult, sleep } from '../src/asr/provider/funasr/ws.ts';
+import fs from 'node:fs';
+import path from 'node:path';
+/**
+ * Constructor options for BatchSendFiles.
+ */
+type BatchSendOptions = {
+  // ASR WebSocket client used to send every file.
+  vws: VideoWS;
+  // Paths of the audio files to send, processed in array order.
+  files: string[];
+  // NOTE(review): not read by BatchSendFiles in this file — confirm whether it is still needed.
+  matchText?: string;
+  // Emitter that receives the 'send-done' event; vws.emitter is used when omitted.
+  emitter?: EventEmitter;
+};
+/**
+ * Sends a list of audio files through one VideoWS connection, one file at a
+ * time, and collects the text recognized for each file.
+ *
+ * After the last file a 'send-done' event carrying `{ file, text }[]` is
+ * emitted on `emitter`.
+ */
+export class BatchSendFiles {
+  files: string[];
+  vws: VideoWS;
+  emitter: EventEmitter;
+  constructor({ vws, files, emitter }: BatchSendOptions) {
+    this.files = files;
+    this.vws = vws;
+    this.emitter = emitter || vws.emitter;
+  }
+  /**
+   * Checks the connection and then sends every file.
+   *
+   * Nothing is sent when the connection check fails. The returned promise
+   * settles once all files were processed, so failures inside send() reach
+   * the caller instead of becoming unhandled rejections.
+   */
+  async init() {
+    const isConnected = await this.vws.isConnected();
+    if (!isConnected) {
+      console.error('链接失败:', isConnected);
+      // Without an open socket no 'result' event is expected, so send() would wait forever.
+      return;
+    }
+    await this.send();
+  }
+  /**
+   * Resolves with the payload of the next 'result' event emitted by the client.
+   */
+  waitOne(): Promise<VideoWsResult> {
+    return new Promise<VideoWsResult>((resolve) => {
+      this.vws.emitter.once('result', (data: VideoWsResult) => {
+        resolve(data);
+      });
+    });
+  }
+  /**
+   * Validates that `file` is a regular, non-empty audio file of a supported
+   * type and of at most 100 MB.
+   *
+   * @param file path of the file to check
+   * @returns the path, lower-cased extension and size of the file
+   * @throws Error when the path is not a regular file, has an unsupported
+   *   extension, is empty or exceeds the size limit; fs.statSync itself
+   *   throws when the path does not exist
+   */
+  async checkAudioFile(file: string) {
+    const stats = fs.statSync(file);
+    if (!stats.isFile()) {
+      throw new Error(`File not found: ${file}`);
+    }
+    const ext = path.extname(file).toLowerCase();
+    const validExtensions = ['.wav', '.mp3', '.flac', '.ogg', '.aac'];
+    if (!validExtensions.includes(ext)) {
+      throw new Error(`Invalid file type: ${ext}. Supported types are: ${validExtensions.join(', ')}`);
+    }
+    const fileSize = stats.size;
+    if (fileSize === 0) {
+      throw new Error(`File is empty: ${file}`);
+    }
+    const maxSize = 100 * 1024 * 1024; // 100 MB
+    if (fileSize > maxSize) {
+      throw new Error(`File size exceeds limit: ${fileSize} bytes. Maximum allowed size is ${maxSize} bytes.`);
+    }
+    return {
+      file,
+      ext,
+      size: fileSize,
+      isValid: true,
+    };
+  }
+  /**
+   * Sends every valid file in order, waits for one 'result' event per file
+   * and finally emits 'send-done' with the collected texts.
+   * Files that fail validation are logged and skipped.
+   */
+  async send() {
+    const textList: { file: string; text: string }[] = [];
+    for (const file of this.files) {
+      let wav_format = 'wav';
+      try {
+        const ck = await this.checkAudioFile(file);
+        if (ck.ext !== '.wav') {
+          wav_format = ck.ext.replace('.', '');
+        }
+      } catch (error) {
+        console.error('Error checking file:', error);
+        continue;
+      }
+      const data = fs.readFileSync(file);
+      // Subscribe before sending so a fast answer cannot be missed.
+      const wait = this.waitOne();
+      await this.vws.sendBuffer(data, { wav_format });
+      await sleep(1000);
+      console.log('File sent:', file);
+      const result = await wait;
+      console.log('Result:', result.text);
+      textList.push({ file, text: result.text });
+      console.log('----------------------');
+    }
+    this.emitter.emit('send-done', textList);
+  }
+}
+// const batchSend = new BatchSendFiles({
+//   vws: ws,
+//   // files: [audioTestPath],
+//   files: [videoTestPath, audioTestPath],
+// });
+// batchSend.init();
+// batchSend.emitter.on('send-done', (data) => {
+//   const matchText = '在一无所知中，梦里的一天结束了一个新的轮回，便会开始。';
+//   const textList = data as { file: string; text: string }[];
+//   for (const item of textList) {
+//     const getText = item.text || '';
+//     const distance = natural.JaroWinklerDistance(getText, matchText);
+//     console.log(`File: ${item.file}, \nText: ${item.text}\nDistance: ${distance}`);
+//   }
+//   // console.log('Batch processing done:', data);
+// });
--- a/package.json
+++ b/package.json
@@ -1,22 +1,16 @@
 {
  "name": "@kevisual/video-tools",
-  "version": "0.0.1",
+  "version": "0.0.5",
  "description": "",
  "main": "index.js",
  "basename": "/root/video-tools",
  "app": {
    "key": "video-tools",
    "entry": "dist/app.mjs",
-    "type": "system-app",
-    "files": [
-      "dist"
-    ]
+    "type": "system-app"
  },
  "scripts": {
-    "watch": "rollup -c rollup.config.mjs -w",
    "build": "rollup -c rollup.config.mjs",
-    "dev": "cross-env NODE_ENV=development nodemon --delay 2.5 -e js,cjs,mjs --exec node dist/app.mjs",
-    "dev:watch": "cross-env NODE_ENV=development concurrently -n \"Watch,Dev\" -c \"green,blue\" \"npm run watch\" \"sleep 1 && npm run dev\" ",
    "dev:bun": "bun run src/dev.ts --watch",
    "test": "tsx  test/**/*.ts",
    "clean": "rm -rf dist",
@@ -30,61 +24,55 @@
  "types": "types/index.d.ts",
  "files": [
    "dist",
-    "src"
+    "src",
+    "examples"
  ],
  "publishConfig": {
    "access": "public"
  },
  "dependencies": {
-    "@kevisual/router": "0.0.10",
-    "@kevisual/use-config": "^1.0.10",
-    "@kevisual/video": "^0.0.1",
-    "@picovoice/porcupine-node": "^3.0.6",
+    "@gradio/client": "^1.15.1",
+    "@kevisual/router": "0.0.21",
+    "@kevisual/use-config": "^1.0.17",
+    "@kevisual/video": "^0.0.2",
    "cookie": "^1.0.2",
+    "crypto-js": "^4.2.0",
    "dayjs": "^1.11.13",
    "eventemitter3": "^5.0.1",
-    "formidable": "^3.5.2",
+    "formidable": "^3.5.4",
    "lodash-es": "^4.17.21",
-    "nanoid": "^5.1.5",
-    "vosk": "^0.3.39"
+    "nanoid": "^5.1.5"
  },
  "devDependencies": {
-    "@kevisual/types": "^0.0.6",
-    "@kevisual/use-config": "^1.0.10",
-    "@rollup/plugin-alias": "^5.1.1",
-    "@rollup/plugin-commonjs": "^28.0.3",
-    "@rollup/plugin-json": "^6.1.0",
-    "@rollup/plugin-node-resolve": "^16.0.1",
-    "@rollup/plugin-replace": "^6.0.2",
-    "@rollup/plugin-typescript": "^12.1.2",
+    "@alicloud/pop-core": "^1.8.0",
+    "@kevisual/logger": "^0.0.4",
+    "@kevisual/types": "^0.0.10",
+    "@kevisual/use-config": "^1.0.17",
    "@types/crypto-js": "^4.2.2",
    "@types/formidable": "^3.4.5",
    "@types/lodash-es": "^4.17.12",
-    "@types/node": "^22.14.1",
+    "@types/node": "^22.15.29",
    "@types/vosk": "^0.3.1",
    "@types/ws": "^8.18.1",
-    "commander": "^13.1.0",
+    "commander": "^14.0.0",
    "concurrently": "^9.1.2",
    "cross-env": "^7.0.3",
    "dotenv": "^16.5.0",
    "inquire": "^0.4.8",
    "ioredis": "^5.6.1",
-    "nodemon": "^3.1.9",
-    "pg": "^8.14.1",
-    "pino": "^9.6.0",
-    "pino-pretty": "^13.0.0",
-    "pm2": "^6.0.5",
+    "nodemon": "^3.1.10",
+    "pg": "^8.16.0",
+    "pm2": "^6.0.6",
    "rimraf": "^6.0.1",
-    "rollup": "^4.40.0",
-    "rollup-plugin-copy": "^3.5.0",
-    "rollup-plugin-dts": "^6.2.1",
-    "rollup-plugin-esbuild": "^6.2.1",
    "sequelize": "^6.37.7",
    "tape": "^5.9.0",
-    "tsup": "^8.4.0",
-    "tsx": "^4.19.3",
+    "tsx": "^4.19.4",
    "typescript": "^5.8.3",
    "ws": "npm:@kevisual/ws"
  },
-  "packageManager": "pnpm@10.8.1"
+  "exports": {
+    "./src/*": "./src/*",
+    "./examples/*": "./examples/*"
+  },
+  "packageManager": "pnpm@10.11.1"
 }
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
--- a/rollup.config.mjs
+++ b/rollup.config.mjs
@@ -1,75 +0,0 @@
-import resolve from '@rollup/plugin-node-resolve';
-import commonjs from '@rollup/plugin-commonjs';
-import json from '@rollup/plugin-json';
-import path from 'path';
-import esbuild from 'rollup-plugin-esbuild';
-import alias from '@rollup/plugin-alias';
-import replace from '@rollup/plugin-replace';
-import pkgs from './package.json' with {type: 'json'};
-
-const isDev = process.env.NODE_ENV === 'development';
-const input = isDev ? './src/dev.ts' : './src/main.ts';
-/**
- * @type {import('rollup').RollupOptions}
- */
-const config = {
-  input,
-  output: {
-    dir: './dist',
-    entryFileNames: 'app.mjs',
-    chunkFileNames: '[name]-[hash].mjs',
-    format: 'esm',
-  },
-  plugins: [
-    replace({
-      preventAssignment: true, // 防止意外赋值
-      DEV_SERVER: JSON.stringify(isDev), // 替换 process.env.NODE_ENV
-      APP_VERSION: JSON.stringify(pkgs.version),
-    }),
-    alias({
-      // only esbuild needs to be configured
-      entries: [
-        { find: '@', replacement: path.resolve('src') }, // 配置 @ 为 src 目录
-        { find: 'http', replacement: 'node:http' },
-        { find: 'https', replacement: 'node:https' },
-        { find: 'fs', replacement: 'node:fs' },
-        { find: 'path', replacement: 'node:path' },
-        { find: 'crypto', replacement: 'node:crypto' },
-        { find: 'zlib', replacement: 'node:zlib' },
-        { find: 'stream', replacement: 'node:stream' },
-        { find: 'net', replacement: 'node:net' },
-        { find: 'tty', replacement: 'node:tty' },
-        { find: 'tls', replacement: 'node:tls' },
-        { find: 'buffer', replacement: 'node:buffer' },
-        { find: 'timers', replacement: 'node:timers' },
-        // { find: 'string_decoder', replacement: 'node:string_decoder' },
-        { find: 'dns', replacement: 'node:dns' },
-        { find: 'domain', replacement: 'node:domain' },
-        { find: 'os', replacement: 'node:os' },
-        { find: 'events', replacement: 'node:events' },
-        { find: 'url', replacement: 'node:url' },
-        { find: 'assert', replacement: 'node:assert' },
-        { find: 'util', replacement: 'node:util' },
-      ],
-    }),
-    resolve({
-      preferBuiltins: true, // 强制优先使用内置模块
-    }),
-    commonjs(),
-    esbuild({
-      target: 'node22', //
-      minify: false, // 启用代码压缩
-      tsconfig: 'tsconfig.json',
-    }),
-    json(),
-  ],
-  external: [
-    /@kevisual\/router(\/.*)?/, //, // 路由
-    /@kevisual\/use-config(\/.*)?/, //
-
-    'sequelize', // 数据库 orm
-    'ioredis', // redis
-    'pg', // pg
-  ],
-};
-export default config;
--- a/src/asr/provider/aliyun/aliyun-asr-server.ts
+++ b/src/asr/provider/aliyun/aliyun-asr-server.ts
@@ -0,0 +1,131 @@
+/**
+ * Constructor options for AliAsrServer.
+ */
+type AliAsrServerOptions = {
+  // Recognition endpoint; the constructor falls back to the cn-shanghai gateway URL.
+  baseUrl?: string;
+  // Sent as the `appkey` query parameter.
+  appkey: string;
+  // Sent in the `X-NLS-Token` request header.
+  token: string;
+  // Sent as the `format` query parameter when set.
+  format?: string;
+  // Sent as the `sample_rate` query parameter when set.
+  sampleRate?: string;
+  // Adds `enable_punctuation_prediction=true` when true (constructor default: true).
+  enablePunctuationPrediction?: boolean;
+  // Adds `enable_inverse_text_normalization=true` when true (constructor default: true).
+  enableInverseTextNormalization?: boolean;
+  // Adds `enable_voice_detection=true` when true (constructor default: false).
+  enableVoiceDetection?: boolean;
+};
+/**
+ * Small client that POSTs raw audio bytes to the Aliyun speech recognition
+ * HTTP endpoint and returns the parsed JSON answer.
+ */
+export class AliAsrServer {
+  private baseUrl: string;
+  private appkey: string;
+  private token: string;
+  // Optional: only added to the query string when set.
+  private format?: string;
+  // Optional: only added to the query string when set.
+  private sampleRate?: string;
+  private enablePunctuationPrediction: boolean;
+  private enableInverseTextNormalization: boolean;
+  private enableVoiceDetection: boolean;
+
+  constructor(opts?: AliAsrServerOptions) {
+    const {
+      baseUrl = 'https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/asr',
+      appkey = '',
+      token = '',
+      format,
+      sampleRate,
+      enablePunctuationPrediction = true,
+      enableInverseTextNormalization = true,
+      enableVoiceDetection = false,
+    } = opts || {};
+    this.baseUrl = baseUrl;
+    this.appkey = appkey;
+    this.token = token;
+    this.format = format;
+    this.sampleRate = sampleRate;
+    this.enablePunctuationPrediction = enablePunctuationPrediction;
+    this.enableInverseTextNormalization = enableInverseTextNormalization;
+    this.enableVoiceDetection = enableVoiceDetection;
+  }
+
+  /**
+   * Builds the request URL: the base URL plus `appkey` and every optional
+   * parameter that is set or enabled.
+   */
+  buildRequestUrl(): string {
+    const params = new URLSearchParams();
+    params.append('appkey', this.appkey);
+    if (this.format) {
+      params.append('format', this.format);
+    }
+    if (this.sampleRate) {
+      params.append('sample_rate', this.sampleRate);
+    }
+
+    if (this.enablePunctuationPrediction) {
+      params.append('enable_punctuation_prediction', 'true');
+    }
+
+    if (this.enableInverseTextNormalization) {
+      params.append('enable_inverse_text_normalization', 'true');
+    }
+
+    if (this.enableVoiceDetection) {
+      params.append('enable_voice_detection', 'true');
+    }
+
+    return `${this.baseUrl}?${params.toString()}`;
+  }
+
+  /**
+   * Sends the audio bytes for recognition.
+   *
+   * @param audioContent raw content of the audio file
+   * @returns the parsed response body when its `status` is 20000000,
+   *   otherwise `null` (HTTP error, failure status or thrown error; every
+   *   failure is logged, none is rethrown)
+   */
+  async processAudio(audioContent: Buffer): Promise<any> {
+    try {
+      // Request headers
+      const headers = {
+        'X-NLS-Token': this.token,
+        'Content-Type': 'application/octet-stream',
+      };
+
+      // Build the request URL
+      const requestUrl = this.buildRequestUrl();
+
+      // Send the request
+      const response = await fetch(requestUrl, {
+        method: 'POST',
+        headers,
+        body: audioContent,
+      });
+
+      // Handle the response
+      if (!response.ok) {
+        console.log(`The audio file recognized failed, http code: ${response.status}`);
+        const v = await response.text();
+        console.log('The audio file recognized response:', v);
+        return null;
+      }
+      // Parse the response body
+      const body = await response.json();
+
+      if (body.status === 20000000) {
+        console.log('The audio file recognized result:');
+        console.log(body);
+        console.log('result: ' + body.result);
+        console.log('The audio file recognized succeed!');
+        return body;
+      } else {
+        console.log('The audio file recognized failed!');
+        console.log(body);
+        return null;
+      }
+    } catch (error: unknown) {
+      // A thrown value is not guaranteed to be an object with a `code` field.
+      const code = typeof error === 'object' && error !== null && 'code' in error ? (error as { code?: unknown }).code : undefined;
+      if (code === 'ENOENT') {
+        console.log('The audio file does not exist!');
+      } else {
+        console.log('Error during audio processing:', error);
+      }
+      return null;
+    }
+  }
+}
+
+// // 使用示例
+// async function main() {
+//   const asrServer = new AliAsrServer({
+//     appkey: '填入appkey',
+//     token: '填入服务鉴权Token',
+//     format: 'pcm',
+//     sampleRate: '16000',
+//     enablePunctuationPrediction: true,
+//     enableInverseTextNormalization: true,
+//     enableVoiceDetection: false,
+//   });
+
+//   const audioFile = '/path/to/nls-sample-16k.wav';
+//   await asrServer.processAudio(audioFile);
+// }
+
+// // 执行主函数
+// main().catch(console.error);
--- a/src/asr/provider/aliyun/base.ts
+++ b/src/asr/provider/aliyun/base.ts
@@ -0,0 +1,42 @@
+import RPCClient from '@alicloud/pop-core';
+
+// Shape of the 'CreateToken' response as read by AliCommon.getToken().
+interface TokenResponse {
+  Token: {
+    // Token string; cached and returned by getToken().
+    Id: string;
+    // Multiplied by 1000 before it is compared with Date.now(), i.e. treated as seconds.
+    ExpireTime: number;
+  };
+}
+// Credentials for AliCommon; empty values fall back to ALIYUN_AK_ID / ALIYUN_AK_SECRET.
+type AliCommonOptions = {
+  accessKeyId: string;
+  accessKeySecret: string;
+};
+/**
+ * Fetches and caches an Aliyun NLS access token via the 'CreateToken' RPC.
+ */
+export class AliCommon {
+  private accessKeyId: string;
+  private accessKeySecret: string;
+  private endpoint: string;
+  private apiVersion: string;
+  // Last token received; empty until the first successful getToken().
+  token = '';
+  // Expiry of `token` in epoch milliseconds.
+  expireTime = 0;
+  constructor(opts?: AliCommonOptions) {
+    this.accessKeyId = opts?.accessKeyId || process.env.ALIYUN_AK_ID || '';
+    this.accessKeySecret = opts?.accessKeySecret || process.env.ALIYUN_AK_SECRET || '';
+    // NOTE(review): plain-HTTP endpoint — confirm whether the https variant should be used.
+    this.endpoint = 'http://nls-meta.cn-shanghai.aliyuncs.com';
+    this.apiVersion = '2019-02-28';
+  }
+  /**
+   * Returns a valid token, requesting a new one when none is cached or the
+   * cached one is about to expire.
+   *
+   * @returns the token id
+   * @throws whatever the RPC client throws when the request fails
+   */
+  async getToken() {
+    // Refresh slightly early so a token cannot expire between this check and its use.
+    const refreshMarginMs = 60 * 1000;
+    if (this.token && this.expireTime - refreshMarginMs > Date.now()) {
+      return this.token;
+    }
+    const client = new RPCClient({
+      accessKeyId: this.accessKeyId,
+      accessKeySecret: this.accessKeySecret,
+      endpoint: this.endpoint,
+      apiVersion: this.apiVersion,
+    });
+
+    const result = await client.request<TokenResponse>('CreateToken', {});
+    this.token = result.Token.Id;
+    this.expireTime = result.Token.ExpireTime * 1000;
+    return result.Token.Id;
+  }
+}
--- a/src/asr/provider/aliyun/test/get-text.ts
+++ b/src/asr/provider/aliyun/test/get-text.ts
@@ -0,0 +1,25 @@
+import { AliAsrServer } from '../aliyun-asr-server.ts';
+import fs from 'fs/promises';
+import path from 'path';
+// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
+// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
+// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
+const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
+const name = 'output-1746007775571.mp3';
+const videoTestPath2 = path.join(process.cwd(), 'build', name);
+
+// Usage example: recognizes the file at videoTestPath and logs the outcome.
+async function main() {
+  const asrServer = new AliAsrServer({
+    appkey: process.env.ALI_ASR_APP_KEY,
+    token: process.env.ALI_ASR_TOKEN,
+    // Derive the format from the file that is actually sent, so switching the
+    // test file between .wav and .mp3 cannot leave a stale, mismatching value.
+    format: path.extname(videoTestPath).slice(1).toLowerCase(),
+  });
+
+  const audioContent = await fs.readFile(videoTestPath);
+  await asrServer.processAudio(audioContent);
+}
+
+// 执行主函数
+main().catch(console.error);
--- a/src/asr/provider/aliyun/test/get-token.ts
+++ b/src/asr/provider/aliyun/test/get-token.ts
@@ -0,0 +1,10 @@
+import dotenv from 'dotenv';
+dotenv.config();
+import { AliCommon } from '../base.ts';
+
+// Manual check: request a token with the credentials from the environment and print it.
+const aliCommon = new AliCommon({
+  accessKeyId: process.env.ALIYUN_AK_ID,
+  accessKeySecret: process.env.ALIYUN_AK_SECRET,
+});
+
+// Log failures instead of leaving the rejection unhandled.
+aliCommon.getToken().then(console.log).catch(console.error);
--- a/src/asr/provider/funasr/test/get-text.ts
+++ b/src/asr/provider/funasr/test/get-text.ts
@@ -3,40 +3,80 @@ import net from 'net';
 import path from 'path';
 import fs from 'fs';

-const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
-const ws = new VideoWS({
-  // url: 'wss://192.168.31.220:10095',
-  url: 'wss://funasr.xiongxiao.me',
-  isFile: true,
-  onConnect: async () => {
-    console.log('onConnect');
-    const data = fs.readFileSync(videoTestPath);
-    let sampleBuf = new Uint8Array(data);
+// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
+// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
+// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
+const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
+const videoTestPath3 = path.join(process.cwd(), 'funasr_test.wav');
+const name = 'output-1746007775571.mp3';
+const videoTestPath2 = path.join(process.cwd(), 'build', name);

-    var chunk_size = 960; // for asr chunk_size [5, 10, 5]
-    let totalsend = 0;
-    let len = 0;
-    ws.start();
-    while (sampleBuf.length >= chunk_size) {
-      const sendBuf = sampleBuf.slice(0, chunk_size);
-      totalsend = totalsend + sampleBuf.length;
-      sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length);
-      if (len === 100) {
-        // ws.stop();
+const url = 'wss://funasr.xiongxiao.me';
+const url5 = 'https://1.15.101.247:10095'; // pro
+// const ws = new VideoWS({
+//   // url: 'wss://192.168.31.220:10095',
+//   url: 'wss://funasr.xiongxiao.me',
+//   isFile: true,
+//   // mode: 'offline',
+//   wav_format: 'mp3',
+//   onConnect: async () => {
+//     console.log('onConnect');
+//     const data = fs.readFileSync(videoTestPath);
+//     let sampleBuf = new Uint8Array(data);
+
+//     var chunk_size = 960; // for asr chunk_size [5, 10, 5]
+//     let totalsend = 0;
+//     let len = 0;
 //     ws.start();
-        await new Promise((resolve) => setTimeout(resolve, 1000));
+//     while (sampleBuf.length >= chunk_size) {
+//       const sendBuf = sampleBuf.slice(0, chunk_size);
+//       totalsend = totalsend + sampleBuf.length;
+//       sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length);
+//       if (len === 100) {
+//         // ws.stop();
+//         // ws.start();
+//         // await new Promise((resolve) => setTimeout(resolve, 1000));
+//       }
+
+//       await new Promise((resolve) => setTimeout(resolve, 10));
+//       ws.send(sendBuf);
+//       len++;
+//     }
+//     await new Promise((resolve) => setTimeout(resolve, 1000));
+//     ws.stop();
+//     console.log('len', len);
+//   },
+// });
+
+// const server = net.createServer((socket) => {
+//   socket.on('data', (data) => {
+//     console.log('data', data);
+//   });
+// });
+// server.listen(10096);
+
+const ws2 = new VideoWS({
+  url: url5,
+  mode: '2pass',
+  onConnect: async () => {
+    const data = fs.readFileSync(videoTestPath3);
+    // await ws2.sendBuffer(data, { wav_format: 'mp3' });
+    // await new Promise((resolve) => setTimeout(resolve, 1000));
+    // const data2 = fs.readFileSync(videoTestPath2);
+    // await ws2.sendBuffer(data2, { wav_format: 'mp3' });
+    ws2.emitter.on('message', (event) => {
+      console.log('message', event.data);
+    });
+    ws2.emitter.on('result', (result) => {
+      if (result.is_final) {
+        console.log('Final result:', result);
+        process.exit(0);
      }
-      ws.send(sendBuf);
-      len++;
-    }
-    ws.stop();
-    console.log('len', len);
+    });
+    await ws2.start();
+    await ws2.sendBuffer(data, { online: true });
+    setTimeout(() => {
+      ws2.stop();
+    }, 4000);
  },
 });
-
-const server = net.createServer((socket) => {
-  socket.on('data', (data) => {
-    console.log('data', data);
-  });
-});
-server.listen(10096);
--- a/src/asr/provider/funasr/test/recorder.ts
+++ b/src/asr/provider/funasr/test/recorder.ts
@@ -1,35 +1,61 @@
 import { VideoWS } from '../ws.ts';
+import path from 'node:path';
 import net from 'net';
 import { Recording } from '../../../../recorder/index.ts';
 import Stream from 'stream';
+import fs from 'node:fs'; // 新增
+
+const recorder = new Recording({
+  sampleRate: 16000,
+  channels: 1, //
+  audioType: 'wav',
+  threshold: 0,
+  recorder: 'rec',
+  silence: '1.0',
+  endOnSilence: true,
+});
+const writeFilePath = path.join(process.cwd(), 'funasr_test.wav');
+const fileStream = fs.createWriteStream(writeFilePath, { encoding: 'binary' });
+
+const url = 'wss://funasr.xiongxiao.me';
+const url3 = 'wss://pro.xiongxiao.me:10095';
+const url4 = 'wss://121.4.112.18:10095'; // aliyun
+const url5 = 'https://1.15.101.247:10095'; // pro

-const recorder = new Recording();
-const writeStream = new Stream.Writable();
 const ws = new VideoWS({
-  url: 'wss://192.168.31.220:10095',
+  url: url5,
  isFile: false,
+  // mode: 'online',
+  mode: '2pass',
+  wsOptions: {
+    rejectUnauthorized: false,
+  },
  onConnect: async () => {
    console.log('onConnect');
-    let chunks: Buffer = Buffer.alloc(0);
-    var chunk_size = 960; // for asr chunk_size [5, 10, 5]
-    let totalsend = 0;
+    ws.start();
+
+    recorder.start();
    let len = 0;
    recorder.stream().on('data', (chunk) => {
-      chunks = Buffer.concat([chunks, chunk]);
-      if (chunks.length > chunk_size) {
-        ws.send(chunks);
-        totalsend += chunks.length;
-        chunks = Buffer.alloc(0);
-      }
+      // ws.sendBuffer(chunk, { online: true });
+      // console.log('Sending audio chunk:', chunk.length);
+      ws.send(chunk)
+      fileStream.write(chunk); // 新增：将音频数据写入文件
+      len += chunk.length;
    });
-    ws.start();
+
    setTimeout(() => {
      ws.stop();
+      fileStream.end(); // 新增：关闭文件流
      setTimeout(() => {
        process.exit(0);
      }, 1000);
      console.log('len', len);
-    }, 20000);
+    }, 10 * 1000);
+
+    ws.emitter.on('message', (event) => {
+      console.log('message', event.data);
+    });
  },
 });

@@ -38,4 +64,4 @@ const server = net.createServer((socket) => {
    console.log('data', data);
  });
 });
-server.listen(10096);
+server.listen(10097);
--- a/src/asr/provider/funasr/ws.ts
+++ b/src/asr/provider/funasr/ws.ts
@@ -1,6 +1,7 @@
 // import WebSocket from 'ws';
-import { initWs } from '../../../ws-adapter/index.ts';
-
+import { EventEmitter } from 'eventemitter3';
+import { WSServer, WSSOptions } from '../../provider/ws-server.ts';
+// Returns a promise that resolves after `ms` milliseconds.
+export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
 export type VideoWSOptions = {
  url?: string;
  ws?: WebSocket;
@@ -8,10 +9,34 @@ export type VideoWSOptions = {
  mode?: VideoWsMode;
  isFile?: boolean;
  onConnect?: () => void;
+  wav_format?: string;
+  emitter?: EventEmitter;
+} & {
+  wsOptions?: WSSOptions['wsOptions'];
 };
 export const videoWsMode = ['2pass', 'online', 'offline'] as const;
 type VideoWsMode = (typeof videoWsMode)[number];
-
+// Handshake message that VideoWS.start() serializes to JSON and sends over the socket.
+type OpenRequest = {
+  // Audio chunk size (unit: milliseconds):
+  chunk_size: number[];
+  // Audio file name:
+  wav_name: string;
+  // Whether speech is in progress:
+  is_speaking: boolean;
+  // Chunk interval (unit: milliseconds):
+  chunk_interval: number;
+  // Inverse text normalization (ITN):
+  itn: boolean;
+  // Mode:
+  // '2pass' - two-pass mode, 'online' - online mode, 'offline' - offline mode
+  mode: VideoWsMode;
+  // Audio format:
+  wav_format?: string; // 'wav' - PCM format, 'mp3' - MP3 format, etc.
+  // Audio sample rate (unit: Hz):
+  audio_fs?: number;
+  // Hotword list:
+  hotwords?: string;
+};
 export type VideoWsResult = {
  isFinal: boolean;
  mode: VideoWsMode;
@@ -21,48 +46,21 @@ export type VideoWsResult = {
  wav_name: string;
 };

-export class VideoWS {
-  ws: WebSocket;
+export class VideoWS extends WSServer {
  itn?: boolean;
  mode?: VideoWsMode;
-  isFile?: boolean;
-  onConnect?: () => void;
+  wav_format?: string;
  constructor(options?: VideoWSOptions) {
+    super({ url: options?.url, ws: options?.ws, onConnect: options?.onConnect, wsOptions: options?.wsOptions });
+    this.itn = options?.itn || false;
    this.itn = options?.itn || false;
    this.mode = options?.mode || 'online';
-    this.isFile = options?.isFile || false;
-    this.initWs(options);
-  }
-  async initWs(options: VideoWSOptions) {
-    if (options?.ws) {
-      this.ws = options.ws;
-    } else {
-      this.ws = await initWs(options.url);
-    }
-    this.onConnect = options?.onConnect || (() => {});
-    this.ws.onopen = this.onOpen.bind(this);
-    this.ws.onmessage = this.onMessage.bind(this);
-    this.ws.onerror = this.onError.bind(this);
-    this.ws.onclose = this.onClose.bind(this);
+    this.wav_format = options?.wav_format;
  }

-  async onOpen() {
-    this.onConnect();
-  }
-  async start() {
-    let isFileMode = this.isFile;
+  async start(opts?: Partial<OpenRequest>) {
    const chunk_size = new Array(5, 10, 5);
-    type OpenRequest = {
-      chunk_size: number[];
-      wav_name: string;
-      is_speaking: boolean;
-      chunk_interval: number;
-      itn: boolean;
-      mode: VideoWsMode;
-      wav_format?: string;
-      audio_fs?: number;
-      hotwords?: string;
-    };
+    console.log('start', chunk_size);
    const request: OpenRequest = {
      chunk_size: chunk_size,
      wav_name: 'h5', //
@@ -70,17 +68,14 @@ export class VideoWS {
      chunk_interval: 10,
      itn: this.itn,
      mode: this.mode || 'online',
+      ...opts,
    };
-    console.log('request', request);
-    if (isFileMode) {
-      const file_ext = 'wav';
    const file_sample_rate = 16000;
-      request.wav_format = file_ext;
-      if (file_ext == 'wav') {
+    request.wav_format = request.wav_format || this.wav_format || 'wav';
+    if ('wav' == request.wav_format) {
      request.wav_format = 'PCM';
      request.audio_fs = file_sample_rate;
    }
-    }
    this.ws.send(JSON.stringify(request));
  }
  async stop() {
@@ -99,11 +94,41 @@ export class VideoWS {
      this.ws.send(data);
    }
  }
+  /**
+   * Sends a complete audio buffer over the socket in chunks of 960 bytes,
+   * pausing 10 ms before each chunk. The final chunk may be shorter.
+   *
+   * Unless `online` is true the buffer is wrapped in its own start()/stop()
+   * pair (offline use). Does nothing when the socket is not open.
+   *
+   * @param data audio data
+   * @param opts.wav_format format passed to start() (default 'wav'); unused when `online` is true
+   * @param opts.online when true the caller is responsible for start() and stop()
+   */
+  async sendBuffer(data: Buffer, opts?: { isFile?: boolean; wav_format?: string; online?: boolean }) {
+    const { wav_format = 'wav', online = false } = opts || {};
+    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
+      return;
+    }
+    const chunkSize = 960; // for asr chunk_size [5, 10, 5]
+    const bytes = new Uint8Array(data);
+    if (!online) await this.start({ wav_format });
+    // Walk the whole buffer so the trailing chunk shorter than chunkSize is sent too;
+    // stopping at the last full chunk would cut off the end of the audio.
+    for (let offset = 0; offset < bytes.length; offset += chunkSize) {
+      await new Promise((resolve) => setTimeout(resolve, 10));
+      this.send(bytes.slice(offset, offset + chunkSize));
+    }
+    // NOTE(review): stop() is not awaited, as before — its body is not visible here.
+    if (!online) this.stop();
+  }
  async onMessage(event: MessageEvent) {
+    super.onMessage(event);
    const data = event.data;
    try {
      const result = JSON.parse(data.toString());
-      console.log('result', result);
+      if (result?.is_final !== undefined && result?.text) {
+        // console.log('result', result, typeof result);
+        this.emitter.emit('result', result);
+      }
+      // console.log('onMessage-result', result);
    } catch (error) {
      console.log('error', error);
    }
--- a/src/asr/provider/volcengine/asr-ws-big-model-client.ts
+++ b/src/asr/provider/volcengine/asr-ws-big-model-client.ts
@@ -1,5 +1,5 @@
-import * as zlib from 'zlib';
-import { promisify } from 'util';
+import * as zlib from 'node:zlib';
+import { promisify } from 'node:util';
 import { nanoid } from 'nanoid';
 import { VolcEngineBase, uuid } from './base.ts';

@@ -61,6 +61,39 @@ function generateBeforePayload(sequence: number): Buffer {
  return beforePayload;
 }

+/**
+ * Parsed server message.
+ * NOTE(review): presumably the return shape of parseResponse — confirm against its signature.
+ */
+export type ParsedMessage = {
+  // When true, AsrWsClient.onMessage additionally emits an 'end' event.
+  isLastPackage: boolean;
+  payloadSequence?: number;
+  // Decoded payload of the message.
+  payloadMsg?: {
+    audio_info?: {
+      duration: number;
+    };
+    result?: {
+      additions?: {
+        log_id?: string;
+      };
+      text?: string;
+      utterances?: Array<{
+        additions?: {
+          fixed_prefix_result?: string;
+        };
+        definite?: boolean;
+        end_time?: number;
+        start_time?: number;
+        text?: string;
+        words?: Array<{
+          end_time: number;
+          start_time: number;
+          text: string;
+        }>;
+      }>;
+    };
+    error?: any;
+  };
+  payloadSize?: number;
+  code?: number;
+  seq?: number;
+};
 /**
 * Parse response from the WebSocket server
 */
@@ -393,10 +426,11 @@ export class AsrWsClient extends VolcEngineBase {
    // Wait for response
    await sendVoice(audioData, segmentSize);
  }
+
  async onMessage(event: MessageEvent) {
    try {
      const parsed = parseResponse(Buffer.from(event.data as ArrayBuffer));
-      console.log(`Seq ${parsed.payloadSequence} response:`, parsed);
+      // console.log(`Seq ${parsed.payloadSequence} response:`, parsed);
      if (typeof event.data === 'string') {
        throw new Error('event.data is string: ' + event.data);
      }
@@ -405,10 +439,9 @@ export class AsrWsClient extends VolcEngineBase {
        this.emitter.emit('error', parsed);
        this.isError = true;
      }
+      this.emitter.emit('message', parsed);
      if (parsed.isLastPackage) {
        this.emitter.emit('end', parsed);
-      } else {
-        this.emitter.emit('message', parsed);
      }
    } catch (error) {
      console.error('Error processing response:', error);
@@ -440,6 +473,14 @@ export class AsrWsClient extends VolcEngineBase {
      throw error;
    }
  }
+  /**
+   * Forwards the end flag to the base class and, when ending, pushes one
+   * 10000-byte zero-filled buffer through sendVoiceStream.
+   * NOTE(review): sendVoiceStream is not awaited here — confirm that is intended.
+   */
+  async setIsEnd(isEnd: boolean) {
+      super.setIsEnd(isEnd);
+      if (isEnd) {
+        // Send a blank packet
+        const emptyBuffer = Buffer.alloc(10000);
+        this.sendVoiceStream(emptyBuffer);
+      }
+  }
  /**
   * 发送语音流, 最小10000
   * @param data
--- a/src/asr/provider/volcengine/asr-ws-client.ts
+++ b/src/asr/provider/volcengine/asr-ws-client.ts
@@ -238,7 +238,7 @@ interface AudioItem {
  id: string | number;
  path: string;
 }
-
+// 流式语音识别
 export class AsrWsClient extends VolcEngineBase {
  private audioPath: string;
  private cluster: string;
--- a/src/asr/provider/volcengine/auc.ts
+++ b/src/asr/provider/volcengine/auc.ts
@@ -0,0 +1,136 @@
+// https://git.xiongxiao.me/kevisual/video-tools/raw/branch/main/src/asr/provider/volcengine/auc.ts
+import { nanoid } from "nanoid"
+
+export const FlashURL = "https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash"
+export const AsrBaseURL = 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/submit'
+export const AsrBase = 'volc.bigasr.auc'
+export const AsrTurbo = 'volc.bigasr.auc_turbo'
+
+const uuid = () => nanoid()
+
+type AsrOptions = {
+  url?: string
+  appid?: string
+  token?: string
+  type?: AsrType
+}
+
+type AsrType = 'flash' | 'standard' | 'turbo'
+/**
+ * Minimal HTTP client for the Volcengine "bigmodel" file-recognition (AUC) API.
+ *
+ * `type` selects the endpoint and resource id:
+ * - 'flash' (default) posts to `FlashURL` with resource id `AsrTurbo`;
+ * - any other type posts to `AsrBaseURL` with resource id `AsrBase`.
+ * An explicit `options.url` overrides the endpoint derived from `type`.
+ */
+export class Asr {
+  url: string = FlashURL
+  appid: string = ""
+  token: string = ""
+  type: AsrType = 'flash'
+  /**
+   * @param options - credentials plus optional endpoint / type overrides
+   * @throws Error when `appid` or `token` is missing or empty
+   */
+  constructor(options: AsrOptions = {}) {
+    this.appid = options.appid || ""
+    this.token = options.token || ""
+    this.type = options.type || 'flash'
+    // Honor a caller-supplied endpoint; otherwise derive it from the type.
+    this.url = options.url || (this.type === 'flash' ? FlashURL : AsrBaseURL)
+    if (!this.appid || !this.token) {
+      throw new Error("VOLCENGINE_Asr_APPID or VOLCENGINE_Asr_TOKEN is not set")
+    }
+  }
+
+  /**
+   * Builds the request headers. A fresh request id is generated on every call.
+   */
+  header() {
+    const model = this.type === 'flash' ? AsrTurbo : AsrBase
+    return {
+      "X-Api-App-Key": this.appid,
+      "X-Api-Access-Key": this.token,
+      "X-Api-Resource-Id": model,
+      "X-Api-Request-Id": uuid(),
+      "X-Api-Sequence": "-1",
+    }
+  }
+  /**
+   * POSTs the request, serialized as JSON, to `this.url`.
+   *
+   * @param body - recognition request; needs `audio.url` or `audio.data`
+   * @returns the pending `fetch` response
+   * @throws Error when neither `audio.url` nor `audio.data` is provided
+   */
+  submit(body: AsrRequest) {
+    if (!body.audio || (!body.audio.url && !body.audio.data)) {
+      throw new Error("audio.url or audio.data is required")
+    }
+    return fetch(this.url, { method: "POST", headers: this.header(), body: JSON.stringify(body) })
+  }
+  /**
+   * Submits the request and resolves with the parsed JSON response body.
+   */
+  async getText(body: AsrRequest) {
+    const res = await this.submit(body)
+    return res.json()
+  }
+}
+
+export type AsrResponse = {
+  audio_info: {
+    /**
+     * Audio duration, in ms
+     */
+    duration: number;
+  };
+  result: {
+    additions: {
+      duration: string;
+    };
+    text: string;
+    utterances: Array<{
+      end_time: number;
+      start_time: number;
+      text: string;
+      words: Array<{
+        confidence: number;
+        end_time: number;
+        start_time: number;
+        text: string;
+      }>;
+    }>;
+  };
+}
+export interface AsrRequest {
+  user?: {
+    uid: string;
+  };
+  audio: {
+    url?: string;
+    data?: string;
+    format?: 'wav' | 'pcm' | 'mp3' | 'ogg';
+    codec?: 'raw' | 'opus'; 	// raw / opus; defaults to raw (pcm).
+    rate?: 8000 | 16000; // Sample rate; 8000 or 16000 supported, defaults to 16000.
+    channel?: 1 | 2; // Channel count; 1 or 2 supported, defaults to 1.
+  };
+
+
+  request?: {
+    model_name?: string; // Recognition model name, e.g. "bigmodel"
+    enable_words?: boolean; // Enable word-level timestamps; defaults to false.
+    enable_sentence_info?: boolean; // Enable sentence-level timestamps; defaults to false.
+    enable_utterance_info?: boolean; // Enable utterance-level timestamps; defaults to true.
+    enable_punctuation_prediction?: boolean; // Enable punctuation prediction; defaults to true.
+    enable_inverse_text_normalization?: boolean; // Enable text normalization; defaults to true.
+    enable_separate_recognition_per_channel?: boolean; // Enable separate recognition per channel; defaults to false.
+    audio_channel_count?: 1 | 2; // Audio channel count; only effective when enable_separate_recognition_per_channel is on; 1 or 2 supported, defaults to 1.
+    max_sentence_silence?: number; // Maximum silence within a sentence; only effective when enable_sentence_info is on; in ms, defaults to 800.
+    custom_words?: string[];
+    enable_channel_split?: boolean; // Enable channel splitting
+    enable_ddc?: boolean; // Enable DDC (described here as dual-channel noise reduction; verify against the API docs)
+    enable_speaker_info?: boolean; // Enable speaker separation
+    enable_punc?: boolean; // Enable punctuation prediction (short form)
+    enable_itn?: boolean; // Enable text normalization (short form)
+    vad_segment?: boolean; // Enable VAD-based sentence segmentation
+    show_utterances?: boolean; // Return utterance-level results
+    corpus?: {
+      boosting_table_name?: string;
+      correct_table_name?: string;
+      context?: string;
+    };
+  };
+}
+
+// const main = async () => {
+//   const base64Audio = wavToBase64(audioPath);
+//   const auc = new Asr({
+//     appid: config.VOLCENGINE_AUC_APPID,
+//     token: config.VOLCENGINE_AUC_TOKEN,
+//   });
+//   const result = await auc.getText({ audio: { data: base64Audio } });
+//   console.log(util.inspect(result, { showHidden: false, depth: null, colors: true }))
+// }
+
+// main();
--- a/src/asr/provider/volcengine/base.ts
+++ b/src/asr/provider/volcengine/base.ts
@@ -1,4 +1,3 @@
-import { initWs } from '../../../ws-adapter/index.ts';
 import { WSServer } from '../../provider/ws-server.ts';
 import { nanoid } from 'nanoid';

--- a/src/asr/provider/volcengine/test/asr-bigmodel.ts
+++ b/src/asr/provider/volcengine/test/asr-bigmodel.ts
@@ -7,15 +7,22 @@ import fs from 'fs';
 const main = async () => {
  const audioId = '123';
  const asrClient = new AsrWsClient({
-    appid: config.APP_ID,
-    token: config.TOKEN,
+    appid: config.VOLCENGINE_ASR_MODEL_APPID,
+    token: config.VOLCENGINE_ASR_MODEL_TOKEN,
  });
+  asrClient.emitter.on('message', (result) => {
+    console.log('识别结果', JSON.stringify(result, null, 2));
+  })
+  asrClient.emitter.on('end', (result) => {
+    console.log('识别结束', JSON.stringify(result, null, 2));
+  })
  await new Promise((resolve) => setTimeout(resolve, 2000));
  const data = fs.readFileSync(audioPath);
  await asrClient.sendVoiceFile(data);
-  await asrClient.sendVoiceFile(fs.readFileSync(blankAudioPath));
+  // await asrClient.sendVoiceFile(fs.readFileSync(blankAudioPath));
  asrClient.setIsEnd(true);
-  await asrClient.sendVoiceFile(fs.readFileSync(audioPath2));
+  // await asrClient.sendVoiceFile(fs.readFileSync(audioPath2));
+
 };

 main();
--- a/src/asr/provider/volcengine/test/auc.ts
+++ b/src/asr/provider/volcengine/test/auc.ts
@@ -0,0 +1,21 @@
+import { audioPath, config, sleep } from './common.ts';
+
+import { Asr } from '../auc.ts';
+import fs from 'fs';
+import util from 'node:util';
+const wavToBase64 = (filePath: string) => { // Reads the whole file synchronously and returns it base64-encoded
+  const data = fs.readFileSync(filePath);
+  return data.toString('base64');
+};
+
+const main = async () => { // Manual smoke test: sends audioPath as base64 to Asr.getText and prints the full response
+  const base64Audio = wavToBase64(audioPath);
+  const auc = new Asr({
+    appid: config.VOLCENGINE_AUC_APPID,
+    token: config.VOLCENGINE_AUC_TOKEN,
+  });
+  const result = await auc.getText({ audio: { data: base64Audio } });
+  console.log(util.inspect(result, { showHidden: false, depth: null, colors: true })) // depth: null prints every nesting level
+}
+
+main();
--- a/src/asr/provider/ws-server.ts
+++ b/src/asr/provider/ws-server.ts
@@ -1,7 +1,7 @@
 import { EventEmitter } from 'eventemitter3';
 import { initWs } from '../../ws-adapter/index.ts';
 import type { ClientOptions } from 'ws';
-type WSSOptions = {
+export type WSSOptions = {
  url: string;
  ws?: WebSocket;
  onConnect?: () => void;
@@ -45,7 +45,7 @@ export class WSServer {
   */
  async onOpen() {
    this.connected = true;
-    this.onConnect();
+    this?.onConnect?.();
    this.emitter.emit('open');
  }
  /**
--- a/src/logger/index.ts
+++ b/src/logger/index.ts
@@ -1,37 +1,6 @@
-import { pino } from 'pino';
-import { useConfig } from '@kevisual/use-config/env';
+import { Logger } from '@kevisual/logger/node';

-const config = useConfig();
-
-export const logger = pino({
-  level: config.LOG_LEVEL || 'info',
-  transport: {
-    target: 'pino-pretty',
-    options: {
-      colorize: true,
-      translateTime: 'SYS:standard',
-      ignore: 'pid,hostname',
-    },
-  },
-  serializers: {
-    error: pino.stdSerializers.err,
-    req: pino.stdSerializers.req,
-    res: pino.stdSerializers.res,
-  },
-  // base: {
-  //   app: 'ai-videos',
-  //   env: process.env.NODE_ENV || 'development',
-  // },
+const level = process.env.LOG_LEVEL || 'info';
+export const logger = new Logger({
+  level: level as any,
 });
-
-export const logError = (message: string, data?: any) => logger.error({ data }, message);
-export const logWarning = (message: string, data?: any) => logger.warn({ data }, message);
-export const logInfo = (message: string, data?: any) => logger.info({ data }, message);
-export const logDebug = (message: string, data?: any) => logger.debug({ data }, message);
-
-export const log = {
-  error: logError,
-  warn: logWarning,
-  info: logInfo,
-  debug: logDebug,
-};
--- a/src/recorder/index.ts
+++ b/src/recorder/index.ts
@@ -1,9 +1,8 @@
 import assert from 'assert';
-import { logDebug, logInfo } from '../logger/index.ts';
+import { logger } from '../logger/index.ts';
 import { ChildProcessWithoutNullStreams, spawn } from 'child_process';
 import recorders from '../recorder/recorders/index.ts';
 import Stream from 'stream';
-
 export type RecordingOptions = {
  /* 采样率，默认为16000 */
  sampleRate?: number;
@@ -64,9 +63,9 @@ export class Recording {
    this.args = args;
    this.cmdOptions = Object.assign({ encoding: 'binary', stdio: 'pipe' }, spawnOptions);

-    logDebug(`Started recording`);
-    logDebug('options', this.options);
-    logDebug(` ${this.cmd} ${this.args.join(' ')}`);
+    logger.debug(`Started recording`);
+    logger.debug('options', this.options);
+    logger.debug(` ${this.cmd} ${this.args.join(' ')}`);

    return this.start();
  }
@@ -92,15 +91,15 @@ Enable debugging with the environment variable DEBUG=record.`,
    });

    err.on('data', (chunk) => {
-      logDebug(`STDERR: ${chunk}`);
+      logger.debug(`STDERR: ${chunk}`);
    });

    rec.on('data', (chunk) => {
-      logDebug(`Recording ${chunk.length} bytes`);
+      logger.debug(`Recording ${chunk.length} bytes`);
    });

    rec.on('end', () => {
-      logDebug('Recording ended');
+      logger.debug('Recording ended');
    });

    return this;
@@ -117,7 +116,7 @@ Enable debugging with the environment variable DEBUG=record.`,

    this.process.kill('SIGSTOP');
    this._stream.pause();
-    logDebug('Paused recording');
+    logger.debug('Paused recording');
  }

  resume() {
@@ -125,7 +124,7 @@ Enable debugging with the environment variable DEBUG=record.`,

    this.process.kill('SIGCONT');
    this._stream.resume();
-    logDebug('Resumed recording');
+    logger.debug('Resumed recording');
  }

  isPaused() {
--- a/src/tts/provider/cosyvoice/test/tts.ts
+++ b/src/tts/provider/cosyvoice/test/tts.ts
@@ -0,0 +1,30 @@
+import { Client } from '@gradio/client';
+import path from 'node:path';
+import fs from 'node:fs';
+
+// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
+// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
+// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
+const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
+const name = 'output-1746007775571.mp3';
+const videoTestPath2 = path.join(process.cwd(), 'build', name);
+const textPath = path.join(process.cwd(), 'build', '01-kevisual.md');
+const exampleAudio = fs.readFileSync(videoTestPath);
+// const exampleAudio = await response_0.blob();
+const text = fs.readFileSync(textPath, 'utf-8');
+const client = await Client.connect('http://192.168.31.220:50000/');
+const result = await client.predict('/generate_audio', {
+  // tts_text: '梦里的一天结束了一个新的轮m,回梦里的一天结束了一个新的轮回梦里的一,,天结束了一个新的轮回,梦里的一天结束了一个新的轮回',
+  tts_text: text,
+  mode_checkbox_group: '3s极速复刻',
+  sft_dropdown: '',
+  prompt_text: '在一无所知中，梦里的一天结束了一个新的轮回，便会开始。',
+  prompt_wav_upload: exampleAudio,
+  prompt_wav_record: null,
+  instruct_text: '',
+  seed: 3,
+  stream: false,
+  speed: 1,
+});
+
+console.log(result.data);
--- a/src/tts/provider/cosyvoice/tts.ts
+++ b/src/tts/provider/cosyvoice/tts.ts
@@ -0,0 +1,53 @@
+import { Client } from '@gradio/client';
+
+type CosyVoiceTTSOptions = {
+  url: string;
+};
+type AudioOptions = {
+  tts_text: string;
+  mode_checkbox_group: string;
+  sft_dropdown: string;
+  prompt_text: string;
+  prompt_wav_upload?: any;
+  prompt_wav_record: any | null;
+  instruct_text: string;
+  seed: number;
+  stream: boolean;
+  speed: number;
+};
+
+export class CosyVoiceTTS { // Wraps a Gradio client that calls the '/generate_audio' endpoint
+  private client: Client;
+  private url: string;
+  isInit = false; // set to true once init() has connected
+  constructor(opts?: CosyVoiceTTSOptions) {
+    this.url = opts?.url || 'http://localhost:50000/'; // default server URL when none is given
+  }
+  async init() { // connects the Gradio client to this.url and marks the instance as initialized
+    const url = this.url;
+    const client = await Client.connect(url);
+    this.client = client;
+    this.isInit = true;
+    return true;
+  }
+  generateAudio = async (opts?: Partial<AudioOptions>) => { // connects lazily on first use, then returns the raw predict() result
+    if (!this.isInit) {
+      await this.init();
+    }
+    const data: AudioOptions = {
+      tts_text: '梦里的一天结束了一个新的轮m,回梦里的一天结束了一个新的轮回梦里的一,,天结束了一个新的轮回,梦里的一天结束了一个新的轮回',
+      mode_checkbox_group: '3s极速复刻',
+      sft_dropdown: '',
+      prompt_text: '在一无所知中，梦里的一天结束了一个新的轮回，便会开始。',
+      // prompt_wav_upload: exampleAudio,
+      prompt_wav_record: null,
+      instruct_text: '',
+      seed: 3,
+      stream: false,
+      speed: 1,
+      ...opts, // caller-supplied fields override the defaults above
+    };
+    const result = await this.client.predict('/generate_audio', data);
+    return result;
+  };
+}
--- a/src/wake/test/build.ts
+++ b/src/wake/test/build.ts
@@ -2,7 +2,7 @@ import vosk from 'vosk';
 import { Recording } from '../../recorder/index.ts';
 import fs from 'fs';
 import path from 'path';
-import { audioPath, sleep } from './common.ts';
+import { audioPath, sleep, mySpeechText } from './common.ts';
 import { encodeWav, decodeWav } from '../../utils/convert.ts';
 // 需要先下载Vosk模型
 // const MODEL_PATH = 'vosk-model-small-en-us-0.15';
@@ -21,8 +21,12 @@ async function detectWithVosk(audioFilePath) {
  const wakeWords = ['欢迎']; // 自定义唤醒词列表
  const audioBuffer = fs.readFileSync(audioFilePath);
  const pcmBuffer = decodeWav(audioBuffer);
-  const result = await rec.acceptWaveformAsync(pcmBuffer);
+  const result = rec.acceptWaveform(pcmBuffer);
  console.log('result', result, rec.result());
+
+  // const result = await rec.acceptWaveformAsync(pcmBuffer);
+  // console.log('result', result, rec.result());
+
  // return new Promise((resolve) => {
  //   const pcmBufferLength = Buffer.byteLength(pcmBuffer);
  //   console.log('pcmBufferLength', pcmBufferLength);
@@ -44,6 +48,10 @@ async function detectWithVosk(audioFilePath) {
  // });
 }

-detectWithVosk(audioPath).then((result) => {
+// detectWithVosk(audioPath).then((result) => {
+//   console.log('result', result);
+// });
+
+detectWithVosk(mySpeechText).then((result) => {
  console.log('result', result);
 });
--- a/src/wake/test/common.ts
+++ b/src/wake/test/common.ts
@@ -6,7 +6,13 @@ export const config = dotenv.config({
 }).parsed;

 export const audioPath = path.join(process.cwd(), 'videos/asr_example.wav');
+export const mySpeechText = path.join(process.cwd(), 'videos/my_speech_text.wav');
 export const audioPath2 = path.join(process.cwd(), 'videos/asr_example2.wav');
 export const blankAudioPath = path.join(process.cwd(), 'videos/blank.wav');

 export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
+
+const model_all = 'models/vosk-model-cn-0.22';
+const model_small = 'models/vosk-model-small-cn-0.22';
+export const MODEL_PATH = path.join(process.cwd(), model_small);
+// export const MODEL_PATH = path.join(process.cwd(), model_all);
--- a/src/wake/test/stream.ts
+++ b/src/wake/test/stream.ts
@@ -0,0 +1,178 @@
+import vosk from 'vosk';
+import { Recording } from '../../recorder/index.ts';
+import fs, { WriteStream } from 'fs';
+import path from 'path';
+import { audioPath, sleep, mySpeechText, MODEL_PATH } from './common.ts';
+import { encodeWav, decodeWav } from '../../utils/convert.ts';
+
+/**
+ * Feeds a WAV file to a Vosk recognizer in 1024-byte chunks and logs every
+ * partial and completed result, followed by the recognizer's final result.
+ *
+ * @param audioFilePath - path of the WAV file to transcribe
+ * @returns false when nothing exists at MODEL_PATH, true otherwise
+ */
+const streamText = async (audioFilePath: string) => {
+  if (!fs.existsSync(MODEL_PATH)) {
+    console.error('请先下载Vosk模型');
+    return false;
+  }
+
+  const model = new vosk.Model(MODEL_PATH);
+  const rec = new vosk.Recognizer({ model: model, sampleRate: 16000 });
+
+  try {
+    const audioBuffer = fs.readFileSync(audioFilePath);
+    const pcmBuffer = decodeWav(audioBuffer);
+
+    for (let i = 0; i < pcmBuffer.length; i += 1024) {
+      const chunk = pcmBuffer.subarray(i, i + 1024);
+      if (rec.acceptWaveform(chunk)) {
+        const result = rec.result();
+        console.log('Streamed Result:', result);
+      } else {
+        const partialResult = rec.partialResult();
+        console.log('Partial Result:', partialResult);
+      }
+      // await sleep(100); // simulate latency
+    }
+
+    // Flush whatever the recognizer still holds after the last chunk;
+    // without this the tail of the audio is never reported.
+    const finalResult = rec.finalResult();
+    console.log('Final Complete Result:', finalResult);
+  } finally {
+    // Release the native handles, mirroring the cleanup done in record().
+    rec.free();
+    model.free();
+  }
+
+  return true;
+};
+
+// 测试流式处理
+// streamText(mySpeechText)
+//   .then((result) => {
+//     console.log('Final Result:', result);
+//   })
+//   .catch((error) => {
+//     console.error('Error during streaming:', error);
+//   });
+
+const record = async () => { // Streams audio from a Recording into a Vosk recognizer, logs results and wake-word matches, and returns a { stop } handle
+  const recording = new Recording({
+    sampleRate: 16000,
+    channels: 1,
+  });
+
+  recording.start();
+  const stream = recording.stream();
+  console.log('Recording started...', stream);
+  const model = new vosk.Model(MODEL_PATH);
+  const rec = new vosk.Recognizer({
+    model: model,
+    sampleRate: 16000,
+    grammar: ['你', '好', '小', '嗨', '秀'], // characters that make up the wake words
+  });
+  console.log('Vosk Recognizer initialized...');
+
+  // Accumulation buffer for incoming audio
+  let accumulatedBuffer = Buffer.alloc(0);
+  const PROCESS_SIZE = 4 * 8192; // merge roughly four 8192-byte chunks before processing (tune as needed)
+
+  stream.on('data', (data: Buffer) => {
+    // const pcmBuffer = decodeWav(data); // 8192 bytes per chunk
+    const pcmBuffer = data; // assumes the data is already raw PCM
+
+    // Append the new data to the accumulation buffer
+    accumulatedBuffer = Buffer.concat([accumulatedBuffer, pcmBuffer]);
+
+    // Process once enough data has accumulated
+    if (accumulatedBuffer.length >= PROCESS_SIZE) {
+      if (rec.acceptWaveform(accumulatedBuffer)) {
+        const result = rec.result();
+        console.log('Recorded Result:', result);
+        // Check whether the result contains a wake word
+        if (result.text) {
+          const detect = detectWakeWord(result.text);
+          if (detect.detected) {
+            console.log(`检测到唤醒词: "${detect.word}"，置信度: ${detect.confidence}`);
+          }
+          // Placeholder: run the post-wake action here
+        }
+      } else {
+        const partialResult = rec.partialResult();
+        console.log('Partial Result:', partialResult);
+      }
+
+      // Reset the accumulation buffer
+      accumulatedBuffer = Buffer.alloc(0);
+    }
+  });
+
+  // Handle the end of the recording stream
+  stream.on('end', () => {
+    // Process whatever is left in the buffer
+    if (accumulatedBuffer.length > 0) {
+      if (rec.acceptWaveform(accumulatedBuffer)) {
+        const result = rec.result();
+        console.log('Final Recorded Result:', result);
+      }
+    }
+
+    // Fetch the final result
+    const finalResult = rec.finalResult();
+    console.log('Final Complete Result:', finalResult);
+
+    // Release the recognizer and model
+    rec.free();
+    model.free();
+  });
+
+  // Return a handle that stops the recording
+  return {
+    stop: () => {
+      recording.stop();
+    },
+  };
+};
+// Wake-word configuration
+const wakeConfig = {
+  words: ['你好小小', '嗨小小', '小小', '秀秀'],
+  threshold: 0.75, // similarity a candidate must exceed to count as detected
+  minWordCount: 2, // minimum text length; detectWakeWord compares it against the character count
+};
+/**
+ * Checks recognized text against the configured wake words.
+ *
+ * Whitespace is stripped before comparing: Vosk result text is a list of
+ * tokens joined by spaces (e.g. "你 好 小 小"), which would otherwise never
+ * match the contiguous wake words in wakeConfig.
+ *
+ * @param text - recognized text
+ * @returns the best-scoring wake word above wakeConfig.threshold, or a
+ *   non-detected result ({ detected: false, confidence: 0, word: '' })
+ */
+function detectWakeWord(text: string): { detected: boolean; confidence: number; word: string } {
+  const normalizedText = (text || '').replace(/\s+/g, '').toLowerCase();
+  if (normalizedText.length < wakeConfig.minWordCount) return { detected: false, confidence: 0, word: '' };
+
+  let bestMatch = { detected: false, confidence: 0, word: '' };
+
+  for (const wakeWord of wakeConfig.words) {
+    // Similarity between the whitespace-free text and the wake word
+    const confidence = calculateSimilarity(normalizedText, wakeWord.replace(/\s+/g, '').toLowerCase());
+    console.log(`检测到唤醒词 "${wakeWord}" 的相似度: ${confidence}`);
+    if (confidence > wakeConfig.threshold && confidence > bestMatch.confidence) {
+      bestMatch = { detected: true, confidence, word: wakeWord };
+    }
+  }
+
+  return bestMatch;
+}
+
+// Simple string similarity: 1.0 when str1 contains str2; otherwise the best fraction of same-position character matches of the shorter string against any equally long window of the longer one
+function calculateSimilarity(str1: string, str2: string): number {
+  if (str1.includes(str2)) return 1.0;
+
+  // NOTE: this is not a Levenshtein distance; it only slides the shorter string along the longer one
+  const longer = str1.length > str2.length ? str1 : str2;
+  const shorter = str1.length > str2.length ? str2 : str1;
+
+  // An empty shorter string yields a similarity of 0
+  if (shorter.length === 0) return 0;
+
+  // Best count of same-position matches over all windows (could be replaced by a more elaborate algorithm)
+  let matchCount = 0;
+  for (let i = 0; i <= longer.length - shorter.length; i++) {
+    const segment = longer.substring(i, i + shorter.length);
+    let localMatches = 0;
+    for (let j = 0; j < shorter.length; j++) {
+      if (segment[j] === shorter[j]) localMatches++;
+    }
+    matchCount = Math.max(matchCount, localMatches);
+  }
+
+  return matchCount / shorter.length;
+}
+// Start recording and stop it after a timeout
+(async () => {
+  const recorder = await record();
+
+  // Optional: stop recording automatically after 10 * 30 * 1000 ms (5 minutes)
+  setTimeout(() => {
+    console.log('Stopping recording...');
+    recorder.stop();
+  }, 10 * 30 * 1000);
+})();
--- a/src/ws-adapter/index.ts
+++ b/src/ws-adapter/index.ts
@@ -1,6 +1,15 @@
-const isBrowser = process?.env?.BROWSER === 'true';
-import { EventEmitter } from 'events';
-
+const isBrowser = (typeof process === 'undefined') || 
+  (typeof window !== 'undefined' && typeof window.document !== 'undefined') || 
+  (typeof process !== 'undefined' && process?.env?.BROWSER === 'true');
+const chantHttpToWs = (url: string) => { // Maps http:// to ws:// and https:// to wss://; any other URL is returned unchanged
+  if (url.startsWith('http://')) {
+    return url.replace('http://', 'ws://');
+  }
+  if (url.startsWith('https://')) {
+    return url.replace('https://', 'wss://');
+  }
+  return url;
+};
 type WebSocketOptions = {
  /**
   * 是否拒绝不安全的证书, in node only
@@ -11,13 +20,14 @@ type WebSocketOptions = {
 };
 export const initWs = async (url: string, options?: WebSocketOptions) => {
  let ws: WebSocket;
+  url = chantHttpToWs(url);
  if (isBrowser) {
    ws = new WebSocket(url);
  } else {
    const WebSocket = await import('ws').then((module) => module.default);
    const { rejectUnauthorized, headers, ...rest } = options || {};
    ws = new WebSocket(url, {
-      rejectUnauthorized: rejectUnauthorized || true,
+      rejectUnauthorized: rejectUnauthorized ?? true,
      headers: headers,
      ...rest,
    }) as any;
@@ -30,12 +40,3 @@ interface EventEmitterOptions {
   */
  captureRejections?: boolean | undefined;
 }
-/**
- * 初始化一个事件发射器
- * @param opts 事件发射器选项
- * @returns 事件发射器
- */
-export const initEmitter = (opts?: EventEmitterOptions) => {
-  const emitter = new EventEmitter(opts);
-  return emitter;
-};
--- a/tsup.config.mjs
+++ b/tsup.config.mjs
@@ -1,41 +0,0 @@
-import { defineConfig } from 'tsup';
-// import glob from 'fast-glob';
-// const services = glob.sync('src/services/*.ts');
-import fs from 'fs';
-
-const clean = () => {
-  const distDir = 'dist';
-  if (fs.existsSync(distDir)) {
-    fs.rmSync(distDir, { recursive: true, force: true });
-  }
-};
-clean();
-
-const entrys = ['src/index.ts'];
-const nodeEntrys = ['src/dev.ts'];
-
-const getCommonConfig = (opts = {}) => {
-  return {
-    entry: opts.entry,
-    outExtension: ({ format }) => ({
-      js: format === 'esm' ? '.mjs' : '.js',
-    }),
-    splitting: false,
-    sourcemap: false,
-    // clean: true,
-    format: 'esm',
-    external: ['dotenv'],
-    dts: true,
-    outDir: 'dist',
-    tsconfig: 'tsconfig.json',
-    ...opts,
-    define: {
-      'process.env.IS_BROWSER': JSON.stringify(process.env.BROWSER || false),
-      ...opts.define,
-    },
-  };
-};
-export default defineConfig([
-  // getCommonConfig({ entry: entrys, define: { 'process.env.IS_BROWSER': JSON.stringify(true) } }), // 浏览器
-  getCommonConfig({ entry: nodeEntrys, define: { 'process.env.IS_BROWSER': JSON.stringify(false) } }), // node
-]);
--- a/videos/my_speech_text.txt
+++ b/videos/my_speech_text.txt
@@ -0,0 +1 @@
+在一无所知中，梦里的一天结束了一个新的轮回，便会开始。
--- a/videos/my_speech_text.wav
+++ b/videos/my_speech_text.wav
Author	SHA1	Message	Date
xiongxiao	9e94a4d898	update	2025-10-14 23:04:59 +08:00
熊潇	d4475cb2f2	更新 src/asr/provider/volcengine/auc.ts	2025-10-14 22:52:24 +08:00
xiongxiao	5603d09e80	update	2025-10-13 22:13:19 +08:00
xiongxiao	78cc6dcf55	update	2025-10-03 18:43:57 +08:00
abearxiong	8047577165	temp test	2025-08-23 22:34:36 +08:00
abearxiong	e4596b4fde	add batch send file to get text	2025-06-23 18:34:54 +08:00
abearxiong	767e436eb8	fix: fix ws	2025-06-23 10:38:01 +08:00
abearxiong	203fa1f103	fix:	2025-06-22 15:18:44 +08:00
abearxiong	87769076c8	fix: add src code	2025-06-22 13:34:34 +08:00
abearxiong	4a9568447e	remove some dependencies	2025-06-22 12:46:24 +08:00
abearxiong	b3b64ec59c	bump version	2025-06-04 10:09:49 +08:00
abearxiong	232d799575	"feat: 更新ASR服务连接配置，优化录音流处理及模型路径"	2025-06-02 12:38:53 +08:00
abearxiong	e638d7907a	test	2025-05-24 00:10:21 +08:00
abearxiong	38b4e58124	add txt	2025-05-20 12:17:52 +08:00
abearxiong	776e0800e9	tts for cosyvoice and funasr and aliyun	2025-05-20 00:39:21 +08:00
abearxiong	54da76bf9d	阿里云一句话识别	2025-05-19 01:44:24 +08:00
abearxiong	a1df51f56b	fix funasr	2025-05-19 01:01:38 +08:00
				`@@ -0,0 +1 @@`
				`在一无所知中，梦里的一天结束了一个新的轮回，便会开始。`