temp: add test

熊潇 2025-04-18 18:28:34 +08:00
parent d92b93c6f9
commit fdc3985b93
22 changed files with 669 additions and 2082 deletions

.gitignore vendored

@@ -15,3 +15,5 @@ logs
.env*
!.env.example
models

package.json

@@ -36,16 +36,17 @@
"access": "public"
},
"dependencies": {
"@kevisual/code-center-module": "0.0.18",
"@kevisual/mark": "0.0.7",
"@kevisual/router": "0.0.10",
"@kevisual/use-config": "^1.0.10",
"@kevisual/video": "^0.0.1",
"@picovoice/porcupine-node": "^3.0.6",
"cookie": "^1.0.2",
"dayjs": "^1.11.13",
"eventemitter3": "^5.0.1",
"formidable": "^3.5.2",
"lodash-es": "^4.17.21",
"nanoid": "^5.1.5",
"node-record-lpcm16": "^1.0.1"
"vosk": "^0.3.39"
},
"devDependencies": {
"@kevisual/types": "^0.0.6",
@@ -59,22 +60,22 @@
"@types/crypto-js": "^4.2.2",
"@types/formidable": "^3.4.5",
"@types/lodash-es": "^4.17.12",
"@types/node": "^22.14.0",
"@types/node": "^22.14.1",
"@types/vosk": "^0.3.1",
"@types/ws": "^8.18.1",
"commander": "^13.1.0",
"concurrently": "^9.1.2",
"cross-env": "^7.0.3",
"dotenv": "^16.5.0",
"inquire": "^0.4.8",
"ioredis": "^5.6.0",
"jsrepo": "^1.45.3",
"ioredis": "^5.6.1",
"nodemon": "^3.1.9",
"pg": "^8.14.1",
"pino": "^9.6.0",
"pino-pretty": "^13.0.0",
"pm2": "^6.0.5",
"rimraf": "^6.0.1",
"rollup": "^4.39.0",
"rollup": "^4.40.0",
"rollup-plugin-copy": "^3.5.0",
"rollup-plugin-dts": "^6.2.1",
"rollup-plugin-esbuild": "^6.2.1",
@@ -85,5 +86,5 @@
"typescript": "^5.8.3",
"ws": "^8.18.1"
},
"packageManager": "pnpm@10.7.1"
"packageManager": "pnpm@10.8.1"
}

pnpm-lock.yaml generated

File diff suppressed because it is too large


@@ -1,16 +1,16 @@
// import WebSocket from 'ws';
import { initWs } from '../../ws/index.ts';
import { initWs } from '../../ws-adapter/index.ts';
type VideoWSOptions = {
export type VideoWSOptions = {
url?: string;
ws?: WebSocket;
itn?: boolean;
mode?: string;
mode?: VideoWsMode;
isFile?: boolean;
onConnect?: () => void;
};
export const VideoWsMode = ['2pass', 'online', 'offline'];
type VideoWsMode = (typeof VideoWsMode)[number];
export const videoWsMode = ['2pass', 'online', 'offline'] as const;
type VideoWsMode = (typeof videoWsMode)[number];
export type VideoWsResult = {
isFinal: boolean;

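The `as const` assertion is what makes the derived `VideoWsMode` union useful: a plain array widens to `string[]`, so the indexed type collapses to `string`, while a readonly tuple keeps the literals. A minimal illustration of the pattern:

// Plain array: element type widens to string, so the union is lost.
const loose = ['2pass', 'online', 'offline'];
type LooseMode = (typeof loose)[number]; // string

// `as const`: readonly tuple of literals, so the union survives.
const strict = ['2pass', 'online', 'offline'] as const;
type StrictMode = (typeof strict)[number]; // '2pass' | 'online' | 'offline'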

src/asr/provider/volcengine/asr-ws-big-model-client.ts

@@ -1,8 +1,4 @@
import * as fs from 'fs/promises';
import * as path from 'path';
import * as zlib from 'zlib';
import * as util from 'util';
import { Readable } from 'stream';
import { promisify } from 'util';
import { nanoid } from 'nanoid';
import { VolcEngineBase, uuid } from './base.ts';
@@ -254,7 +250,6 @@ interface AudioItem {
* ASR WebSocket Client
*/
export class AsrWsClient extends VolcEngineBase {
private audioPath: string;
private successCode: number = 1000;
private segDuration: number;
private format: string;
@@ -262,16 +257,14 @@ export class AsrWsClient extends VolcEngineBase {
private bits: number;
private channel: number;
private codec: string;
private authMethod: string;
private hotWords: string[] | null;
private streaming: boolean;
private mp3SegSize: number;
private reqEvent: number = 1;
private uid: string;
private seq: number = 1;
private hasSendFullClientRequest: boolean = false;
constructor(audioPath: string, options: AsrClientOptions = {}) {
constructor(options: AsrClientOptions = {}) {
super({
url: options.wsUrl || 'wss://openspeech.bytedance.com/api/v3/sauc/bigmodel',
onConnect: () => this.onWsConnect(),
@@ -285,22 +278,19 @@
},
});
this.audioPath = audioPath;
this.segDuration = options.segDuration || 100;
this.segDuration = options.segDuration || 200;
this.uid = options.uid || 'test';
this.format = options.format || 'wav';
this.rate = options.rate || 16000;
this.bits = options.bits || 16;
this.channel = options.channel || 1;
this.codec = options.codec || 'raw';
this.authMethod = options.authMethod || 'none';
this.hotWords = options.hotWords || null;
this.streaming = options.streaming !== undefined ? options.streaming : true;
this.mp3SegSize = options.mp3SegSize || 1000;
}
private onWsConnect() {
console.log('ASR WebSocket connected');
console.log('ASR Big Model WebSocket connected');
}
/**
@@ -321,6 +311,7 @@ export class AsrWsClient extends VolcEngineBase {
request: {
model_name: 'bigmodel',
enable_punc: true,
result_type: 'single', // all, single
},
};
}
@@ -357,90 +348,59 @@ export class AsrWsClient extends VolcEngineBase {
private async segmentDataProcessor(audioData: Buffer, segmentSize: number): Promise<any> {
await this.sendFullClientRequest();
const that = this;
// Wait for response
const result = await new Promise<any>((resolve, reject) => {
const onMessage = async (event: MessageEvent) => {
try {
const response = parseResponse(Buffer.from(event.data as ArrayBuffer));
console.log('Initial response:', response);
// Process audio chunks
const sendVoice = async (audioData: Buffer, segmentSize: number) => {
that.setCanSend(false);
for (const [chunk, last] of sliceData(audioData, segmentSize)) {
that.seq += 1;
if (last) {
const isEnd = that.isEnd && last; // the stream has ended and this is the last audio packet
if (isEnd) {
that.seq = -that.seq;
}
const seq = that.seq;
const start = Date.now();
const compressedChunk = await gzipPromise(chunk);
const messageType = AUDIO_ONLY_REQUEST;
const flags = last ? NEG_WITH_SEQUENCE : POS_SEQUENCE;
const flags = isEnd ? NEG_WITH_SEQUENCE : POS_SEQUENCE;
const audioRequest = Buffer.concat([generateHeader(messageType, flags), generateBeforePayload(seq), Buffer.alloc(4), compressedChunk]);
// Set payload size
audioRequest.writeUInt32BE(compressedChunk.length, 8);
// Send audio chunk
(this as any).ws.send(audioRequest);
// Wait for each response
const chunkResponse = await new Promise<any>((resolveChunk) => {
const onChunkMessage = (chunkEvent: MessageEvent) => {
(this as any).ws.removeEventListener('message', onChunkMessage);
const parsed = parseResponse(Buffer.from(chunkEvent.data as ArrayBuffer));
console.log(`Seq ${seq} response:`, parsed);
resolveChunk(parsed);
this.ws.send(audioRequest);
// TODO: verify whether we need to wait for a response to each chunk
// const chunkResponse = await new Promise<any>((resolveChunk) => {
// const onChunkMessage = (chunkEvent: MessageEvent) => {
// (this as any).ws.removeEventListener('message', onChunkMessage);
// const parsed = parseResponse(Buffer.from(chunkEvent.data as ArrayBuffer));
// resolveChunk(parsed);
// };
// (this as any).ws.addEventListener('message', onChunkMessage, { once: true });
// });
// if (last) {
// console.log('last', JSON.stringify(chunkResponse));
// break;
// }
}
that.setCanSend(true);
};
(this as any).ws.addEventListener('message', onChunkMessage, { once: true });
});
// If streaming, add delay to simulate real-time
if (this.streaming) {
const elapsed = Date.now() - start;
const sleepTime = Math.max(0, this.segDuration - elapsed);
await new Promise((r) => setTimeout(r, sleepTime));
// Wait for response
await sendVoice(audioData, segmentSize);
}
// If this is the last chunk, resolve with final result
if (last) {
resolve(chunkResponse);
break;
async onMessage(event: MessageEvent) {
try {
const parsed = parseResponse(Buffer.from(event.data as ArrayBuffer));
console.log(`Seq ${parsed.payloadSequence} response:`, parsed);
// console.log('parsed', parsed.payloadSequence, parsed.payloadMsg.result.text);
if (parsed.isLastPackage) {
this.emitter.emit('end', parsed);
}
}
(this as any).ws.removeEventListener('message', onMessage);
} catch (error) {
console.error('Error processing response:', error);
reject(error);
}
};
(this as any).ws.addEventListener('message', onMessage, { once: true });
(this as any).ws.addEventListener(
'error',
(error) => {
console.error('WebSocket error:', error);
reject(error);
},
{ once: true },
);
});
return result;
}
/**
* Execute ASR on the audio file
*/
public async execute(): Promise<any> {
public async sendVoiceFile(data: Buffer) {
try {
const data = await fs.readFile(this.audioPath);
if (this.format === 'mp3') {
const segmentSize = this.mp3SegSize;
return await this.segmentDataProcessor(data, segmentSize);
@@ -465,65 +425,9 @@ export class AsrWsClient extends VolcEngineBase {
throw error;
}
}
/**
* Send OPUS data for processing
*/
public async sendOpusData(audioData: Buffer): Promise<any> {
const segmentSize = Math.floor((this.rate * 2 * this.channel * this.segDuration) / 500);
return await this.segmentDataProcessor(audioData, segmentSize);
public async sendVoiceStream(data: Buffer) {
const segmentSize = Buffer.byteLength(data);
console.log('segmentSize', segmentSize);
return await this.segmentDataProcessor(data, segmentSize);
}
}
/**
* Execute ASR on a single audio file
*/
export async function executeOne(audioItem: AudioItem, options: AsrClientOptions = {}): Promise<any> {
if (!audioItem.id || !audioItem.path) {
throw new Error('Audio item must have id and path properties');
}
const audioId = audioItem.id;
const audioPath = path.resolve(process.cwd(), audioItem.path);
const asrClient = new AsrWsClient(audioPath, options);
await new Promise((resolve) => setTimeout(resolve, 2000));
return asrClient.execute().then((result) => {
return {
id: audioId,
path: audioPath,
result: result,
};
});
}
/**
* Test stream processing
*/
export const testStream = async () => {
console.log('test streaming');
const audioPath = 'videos/asr_example.wav';
const res = await executeOne({
id: 1,
path: audioPath,
})
.then((result) => {
console.log('====end test=====');
console.log(result);
return result;
})
.catch((error) => {
console.error('Test error:', error);
return '';
});
};
/**
* Handle audio data directly
*/
export async function handleAudioData(audioData: Buffer, options: AsrClientOptions = {}): Promise<any> {
const asrClient = new AsrWsClient('', options);
return await asrClient.sendOpusData(audioData);
}
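The send loop in segmentDataProcessor iterates `sliceData(audioData, segmentSize)`, a helper that lives elsewhere in this module and is not shown in the hunk. A plausible sketch, assuming it yields [chunk, isLast] pairs as the loop expects:

// Hypothetical sketch of the sliceData helper consumed above (not shown in this diff):
// yields [chunk, isLast] pairs so the sender can flag the final packet.
export function* sliceData(data: Buffer, chunkSize: number): Generator<[Buffer, boolean]> {
  for (let offset = 0; offset < data.length; offset += chunkSize) {
    const chunk = data.subarray(offset, offset + chunkSize);
    const isLast = offset + chunkSize >= data.length;
    yield [chunk, isLast];
  }
}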


@@ -265,7 +265,6 @@ export class AsrWsClient extends VolcEngineBase {
super({
url: options.wsUrl || 'wss://openspeech.bytedance.com/api/v2/asr',
onConnect: () => this.onWsConnect(),
enabled: false,
wsOptions: {
headers: {
Authorization: `Bearer; ${options.token}`,
@@ -329,13 +328,6 @@
},
};
}
/**
* Generate headers for authentication
*/
private tokenAuth(): Record<string, string> {
return { Authorization: `Bearer; ${this.token}` };
}
/**
* Process audio data in segments
*/

src/asr/provider/volcengine/base.ts

@@ -1,5 +1,5 @@
import { initWs } from '../../ws/index.ts';
import { WSServer } from '../ws-server.ts';
import { initWs } from '../../ws-adapter/index.ts';
import { WSServer } from '../../provider/ws-server.ts';
import { nanoid } from 'nanoid';
export const uuid = () => nanoid(16);
@@ -7,7 +7,6 @@ export const uuid = () => nanoid(16);
type VolcEngineBaseOptions = {
url?: string;
ws?: WebSocket;
enabled?: boolean;
onConnect?: () => void;
wsOptions?: {
headers?: {
@@ -20,17 +19,38 @@ type VolcEngineBaseOptions = {
};
};
export class VolcEngineBase extends WSServer {
canSend = false;
isEnd: boolean = false;
constructor(opts: VolcEngineBaseOptions) {
super({
url: opts.url,
ws: opts.ws,
onConnect: opts.onConnect,
wsOptions: opts.wsOptions,
enabled: opts.enabled,
});
}
async onOpen() {
console.log('VolcEngineBase onOpen');
// send authentication info here
}
async isCanSend() {
if (this.canSend) {
return true;
}
return new Promise((resolve) => {
this.emitter.once('canSend', () => {
resolve(true);
});
});
}
async setCanSend(canSend: boolean) {
this.canSend = canSend;
if (canSend) {
this.emitter.emit('canSend', canSend);
}
}
async setIsEnd(isEnd: boolean) {
this.isEnd = isEnd;
}
}
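The canSend flag acts as a simple gate: the client's sendVoice clears it before pushing a batch of packets and restores it afterwards, and other callers can await isCanSend() to avoid interleaving writes. A sketch of that usage (the sendSafely wrapper is hypothetical):

// Hypothetical wrapper showing the intended use of the canSend gate.
async function sendSafely(client: VolcEngineBase, write: () => Promise<void>) {
  await client.isCanSend(); // resolves immediately, or waits for the 'canSend' event
  await client.setCanSend(false);
  try {
    await write(); // push one batch of audio packets
  } finally {
    await client.setCanSend(true);
  }
}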

src/asr/provider/volcengine/test/asr-bigmodel.ts

@@ -1,28 +1,21 @@
import { AsrWsClient, testStream } from '../asr-ws-big-model-client.ts';
import { audioPath, config } from './common.ts';
import { AsrWsClient } from '../asr-ws-big-model-client.ts';
import { audioPath, audioPath2, blankAudioPath, config } from './common.ts';
import fs from 'fs';
// const asr = new AsrWsClient('videos/asr_example.wav');
// tsx src/asr/provider/volcengine/test/asr-bigmodel.ts
const main = async () => {
const audioId = '123';
const asrClient = new AsrWsClient(audioPath, {
const asrClient = new AsrWsClient({
appid: config.APP_ID,
token: config.TOKEN,
streaming: false,
});
await new Promise((resolve) => setTimeout(resolve, 2000));
return asrClient.execute().then((result) => {
return {
id: audioId,
path: audioPath,
result: result,
};
});
};
const main2 = async () => {
testStream();
const data = fs.readFileSync(audioPath);
await asrClient.sendVoiceFile(data);
await asrClient.sendVoiceFile(fs.readFileSync(blankAudioPath));
asrClient.setIsEnd(true);
await asrClient.sendVoiceFile(fs.readFileSync(audioPath2));
};
main();

src/asr/provider/volcengine/test/common.ts

@@ -6,5 +6,7 @@ export const config = dotenv.config({
}).parsed;
export const audioPath = path.join(process.cwd(), 'videos/asr_example.wav');
export const audioPath2 = path.join(process.cwd(), 'videos/asr_example2.wav');
export const blankAudioPath = path.join(process.cwd(), 'videos/blank.wav');
export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));


@@ -0,0 +1,39 @@
import { AsrWsClient } from '../asr-ws-big-model-client.ts';
import { audioPath, config, sleep } from '../test/common.ts';
import net from 'net';
import { Recording } from '../../../../recorder/index.ts';
import Stream from 'stream';
const recorder = new Recording();
const asrClient = new AsrWsClient({
appid: config.APP_ID,
token: config.TOKEN,
});
// tsx src/asr/provider/volcengine/test/recorder.ts
const main = async () => {
// await asrClient.sendVoiceFile(fs.readFileSync(audioPath));
const send = (data: Buffer) => {
asrClient.sendVoiceStream(data);
};
let chunks: Buffer = Buffer.alloc(0);
var chunk_size = 960; // for asr chunk_size [5, 10, 5]
let totalsend = 0;
recorder.stream().on('data', (chunk) => {
chunks = Buffer.concat([chunks, chunk]);
const chunkSize = Buffer.byteLength(chunks);
if (chunkSize > chunk_size) {
send(chunks);
totalsend += chunks.length;
chunks = Buffer.alloc(0);
}
});
// setTimeout(() => {
// recorder.stop();
// }, 10000);
};
main();
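At 16 kHz, 16-bit mono PCM arrives at 32,000 bytes per second, so the 960-byte threshold flushes roughly every 30 ms. A sketch of the shutdown path this test leaves commented out, reusing the recorder, asrClient and chunks defined above (assumption: setIsEnd(true) goes before the last write so the final packet carries the negative-sequence flag):

// Sketch (assumption): stop recording, mark end-of-speech, and flush the tail buffer.
const stop = () => {
  recorder.stop();
  asrClient.setIsEnd(true); // the next packet is sent with the negative-sequence flag
  if (chunks.length > 0) {
    asrClient.sendVoiceStream(chunks);
    chunks = Buffer.alloc(0);
  }
};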

src/asr/provider/ws-server.ts

@@ -1,41 +1,77 @@
import { initWs } from '../ws/index.ts';
import { EventEmitter } from 'eventemitter3';
import { initWs } from '../ws-adapter/index.ts';
import type { ClientOptions } from 'ws';
type WSSOptions = {
url: string;
ws?: WebSocket;
onConnect?: () => void;
wsOptions?: ClientOptions;
enabled?: boolean;
emitter?: EventEmitter;
};
export class WSServer {
ws: WebSocket;
onConnect?: () => void;
connected: boolean;
emitter: EventEmitter;
constructor(opts: WSSOptions) {
this.initWs(opts);
}
async initWs(opts: WSSOptions) {
const enabled = opts.enabled || true;
if (opts.ws) {
this.ws = opts.ws;
} else if (enabled) {
this.ws = await initWs(opts.url, opts.wsOptions);
}
this.emitter = opts.emitter || new EventEmitter();
this.ws = await initWs(opts.url, opts.wsOptions);
this.onConnect = opts?.onConnect || (() => {});
this.ws.onopen = this.onOpen.bind(this);
this.ws.onmessage = this.onMessage.bind(this);
this.ws.onerror = this.onError.bind(this);
this.ws.onclose = this.onClose.bind(this);
}
/**
 * Handle the WebSocket open event.
 */
async onOpen() {
this.connected = true;
this.onConnect();
this.emitter.emit('open');
}
/**
 * Wait until the connection is open.
 * @returns true once connected
 */
async isConnected() {
if (this.connected) {
return true;
}
return new Promise((resolve) => {
this.emitter.once('open', () => {
resolve(true);
});
});
}
/**
 * Handle an incoming WebSocket message.
 * @param event
 */
async onMessage(event: MessageEvent) {
// console.log('WSS onMessage', event);
this.emitter.emit('message', event);
}
/**
 * Handle a WebSocket error.
 * @param event
 */
async onError(event: Event) {
console.error('WSS onError');
this.emitter.emit('error', event);
}
/**
 * Handle the WebSocket close event.
 * @param event
 */
async onClose(event: CloseEvent) {
console.error('WSS onClose');
this.emitter.emit('close', event);
}
}
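A short usage sketch for WSServer, consuming events through the shared emitter (the URL is a placeholder):

// Sketch: wait for the connection, then consume events via the shared emitter.
const server = new WSServer({ url: 'wss://example.com/socket' }); // placeholder URL
await server.isConnected();
server.emitter.on('message', (event: MessageEvent) => {
  console.log('payload', event.data);
});
server.emitter.once('close', () => console.log('socket closed'));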


@@ -1,4 +1,5 @@
const isBrowser = process?.env?.BROWSER === 'true';
import { EventEmitter } from 'events';
type WebSocketOptions = {
/**
@@ -23,3 +24,18 @@ export const initWs = async (url: string, options?: WebSocketOptions) => {
}
return ws;
};
interface EventEmitterOptions {
/**
* Enables automatic capturing of promise rejection.
*/
captureRejections?: boolean | undefined;
}
/**
 * Create a shared EventEmitter instance.
 * @param opts emitter options
 * @returns the emitter
 */
export const initEmitter = (opts?: EventEmitterOptions) => {
const emitter = new EventEmitter(opts);
return emitter;
};
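A usage sketch for the adapter exports; only the signatures shown above are assumed:

// Sketch: open a socket through the adapter and route messages over a shared emitter.
const ws = await initWs('wss://openspeech.bytedance.com/api/v3/sauc/bigmodel');
const emitter = initEmitter({ captureRejections: true });
ws.onmessage = (event) => emitter.emit('message', event.data);
emitter.on('message', (data) => console.log('received', data));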


@@ -1,7 +1,7 @@
import assert from 'assert';
import { logDebug, logInfo } from '../logger/index.ts';
import { ChildProcessWithoutNullStreams, spawn } from 'child_process';
import recorders from './recorders/index.ts';
import recorders from '../recorder/recorders/index.ts';
import Stream from 'stream';
export type RecordingOptions = {

src/utils/convert.ts Normal file

@@ -0,0 +1,69 @@
type EncodeWavOptions = {
numChannels?: number;
sampleRate?: number;
byteRate?: number;
};
/**
* Encode a raw PCM buffer as a WAV file
* @param rawPCM
* @param options
* @returns
*/
export function encodeWav(rawPCM: Buffer | string, options?: EncodeWavOptions) {
if (typeof rawPCM === 'string') {
rawPCM = Buffer.from(rawPCM, 'binary');
}
if (!Buffer.isBuffer(rawPCM)) {
throw new TypeError('pcm data must be Buffer or string');
}
const opt = options || {};
const sampleRate = opt.sampleRate || 16000;
const numChannels = opt.numChannels || 1;
const byteRate = opt.byteRate || 16;
const buf = rawPCM;
const header = Buffer.alloc(44);
header.write('RIFF', 0);
header.writeUInt32LE(buf.length, 4);
header.write('WAVE', 8);
header.write('fmt ', 12);
header.writeUInt8(16, 16);
header.writeUInt8(1, 20);
header.writeUInt8(numChannels, 22);
header.writeUInt32LE(sampleRate, 24);
header.writeUInt32LE(byteRate, 28);
header.writeUInt8(4, 32);
header.writeUInt8(16, 34);
header.write('data', 36);
header.writeUInt32LE(buf.length + 44 - 8, 40);
return Buffer.concat([header, buf]);
}
/**
* Decode a WAV file to raw PCM (strip the 44-byte header)
* @param rawWav
* @returns
*/
export function decodeWav(rawWav: Buffer | string) {
if (typeof rawWav === 'string') {
rawWav = Buffer.from(rawWav, 'binary');
}
if (!Buffer.isBuffer(rawWav)) {
throw new TypeError('pcm data must be Buffer or string');
}
// remove the header of pcm format
rawWav = rawWav.subarray(44);
return rawWav;
}
export const converter = {
encodeWav,
decodeWav,
};
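For reference, the canonical 44-byte PCM WAV header uses 32-bit little-endian chunk sizes and 16-bit fields for the audio format, channel count, block align and bits per sample; a sketch of those writes for 16-bit samples (byte rate = sampleRate * numChannels * 2, block align = numChannels * 2):

// Reference sketch of the standard 44-byte PCM WAV header (16-bit samples assumed).
function writeWavHeader(dataLength: number, sampleRate = 16000, numChannels = 1): Buffer {
  const header = Buffer.alloc(44);
  const byteRate = sampleRate * numChannels * 2; // bytes per second
  const blockAlign = numChannels * 2;            // bytes per sample frame
  header.write('RIFF', 0);
  header.writeUInt32LE(36 + dataLength, 4);      // RIFF chunk size
  header.write('WAVE', 8);
  header.write('fmt ', 12);
  header.writeUInt32LE(16, 16);                  // fmt chunk size
  header.writeUInt16LE(1, 20);                   // PCM format
  header.writeUInt16LE(numChannels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(16, 34);                  // bits per sample
  header.write('data', 36);
  header.writeUInt32LE(dataLength, 40);          // data chunk size
  return header;
}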

src/wake/test/build.ts Normal file

@@ -0,0 +1,49 @@
import vosk from 'vosk';
import { Recording } from '../../recorder/index.ts';
import fs from 'fs';
import path from 'path';
import { audioPath, sleep } from './common.ts';
import { encodeWav, decodeWav } from '../../utils/convert.ts';
// the Vosk model must be downloaded first
// const MODEL_PATH = 'vosk-model-small-en-us-0.15';
const MODEL_PATH = path.join(process.cwd(), 'models/vosk-model-small-cn-0.22');
async function detectWithVosk(audioFilePath) {
if (!fs.existsSync(MODEL_PATH)) {
console.error('Please download the Vosk model first');
return false;
}
const model = new vosk.Model(MODEL_PATH);
const rec = new vosk.Recognizer({ model: model, sampleRate: 16000 });
// const wakeWords = ['hey computer', 'okay jarvis']; // custom wake-word list
const wakeWords = ['欢迎']; // custom wake-word list ('欢迎' means 'welcome')
const audioBuffer = fs.readFileSync(audioFilePath);
const pcmBuffer = decodeWav(audioBuffer);
const result = await rec.acceptWaveformAsync(pcmBuffer);
console.log('result', result, rec.result());
// return new Promise((resolve) => {
// const pcmBufferLength = Buffer.byteLength(pcmBuffer);
// console.log('pcmBufferLength', pcmBufferLength);
// const bufferLength = 1024 * 8;
// let index = 0;
// for (let i = 0; i < pcmBufferLength; i += bufferLength) {
// const chunk = pcmBuffer.subarray(i, i + bufferLength);
// index++;
// if (rec.acceptWaveform(chunk)) {
// const result = rec.result();
// console.log('=========result', result, index);
// const text = result.text.toLowerCase();
// if (wakeWords.some((word) => text.includes(word))) {
// resolve(true);
// }
// }
// }
// resolve(false);
// });
}
detectWithVosk(audioPath).then((result) => {
console.log('result', result);
});
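A sketch of the chunked path the commented-out block above is aiming at: feed PCM into the recognizer in slices and stop as soon as a recognized segment contains a wake word (acceptWaveform, result and finalResult are the vosk Node binding calls):

// Sketch: stream PCM into the recognizer in fixed-size slices and scan for wake words.
function detectStreaming(pcmBuffer: Buffer, rec: any, wakeWords: string[]): boolean {
  const bufferLength = 1024 * 8;
  for (let i = 0; i < pcmBuffer.length; i += bufferLength) {
    const chunk = pcmBuffer.subarray(i, i + bufferLength);
    if (rec.acceptWaveform(chunk)) {
      const text = (rec.result().text || '').toLowerCase();
      if (wakeWords.some((word) => text.includes(word))) return true;
    }
  }
  const finalText = (rec.finalResult().text || '').toLowerCase();
  return wakeWords.some((word) => finalText.includes(word));
}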

src/wake/test/common.ts Normal file

@@ -0,0 +1,12 @@
import path from 'path';
import dotenv from 'dotenv';
export const config = dotenv.config({
path: path.join(process.cwd(), '.env'),
}).parsed;
export const audioPath = path.join(process.cwd(), 'videos/asr_example.wav');
export const audioPath2 = path.join(process.cwd(), 'videos/asr_example2.wav');
export const blankAudioPath = path.join(process.cwd(), 'videos/blank.wav');
export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));


@@ -1,23 +1,41 @@
import { defineConfig } from 'tsup';
// import glob from 'fast-glob';
// const services = glob.sync('src/services/*.ts');
import fs from 'fs';
const clean = () => {
const distDir = 'dist';
if (fs.existsSync(distDir)) {
fs.rmSync(distDir, { recursive: true, force: true });
}
};
clean();
const entrys = ['src/index.ts'];
const nodeEntrys = ['src/dev.ts'];
export default defineConfig({
entry: entrys,
const getCommonConfig = (opts = {}) => {
return {
entry: opts.entry,
outExtension: ({ format }) => ({
js: format === 'esm' ? '.mjs' : '.js',
}),
define: {
'process.env.IS_BROWSER': JSON.stringify(process.env.BROWSER || false),
},
splitting: false,
sourcemap: false,
clean: true,
// clean: true,
format: 'esm',
external: ['dotenv'],
dts: true,
outDir: 'dist',
tsconfig: 'tsconfig.json',
});
...opts,
define: {
'process.env.IS_BROWSER': JSON.stringify(process.env.BROWSER || false),
...opts.define,
},
};
};
export default defineConfig([
// getCommonConfig({ entry: entrys, define: { 'process.env.IS_BROWSER': JSON.stringify(true) } }), // browser build
getCommonConfig({ entry: nodeEntrys, define: { 'process.env.IS_BROWSER': JSON.stringify(false) } }), // node
]);

videos/asr_example2.wav Normal file

Binary file not shown.

videos/blank.wav Normal file

Binary file not shown.