video-tools/src/asr/provider/funasr/ws.ts

// import WebSocket from 'ws';
import { initWs } from '../../../ws-adapter/index.ts';
import { logger } from '@/logger/index.ts';
import { WSServer, WSSOptions } from '../../provider/ws-server.ts';

/**
 * Options accepted by the `VideoWS` constructor.
 *
 * `url`, `ws`, `onConnect` and `wsOptions` are forwarded to the `WSServer`
 * base constructor; `itn`, `mode` and `wav_format` are stored on the instance.
 */
export type VideoWSOptions = {
  /** Endpoint address, passed through to the base class. */
  url?: string;
  /** An existing socket to use, passed through to the base class. */
  ws?: WebSocket;
  /** Inverse text normalization flag; the constructor falls back to `false`. */
  itn?: boolean;
  /** Recognition mode; the constructor falls back to `'online'`. */
  mode?: VideoWsMode;
  /** Not read by the `VideoWS` constructor in this file. */
  isFile?: boolean;
  /** Callback passed through to the base class. */
  onConnect?: () => void;
  /** Audio format that `start()` uses when its caller does not supply one. */
  wav_format?: string;
  /** Low-level socket options, passed through to the base class. */
  wsOptions?: WSSOptions['wsOptions'];
};
/** Every recognition mode value accepted in the `mode` field, as a readonly tuple. */
export const videoWsMode = [
  '2pass',
  'online',
  'offline',
] as const;

/** Union of the string literals listed in `videoWsMode`. */
type VideoWsMode = (typeof videoWsMode)[number];
/**
 * Control message that `VideoWS.start()` serializes to JSON and sends over the
 * socket. `VideoWS.stop()` sends an object with a subset of these fields and
 * `is_speaking: false`.
 */
type OpenRequest = {
  // Speech chunk size (unit: milliseconds):
  chunk_size: number[];
  // Audio file name:
  wav_name: string;
  // Whether speech is currently in progress:
  is_speaking: boolean;
  // Chunk interval (unit: milliseconds):
  chunk_interval: number;
  // Inverse text normalization (ITN):
  itn: boolean;
  // Mode:
  // '2pass' - two-pass mode, 'online' - online mode, 'offline' - offline mode
  mode: VideoWsMode;
  // Audio format:
  wav_format?: string;
  // Audio sample rate (unit: Hz):
  audio_fs?: number;
  // Hotword list:
  hotwords?: string;
};
/**
 * Result payload type exported by this module.
 *
 * NOTE(review): presumably describes the JSON messages received in
 * `VideoWS.onMessage`, but nothing in the visible file applies this type to
 * the parsed data — confirm against the consumer.
 */
export type VideoWsResult = {
  isFinal: boolean;
  mode: VideoWsMode;
  // NOTE(review): `tsList: [][]` is an array of *empty tuples*, so element access such as
  // `tsList[0][0]` does not type-check; likely intended to be `number[][]` — confirm with the payload.
  stamp_sents: { end: number; punc: string; start: number; text_seg: string; tsList: [][] }[];
  text: string;
  timestamp: string;
  wav_name: string;
};

/**
 * WebSocket client that streams audio to a speech-recognition endpoint.
 *
 * A session consists of a JSON "start" message (`is_speaking: true`), a series
 * of binary audio frames, and a JSON "stop" message (`is_speaking: false`).
 * Connection handling and the underlying socket (`this.ws`) come from
 * `WSServer`.
 */
export class VideoWS extends WSServer {
  /** Inverse text normalization flag sent in the start message. */
  itn?: boolean;
  /** Recognition mode sent in the start and stop messages. */
  mode?: VideoWsMode;
  /** Audio format used by `start()` when the caller does not supply one. */
  wav_format?: string;

  /**
   * @param options Connection settings (`url`, `ws`, `onConnect`, `wsOptions`)
   *   are forwarded to `WSServer`; `itn` defaults to `false` and `mode` to
   *   `'online'`.
   */
  constructor(options?: VideoWSOptions) {
    super({ url: options?.url, ws: options?.ws, onConnect: options?.onConnect, wsOptions: options?.wsOptions });
    this.itn = options?.itn || false;
    this.mode = options?.mode || 'online';
    this.wav_format = options?.wav_format;
  }

  /**
   * Sends the JSON message that opens a recognition session.
   *
   * @param opts Fields that override the defaults built here.
   *
   * The audio format is resolved as `opts.wav_format`, then the instance's
   * `wav_format`, then `'wav'`. A resolved value of `'wav'` is sent as `'PCM'`
   * together with `audio_fs: 16000`.
   */
  async start(opts?: Partial<OpenRequest>) {
    const request: OpenRequest = {
      chunk_size: [5, 10, 5],
      wav_name: 'h5', //
      is_speaking: true,
      chunk_interval: 10,
      // `itn` is optional on the instance but required in the request.
      itn: this.itn ?? false,
      mode: this.mode || 'online',
      ...opts,
    };
    const fileSampleRate = 16000;
    request.wav_format = request.wav_format || this.wav_format || 'wav';
    if (request.wav_format === 'wav') {
      request.wav_format = 'PCM';
      request.audio_fs = fileSampleRate;
    }
    console.log('request', request);

    this.ws.send(JSON.stringify(request));
  }

  /** Sends the JSON message that marks the end of the audio (`is_speaking: false`). */
  async stop() {
    const request = {
      chunk_size: [5, 10, 5],
      wav_name: 'h5',
      is_speaking: false,
      chunk_interval: 10,
      mode: this.mode,
    };
    this.ws.send(JSON.stringify(request));
  }

  /**
   * Sends `data` over the socket if it is open; otherwise does nothing.
   *
   * @param data Payload handed to the socket's `send` unchanged.
   */
  async send(data: any) {
    if (this.ws && this.ws.readyState === WebSocket.OPEN) {
      this.ws.send(data);
    }
  }

  /**
   * Streams a whole audio buffer as one session: start message, the audio in
   * 960-byte frames paced 10 ms apart, then the stop message. Does nothing if
   * the socket is not open.
   *
   * The final frame may be shorter than 960 bytes; it is sent as well so the
   * end of the audio is not lost.
   *
   * @param data Audio bytes to transmit.
   * @param opts `wav_format` is passed to `start()` (default `'wav'`);
   *   `isFile` is accepted but not used here.
   */
  async sendBuffer(data: Buffer, opts?: { isFile?: boolean; wav_format?: string }) {
    const { wav_format = 'wav' } = opts || {};
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
      return;
    }

    const audio = new Uint8Array(data);
    const frameBytes = 960; // for asr chunk_size [5, 10, 5]
    const frameDelayMs = 10;

    // Await the control messages so they stay ordered around the audio frames
    // even if start()/stop() are overridden with genuinely asynchronous work.
    await this.start({ wav_format });
    // Walk the buffer by offset; each frame is an independent copy of at most
    // `frameBytes` bytes, so the remaining data is never re-copied.
    for (let offset = 0; offset < audio.length; offset += frameBytes) {
      await new Promise((resolve) => setTimeout(resolve, frameDelayMs));
      await this.send(audio.slice(offset, offset + frameBytes));
    }
    await this.stop();
  }

  /**
   * Delegates to the base handler, then parses the message data as JSON and
   * logs it. Parse failures are logged rather than thrown.
   */
  async onMessage(event: MessageEvent) {
    super.onMessage(event);
    const data = event.data;
    try {
      const result = JSON.parse(data.toString());
      console.log('result', result);
    } catch (error) {
      console.log('error', error);
    }
  }

  /** Logs socket error events. */
  async onError(event: Event) {
    console.log('onError', event);
  }

  /** Logs socket close events. */
  async onClose(event: CloseEvent) {
    console.log('onClose', event);
  }
}