update add ASR MODEL

2025-10-15 01:52:33 +08:00
parent daf0b6b31d
commit c2038b8421
9 changed files with 190 additions and 9 deletions
--- a/src/aura/asr/index.ts
+++ b/src/aura/asr/index.ts
@@ -0,0 +1,18 @@
+import { app } from '@/app.ts'
+import { asr } from './modules/index.ts'
+// Route `asr` / `text`: forwards the `base64Audio` query value to the ASR client
+// and returns whatever the client resolves with.
+app
+  .route({ path: 'asr', key: 'text' })
+  .define(async (ctx) => {
+    const audioData = ctx.query.base64Audio as string
+    // Reject the call when no audio payload was supplied.
+    if (!audioData) {
+      ctx.throw('Missing base64Audio parameter')
+    }
+    // Hand the recognition response straight back to the caller.
+    ctx.body = await asr.getText({ audio: { data: audioData } })
+  })
+  .addTo(app)
--- a/src/aura/asr/modules/index.ts
+++ b/src/aura/asr/modules/index.ts
@@ -0,0 +1,7 @@
+import { Asr } from '../../libs/auc.ts'
+import { auraConfig } from '../../config.ts'
+
+// Shared ASR client built from the Volcengine AUC credentials found on `auraConfig`.
+const { VOLCENGINE_AUC_APPID: appid, VOLCENGINE_AUC_TOKEN: token } = auraConfig
+export const asr = new Asr({ appid, token })
--- a/src/aura/config.ts
+++ b/src/aura/config.ts
@@ -0,0 +1,6 @@
+import { config } from '@/modules/config.ts'
+export type AIConfig = { // Volcengine AUC credentials expected on the shared config object
+  VOLCENGINE_AUC_APPID: string // passed to the Asr client as `appid`
+  VOLCENGINE_AUC_TOKEN: string // passed to the Asr client as `token`
+}
+export const auraConfig: AIConfig = config as unknown as AIConfig; // unchecked cast: the presence of these keys is not validated here
--- a/src/aura/index.ts
+++ b/src/aura/index.ts
@@ -0,0 +1 @@
+import './asr/index.ts'
--- a/src/aura/libs/auc.ts
+++ b/src/aura/libs/auc.ts
@@ -0,0 +1,136 @@
+// https://git.xiongxiao.me/kevisual/video-tools/raw/branch/main/src/asr/provider/volcengine/auc.ts
+import { nanoid } from "nanoid"
+
+export const FlashURL = "https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash" // default endpoint; used when the Asr type is 'flash'
+export const AsrBaseURL = 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/submit' // endpoint used for every non-flash Asr type
+export const AsrBase = 'volc.bigasr.auc' // X-Api-Resource-Id value for non-flash types
+export const AsrTurbo = 'volc.bigasr.auc_turbo' // X-Api-Resource-Id value for the 'flash' type
+
+const uuid = () => nanoid() // generates the value sent in the X-Api-Request-Id header
+
+type AsrOptions = { // constructor options accepted by Asr
+  url?: string
+  appid?: string // sent as the X-Api-App-Key header; required (the constructor throws when empty)
+  token?: string // sent as the X-Api-Access-Key header; required (the constructor throws when empty)
+  type?: AsrType // defaults to 'flash'
+}
+
+type AsrType = 'flash' | 'standard' | 'turbo' // 'flash' selects FlashURL + AsrTurbo; the other values select AsrBaseURL + AsrBase
+/**
+ * Minimal HTTP client for the Volcengine "bigmodel" speech-recognition (AUC) API.
+ *
+ * The `type` option selects both the default endpoint and the resource id sent in
+ * `X-Api-Resource-Id`: `'flash'` uses `FlashURL` with `AsrTurbo`; every other type
+ * uses `AsrBaseURL` with `AsrBase`.
+ * NOTE(review): `'turbo'` therefore takes the non-flash path — confirm this is intended.
+ */
+export class Asr {
+  url: string = FlashURL
+  appid: string = ""
+  token: string = ""
+  type: AsrType = 'flash'
+  /**
+   * @param options Credentials plus optional `type` and `url` overrides.
+   * @throws Error when `appid` or `token` is missing or empty.
+   */
+  constructor(options: AsrOptions = {}) {
+    this.appid = options.appid || ""
+    this.token = options.token || ""
+    this.type = options.type || 'flash'
+    // An explicit `url` wins; otherwise the endpoint is derived from the type.
+    // (Previously `options.url` was accepted by the type but silently ignored.)
+    this.url = options.url || (this.type === 'flash' ? FlashURL : AsrBaseURL)
+    if (!this.appid || !this.token) {
+      // Name the config keys callers actually set (VOLCENGINE_AUC_*).
+      throw new Error("VOLCENGINE_AUC_APPID or VOLCENGINE_AUC_TOKEN is not set")
+    }
+  }
+
+  /**
+   * Builds the headers for one request. A fresh request id is generated on
+   * every call, so do not cache the returned object across requests.
+   */
+  header() {
+    const model = this.type === 'flash' ? AsrTurbo : AsrBase
+    return {
+      // The body is a JSON string; without this header fetch labels it text/plain.
+      "Content-Type": "application/json",
+      "X-Api-App-Key": this.appid,
+      "X-Api-Access-Key": this.token,
+      "X-Api-Resource-Id": model,
+      "X-Api-Request-Id": uuid(),
+      "X-Api-Sequence": "-1",
+    }
+  }
+  /**
+   * POSTs the recognition request to `this.url` and returns the pending `fetch` promise.
+   *
+   * @param body Request payload; `audio.url` or `audio.data` must be set.
+   * @throws Error synchronously when neither `audio.url` nor `audio.data` is provided.
+   */
+  submit(body: AsrRequest) {
+    if (!body.audio || (!body.audio.url && !body.audio.data)) {
+      throw new Error("audio.url or audio.data is required")
+    }
+    return fetch(this.url, { method: "POST", headers: this.header(), body: JSON.stringify(body) })
+  }
+  /**
+   * Submits the request and parses the response body as JSON.
+   * The parsed payload is returned as-is; the HTTP status is not inspected here.
+   */
+  async getText(body: AsrRequest) {
+    const res = await this.submit(body)
+    return res.json()
+  }
+}
+
+export type AsrResponse = {
+  audio_info: {
+    /**
+     * Audio duration, in milliseconds
+     */
+    duration: number;
+  };
+  result: {
+    additions: {
+      duration: string;
+    };
+    text: string;
+    utterances: Array<{
+      end_time: number;
+      start_time: number;
+      text: string;
+      words: Array<{
+        confidence: number;
+        end_time: number;
+        start_time: number;
+        text: string;
+      }>;
+    }>;
+  };
+}
+export interface AsrRequest {
+  user?: {
+    uid: string;
+  };
+  audio: { // Asr.submit requires at least one of `url` / `data`
+    url?: string;
+    data?: string;
+    format?: 'wav' | 'pcm' | 'mp3' | 'ogg';
+    codec?: 'raw' | 'opus'; 	// raw / opus; defaults to raw (pcm).
+    rate?: 8000 | 16000; // Sample rate; supports 8000 or 16000, defaults to 16000.
+    channel?: 1 | 2; // Number of channels; supports 1 or 2, defaults to 1.
+  };
+
+
+  request?: {
+    model_name?: string; // Recognition model name, e.g. "bigmodel"
+    enable_words?: boolean; // Whether to enable word-level timestamps; defaults to false.
+    enable_sentence_info?: boolean; // Whether to enable sentence-level timestamps; defaults to false.
+    enable_utterance_info?: boolean; // Whether to enable utterance-level timestamps; defaults to true.
+    enable_punctuation_prediction?: boolean; // Whether to enable punctuation prediction; defaults to true.
+    enable_inverse_text_normalization?: boolean; // Whether to enable text normalization; defaults to true.
+    enable_separate_recognition_per_channel?: boolean; // Whether to recognize each channel separately; defaults to false.
+    audio_channel_count?: 1 | 2; // Audio channel count; only effective when enable_separate_recognition_per_channel is on; supports 1 or 2, defaults to 1.
+    max_sentence_silence?: number; // Maximum in-sentence silence; only effective when enable_sentence_info is on; in ms, defaults to 800.
+    custom_words?: string[];
+    enable_channel_split?: boolean; // Whether to enable channel splitting
+    enable_ddc?: boolean; // Whether to enable DDC (original note glosses this as "dual-channel noise reduction" — TODO confirm against the Volcengine docs)
+    enable_speaker_info?: boolean; // Whether to enable speaker separation
+    enable_punc?: boolean; // Whether to enable punctuation prediction (short form)
+    enable_itn?: boolean; // Whether to enable text normalization (short form)
+    vad_segment?: boolean; // Whether to enable VAD-based sentence segmentation
+    show_utterances?: boolean; // Whether to return utterance-level results
+    corpus?: {
+      boosting_table_name?: string;
+      correct_table_name?: string;
+      context?: string;
+    };
+  };
+}
+
+// const main = async () => {
+//   const base64Audio = wavToBase64(audioPath);
+//   const auc = new Asr({
+//     appid: config.VOLCENGINE_AUC_APPID,
+//     token: config.VOLCENGINE_AUC_TOKEN,
+//   });
+//   const result = await auc.getText({ audio: { data: base64Audio } });
+//   console.log(util.inspect(result, { showHidden: false, depth: null, colors: true }))
+// }
+
+// main();
--- a/src/route.ts
+++ b/src/route.ts
@@ -1,4 +1,5 @@
 import './routes/index.ts';
+import './aura/index.ts';
 import { app } from './app.ts';
 import type { App } from '@kevisual/router';
 import { User } from './models/user.ts';
--- a/src/routes/mark/mark-model.ts
+++ b/src/routes/mark/mark-model.ts
@@ -62,6 +62,7 @@ export class MarkModel extends Model {
  declare fileList: MarkFile[]; // 文件管理
  declare uname: string; // 用户的名称, 或者着别名

+  declare markedAt: Date; // 标记时间
  declare createdAt: Date;
  declare updatedAt: Date;
  declare version: number;
@@ -290,6 +291,11 @@ export const MarkMInit = async <T = any>(opts: MarkInitOpts<T>, sync?: Opts) =>
      type: DataTypes.INTEGER, // 更新刷新版本，多人协作
      defaultValue: 1,
    },
+    markedAt: {
+      type: DataTypes.DATE,
+      allowNull: true,
+      comment: '标记时间',
+    },
    uid: {
      type: DataTypes.UUID,
      allowNull: true,