From c2038b84214cd43ea2a255b9e3f43979a4d77601 Mon Sep 17 00:00:00 2001 From: abearxiong Date: Wed, 15 Oct 2025 01:52:33 +0800 Subject: [PATCH] update add ASR MODEL --- package.json | 7 +- pnpm-lock.yaml | 17 +++-- src/aura/asr/index.ts | 18 +++++ src/aura/asr/modules/index.ts | 7 ++ src/aura/config.ts | 6 ++ src/aura/index.ts | 1 + src/aura/libs/auc.ts | 136 ++++++++++++++++++++++++++++++++++ src/route.ts | 1 + src/routes/mark/mark-model.ts | 6 ++ 9 files changed, 190 insertions(+), 9 deletions(-) create mode 100644 src/aura/asr/index.ts create mode 100644 src/aura/asr/modules/index.ts create mode 100644 src/aura/config.ts create mode 100644 src/aura/index.ts create mode 100644 src/aura/libs/auc.ts diff --git a/package.json b/package.json index eb49821..80d599d 100644 --- a/package.json +++ b/package.json @@ -17,7 +17,8 @@ }, "scripts": { "test": "tsx test/**/*.ts", - "dev": "bun run --watch --hot --inspect src/index.ts", + "dev": "bun run --watch --hot src/index.ts", + "dev:inspect": "bun run --watch --hot --inspect src/index.ts", "cmd": "bun run src/run.ts ", "prebuild": "rimraf dist", "build": "NODE_ENV=production bun bun.config.mjs", @@ -56,7 +57,7 @@ "@kevisual/logger": "^0.0.4", "@kevisual/oss": "0.0.12", "@kevisual/permission": "^0.0.3", - "@kevisual/router": "0.0.26", + "@kevisual/router": "0.0.28", "@kevisual/types": "^0.0.10", "@kevisual/use-config": "^1.0.19", "@types/archiver": "^6.0.3", @@ -107,5 +108,5 @@ "sqlite3" ] }, - "packageManager": "pnpm@10.18.2" + "packageManager": "pnpm@10.18.3" } \ No newline at end of file diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 6fa2f53..93933d7 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -56,8 +56,8 @@ importers: specifier: ^0.0.3 version: 0.0.3 '@kevisual/router': - specifier: 0.0.26 - version: 0.0.26 + specifier: 0.0.28 + version: 0.0.28 '@kevisual/types': specifier: ^0.0.10 version: 0.0.10 @@ -366,8 +366,8 @@ packages: '@kevisual/router@0.0.23': resolution: {integrity: 
sha512-W6ehlhAzNe58vq4QeQt2XFoO84Qaw34A0PVOByJsJ2ICj4YKBTclAt+rOAoISCvUeSbeNOIuhUE3sLyPfplzUw==}

-  '@kevisual/router@0.0.26':
-    resolution: {integrity: sha512-huAwVp65vqKgx4C2pEBKZ7EVKP1EBJ1Ml1nbfr3PrA6w9GPsqGzycBOTwPab7tEIIC3dSZVsQFCqkUVzCLQMEA==}
+  '@kevisual/router@0.0.28':
+    resolution: {integrity: sha512-MqpnRqBRt2TkM9KyDDaz/AjbBFi8L2y2/MwChu28fK6g0OL5fJ45NQQBGNpNrj2rsUVmpCA2wDr2SqjVxE3CLA==}

   '@kevisual/types@0.0.10':
     resolution: {integrity: sha512-Q73uzzjk9UidumnmCvOpgzqDDvQxsblz22bIFuoiioUFJWwaparx8bpd8ArRyFojicYL1YJoFDzDZ9j9NN8grA==}
@@ -1535,6 +1535,9 @@ packages:
     resolution: {integrity: sha512-TdrF7fW9Rphjq4RjrW0Kp2AW0Ahwu9sRGTkS6bvDi0SCwZlEZYmcfDbEsTz8RVk0EHIS/Vd1bv3JhG+1xZuAyQ==}
     engines: {node: '>=16'}

+  path-to-regexp@8.3.0:
+    resolution: {integrity: sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA==}
+
   pg-cloudflare@1.2.7:
     resolution: {integrity: sha512-YgCtzMH0ptvZJslLM1ffsY4EuGaU0cx4XSdXLRFae8bPP4dS5xL1tNB3k2o/N64cHJpwU7dxKli/nZ2lUa5fLg==}

@@ -2409,9 +2412,9 @@ snapshots:
     transitivePeerDependencies:
       - supports-color

-  '@kevisual/router@0.0.26':
+  '@kevisual/router@0.0.28':
     dependencies:
-      path-to-regexp: 8.2.0
+      path-to-regexp: 8.3.0
       selfsigned: 3.0.1
       send: 1.2.0
     transitivePeerDependencies:
@@ -3717,6 +3720,8 @@ snapshots:

   path-to-regexp@8.2.0: {}

+  path-to-regexp@8.3.0: {}
+
   pg-cloudflare@1.2.7:
     optional: true

diff --git a/src/aura/asr/index.ts b/src/aura/asr/index.ts
new file mode 100644
index 0000000..1b28399
--- /dev/null
+++ b/src/aura/asr/index.ts
@@ -0,0 +1,25 @@
+import { app } from '@/app.ts'
+import { asr } from './modules/index.ts'
+
+/**
+ * Route `asr` / `text`: transcribe base64-encoded audio with the shared ASR client.
+ *
+ * Reads `base64Audio` from `ctx.query`, forwards it as `audio.data`, and stores
+ * the parsed JSON reply on `ctx.body`.
+ */
+app.route({
+  path: 'asr',
+  key: 'text'
+}).define(async (ctx) => {
+  const base64Audio: unknown = ctx.query.base64Audio
+  if (typeof base64Audio !== 'string' || base64Audio === '') {
+    // A missing or non-string payload is a caller error, so report it as 400.
+    ctx.throw(400, 'Missing base64Audio parameter')
+    return // not reached when ctx.throw throws; lets the compiler narrow the type below
+  }
+  ctx.body = await asr.getText({
+    audio: {
+      data: base64Audio
+    }
+  })
+}).addTo(app)
\ No newline at end of file
diff --git
a/src/aura/asr/modules/index.ts b/src/aura/asr/modules/index.ts
new file mode 100644
index 0000000..d9c896a
--- /dev/null
+++ b/src/aura/asr/modules/index.ts
@@ -0,0 +1,13 @@
+import { Asr } from '../../libs/auc.ts'
+import { auraConfig } from '../../config.ts'
+
+/**
+ * Shared ASR client configured from the `VOLCENGINE_AUC_*` values in `auraConfig`.
+ *
+ * NOTE(review): the `Asr` constructor throws when either value is empty, and this
+ * statement runs at import time — confirm startup should fail without ASR credentials.
+ */
+export const asr = new Asr({
+  appid: auraConfig.VOLCENGINE_AUC_APPID,
+  token: auraConfig.VOLCENGINE_AUC_TOKEN,
+})
\ No newline at end of file
diff --git a/src/aura/config.ts b/src/aura/config.ts
new file mode 100644
index 0000000..8fd49c9
--- /dev/null
+++ b/src/aura/config.ts
@@ -0,0 +1,10 @@
+import { config } from '@/modules/config.ts'
+
+/** Configuration keys read by the aura modules. */
+export type AIConfig = {
+  VOLCENGINE_AUC_APPID: string
+  VOLCENGINE_AUC_TOKEN: string
+}
+
+// The shared project config is re-typed through a cast; nothing here checks at runtime that the keys exist.
+export const auraConfig: AIConfig = config as unknown as AIConfig;
\ No newline at end of file
diff --git a/src/aura/index.ts b/src/aura/index.ts
new file mode 100644
index 0000000..ab9593e
--- /dev/null
+++ b/src/aura/index.ts
@@ -0,0 +1 @@
+import './asr/index.ts'
\ No newline at end of file
diff --git a/src/aura/libs/auc.ts b/src/aura/libs/auc.ts
new file mode 100644
index 0000000..b6573ef
--- /dev/null
+++ b/src/aura/libs/auc.ts
@@ -0,0 +1,166 @@
+// https://git.xiongxiao.me/kevisual/video-tools/raw/branch/main/src/asr/provider/volcengine/auc.ts
+import { nanoid } from "nanoid"
+
+/** Endpoint used when the client type is 'flash'. */
+export const FlashURL = "https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash"
+/** Endpoint used for every non-flash client type. */
+export const AsrBaseURL = 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/submit'
+/** Resource id sent in `X-Api-Resource-Id` for non-flash clients. */
+export const AsrBase = 'volc.bigasr.auc'
+/** Resource id sent in `X-Api-Resource-Id` for flash clients. */
+export const AsrTurbo = 'volc.bigasr.auc_turbo'
+
+/** Produces the value sent as `X-Api-Request-Id`. */
+const uuid = () => nanoid()
+
+type AsrType = 'flash' | 'standard' | 'turbo'
+
+type AsrOptions = {
+  /** Explicit endpoint; when omitted the endpoint is derived from `type`. */
+  url?: string
+  appid?: string
+  token?: string
+  /** Defaults to 'flash'. */
+  type?: AsrType
+}
+
+/**
+ * Small HTTP client for the speech-recognition endpoints declared above.
+ *
+ * The client type picks both the endpoint and the resource id: 'flash' uses
+ * `FlashURL` with `AsrTurbo`, anything else uses `AsrBaseURL` with `AsrBase`.
+ */
+export class Asr {
+  url: string = FlashURL
+  appid: string = ""
+  token: string = ""
+  type: AsrType = 'flash'
+
+  /**
+   * @param options credentials plus optional `url` / `type` overrides
+   * @throws Error when `appid` or `token` is missing or empty
+   */
+  constructor(options: AsrOptions = {}) {
+    this.appid = options.appid || ""
+    this.token = options.token || ""
+    this.type = options.type || 'flash'
+    // Honour an explicit endpoint; otherwise derive it from the client type.
+    this.url = options.url || (this.type === 'flash' ? FlashURL : AsrBaseURL)
+    if (!this.appid || !this.token) {
+      throw new Error("VOLCENGINE_AUC_APPID or VOLCENGINE_AUC_TOKEN is not set")
+    }
+  }
+
+  /** Builds the request headers; every call generates a new request id. */
+  header() {
+    const model = this.type === 'flash' ? AsrTurbo : AsrBase
+    return {
+      "X-Api-App-Key": this.appid,
+      "X-Api-Access-Key": this.token,
+      "X-Api-Resource-Id": model,
+      "X-Api-Request-Id": uuid(),
+      "X-Api-Sequence": "-1",
+    }
+  }
+
+  /**
+   * POSTs `body` as JSON to `this.url`.
+   *
+   * @returns the pending `fetch` response (not parsed)
+   * @throws Error when neither `audio.url` nor `audio.data` is provided
+   */
+  submit(body: AsrRequest) {
+    if (!body.audio || (!body.audio.url && !body.audio.data)) {
+      throw new Error("audio.url or audio.data is required")
+    }
+    return fetch(this.url, {
+      method: "POST",
+      // Label the payload as JSON; a string body is otherwise sent as text/plain.
+      headers: { "Content-Type": "application/json", ...this.header() },
+      body: JSON.stringify(body),
+    })
+  }
+
+  /** Submits the request and resolves with the response body parsed as JSON. */
+  async getText(body: AsrRequest) {
+    const res = await this.submit(body)
+    return res.json()
+  }
+}
+
+export type AsrResponse = {
+  audio_info: {
+    /**
+     * Audio duration in ms.
+     */
+    duration: number;
+  };
+  result: {
+    additions: {
+      duration: string;
+    };
+    text: string;
+    utterances: Array<{
+      end_time: number;
+      start_time: number;
+      text: string;
+      words: Array<{
+        confidence: number;
+        end_time: number;
+        start_time: number;
+        text: string;
+      }>;
+    }>;
+  };
+}
+
+export interface AsrRequest {
+  user?: {
+    uid: string;
+  };
+  audio: {
+    url?: string;
+    data?: string;
+    format?: 'wav' | 'pcm' | 'mp3' | 'ogg';
+    codec?: 'raw' | 'opus'; // raw or opus; defaults to raw (pcm)
+    rate?: 8000 | 16000; // sample rate: 8000 or 16000; defaults to 16000
+    channel?: 1 | 2; // channel count: 1 or 2; defaults to 1
+  };
+
+  request?: {
+    model_name?: string; // recognition model name, e.g. "bigmodel"
+    enable_words?: boolean; // word-level timestamps; defaults to false
+    enable_sentence_info?: boolean; // sentence-level timestamps; defaults to false
+    enable_utterance_info?: boolean; // utterance-level timestamps; defaults to true
+    enable_punctuation_prediction?: boolean; // punctuation prediction; defaults to true
+    enable_inverse_text_normalization?: boolean; // text normalization; defaults to true
+    enable_separate_recognition_per_channel?: boolean; // per-channel recognition; defaults to false
+    audio_channel_count?: 1 | 2; // channel count, only effective with enable_separate_recognition_per_channel; 1 or 2, defaults to 1
+    max_sentence_silence?: number; // maximum silence within a sentence, only effective with enable_sentence_info; in ms, defaults to 800
+    custom_words?: string[];
+    enable_channel_split?: boolean; // channel separation
+    enable_ddc?: boolean; // DDC (dual-channel noise reduction)
+    enable_speaker_info?: boolean; // speaker separation
+    enable_punc?: boolean; // punctuation prediction (short form)
+    enable_itn?: boolean; // text normalization (short form)
+    vad_segment?: boolean; // VAD-based sentence segmentation
+    show_utterances?: boolean; // return utterance-level results
+    corpus?: {
+      boosting_table_name?: string;
+      correct_table_name?: string;
+      context?: string;
+    };
+  };
+}
+
+// const main = async () => {
+//   const base64Audio = wavToBase64(audioPath);
+//   const auc = new Asr({
+//     appid: config.VOLCENGINE_AUC_APPID,
+//     token: config.VOLCENGINE_AUC_TOKEN,
+//   });
+//   const result = await auc.getText({ audio: { data: base64Audio } });
+//   console.log(util.inspect(result, { showHidden: false, depth: null, colors: true }))
+// }
+
+// main();
\ No newline at end of file
diff --git a/src/route.ts b/src/route.ts
index fc200a0..d1cfecd 100644
--- a/src/route.ts
+++ b/src/route.ts
@@ -1,4 +1,5 @@
 import './routes/index.ts';
+import './aura/index.ts';
 import { app } from './app.ts';
 import type { App } from '@kevisual/router';
 import { User } from './models/user.ts';
diff --git a/src/routes/mark/mark-model.ts b/src/routes/mark/mark-model.ts
index 73578fc..e737479 100644
--- a/src/routes/mark/mark-model.ts
+++ b/src/routes/mark/mark-model.ts
@@ -62,6 +62,7 @@ export class MarkModel extends Model {
   declare fileList: MarkFile[]; // 文件管理
   declare uname: string; // 用户的名称, 或者着别名

+  declare markedAt: Date; // time the mark was made
   declare createdAt: Date;
   declare updatedAt: Date;
   declare version: number;
@@ -290,6 +291,11 @@ export const MarkMInit = async (opts: MarkInitOpts, sync?: Opts) =>
         type: DataTypes.INTEGER, // 更新刷新版本,多人协作
         defaultValue: 1,
       },
+      markedAt: {
+        type: DataTypes.DATE,
+        allowNull: true,
+        comment: '标记时间',
+      },
       uid: {
         type: DataTypes.UUID,
         allowNull: true,