Update: add ASR model

This commit is contained in:
2025-10-15 01:52:33 +08:00
parent daf0b6b31d
commit c2038b8421
9 changed files with 190 additions and 9 deletions

18
src/aura/asr/index.ts Normal file
View File

@@ -0,0 +1,18 @@
import { app } from '@/app.ts'
import { asr } from './modules/index.ts'
// Registers the `asr` / `text` route on `app`.
// The handler reads `base64Audio` from `ctx.query`, calls `ctx.throw` when it
// is missing, and otherwise stores whatever `asr.getText` resolves to in
// `ctx.body`.
app
  .route({ path: 'asr', key: 'text' })
  .define(async (ctx) => {
    const audioData = ctx.query.base64Audio as string
    if (!audioData) {
      ctx.throw('Missing base64Audio parameter')
    }
    // The audio is forwarded inline as `audio.data`; no URL is used here.
    ctx.body = await asr.getText({ audio: { data: audioData } })
  })
  .addTo(app)

View File

@@ -0,0 +1,7 @@
import { Asr } from '../../libs/auc.ts'
import { auraConfig } from '../../config.ts'
// Credentials come from `auraConfig`; the `Asr` constructor receives them as
// `appid` and `token`.
const { VOLCENGINE_AUC_APPID: appid, VOLCENGINE_AUC_TOKEN: token } = auraConfig

/** Module-level ASR client created once when this module is first imported. */
export const asr = new Asr({ appid, token })

6
src/aura/config.ts Normal file
View File

@@ -0,0 +1,6 @@
import { config } from '@/modules/config.ts'
/** Configuration keys that the aura modules read from the shared config. */
export type AIConfig = {
  /** Passed as `appid` when the ASR client is constructed. */
  VOLCENGINE_AUC_APPID: string
  /** Passed as `token` when the ASR client is constructed. */
  VOLCENGINE_AUC_TOKEN: string
}

/**
 * The shared `config` object viewed as {@link AIConfig}.
 *
 * NOTE(review): this is an unchecked double cast, so nothing here verifies
 * that the two keys are actually present at runtime.
 */
export const auraConfig = config as unknown as AIConfig

1
src/aura/index.ts Normal file
View File

@@ -0,0 +1 @@
import './asr/index.ts'

136
src/aura/libs/auc.ts Normal file
View File

@@ -0,0 +1,136 @@
// https://git.xiongxiao.me/kevisual/video-tools/raw/branch/main/src/asr/provider/volcengine/auc.ts
import { nanoid } from "nanoid"
/** Default endpoint when the client type is `'flash'`. */
export const FlashURL = "https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash"
/** Default endpoint for every non-flash client type. */
export const AsrBaseURL = 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/submit'
/** Value sent in `X-Api-Resource-Id` for non-flash client types. */
export const AsrBase = 'volc.bigasr.auc'
/** Value sent in `X-Api-Resource-Id` for the `'flash'` client type. */
export const AsrTurbo = 'volc.bigasr.auc_turbo'
/** Produces the id sent in `X-Api-Request-Id`. */
const uuid = () => nanoid()

/** Constructor options for {@link Asr}. */
type AsrOptions = {
  /** Explicit endpoint. When omitted (or empty) it is derived from `type`. */
  url?: string
  /** Sent as the `X-Api-App-Key` header. Required. */
  appid?: string
  /** Sent as the `X-Api-Access-Key` header. Required. */
  token?: string
  /** Selects the default endpoint and the resource id. Defaults to `'flash'`. */
  type?: AsrType
}
type AsrType = 'flash' | 'standard' | 'turbo'

/**
 * Minimal HTTP client for the speech-recognition endpoints declared above.
 *
 * The constructor throws when `appid` or `token` is missing, so creating an
 * instance at module scope fails at import time if credentials are absent.
 */
export class Asr {
  url: string = FlashURL
  appid: string = ""
  token: string = ""
  type: AsrType = 'flash'

  /**
   * @param options Credentials plus optional `type` and `url` overrides.
   * @throws Error when `appid` or `token` is empty or missing.
   */
  constructor(options: AsrOptions = {}) {
    this.appid = options.appid || ""
    this.token = options.token || ""
    this.type = options.type || 'flash'
    // Honour an explicit `url` (it used to be silently ignored); otherwise
    // pick the endpoint that matches the client type.
    this.url = options.url || (this.type === 'flash' ? FlashURL : AsrBaseURL)
    if (!this.appid || !this.token) {
      throw new Error("VOLCENGINE_Asr_APPID or VOLCENGINE_Asr_TOKEN is not set")
    }
  }

  /**
   * Builds the request headers. A new request id is generated on every call.
   *
   * @returns Header map with credentials, resource id, request id and sequence.
   */
  header() {
    // Only 'flash' maps to the turbo resource id; 'standard' and 'turbo'
    // both use the base resource id.
    const model = this.type === 'flash' ? AsrTurbo : AsrBase
    return {
      "X-Api-App-Key": this.appid,
      "X-Api-Access-Key": this.token,
      "X-Api-Resource-Id": model,
      "X-Api-Request-Id": uuid(),
      "X-Api-Sequence": "-1",
    }
  }

  /**
   * POSTs the request to `this.url` as a JSON string.
   *
   * @param body Request payload; `audio.url` or `audio.data` must be set.
   * @returns The pending `fetch` response.
   * @throws Error synchronously when neither `audio.url` nor `audio.data` is given.
   */
  submit(body: AsrRequest) {
    if (!body.audio || (!body.audio.url && !body.audio.data)) {
      throw new Error("audio.url or audio.data is required")
    }
    // Shallow copy so the caller's top-level object is not the one serialized.
    const data: AsrRequest = {
      ...body,
    }
    return fetch(this.url, { method: "POST", headers: this.header(), body: JSON.stringify(data) })
  }

  /**
   * Submits the request and parses the response body as JSON.
   * The HTTP status is not inspected here; callers receive whatever JSON the
   * server returned.
   *
   * @param body Request payload, validated by {@link Asr.submit}.
   * @returns The parsed JSON response body.
   */
  async getText(body: AsrRequest) {
    const res = await this.submit(body)
    return res.json()
  }
}
/**
 * Shape of a recognition result payload.
 *
 * NOTE(review): nothing visible here ties this type to the value returned by
 * `getText` — confirm it matches the actual API response.
 */
export type AsrResponse = {
  audio_info: {
    /** Audio duration, in ms. */
    duration: number
  }
  result: {
    additions: { duration: string }
    /** Full recognized text. */
    text: string
    utterances: {
      end_time: number
      start_time: number
      text: string
      words: {
        confidence: number
        end_time: number
        start_time: number
        text: string
      }[]
    }[]
  }
}
/** Request payload that `Asr.submit` serializes as the JSON body. */
export interface AsrRequest {
  user?: {
    uid: string
  }
  /** Audio source; `Asr.submit` requires `url` or `data` to be set. */
  audio: {
    url?: string
    data?: string
    format?: 'wav' | 'pcm' | 'mp3' | 'ogg'
    /** raw / opus; defaults to raw (pcm). */
    codec?: 'raw' | 'opus'
    /** Sample rate; 8000 or 16000 supported, defaults to 16000. */
    rate?: 8000 | 16000
    /** Number of channels; 1 or 2 supported, defaults to 1. */
    channel?: 1 | 2
  }
  request?: {
    /** Recognition model name, e.g. "bigmodel". */
    model_name?: string
    /** Whether to enable word-level timestamps; defaults to false. */
    enable_words?: boolean
    /** Whether to enable sentence-level timestamps; defaults to false. */
    enable_sentence_info?: boolean
    /** Whether to enable utterance-level timestamps; defaults to true. */
    enable_utterance_info?: boolean
    /** Whether to enable punctuation prediction; defaults to true. */
    enable_punctuation_prediction?: boolean
    /** Whether to enable text normalization; defaults to true. */
    enable_inverse_text_normalization?: boolean
    /** Whether to recognize each channel separately; defaults to false. */
    enable_separate_recognition_per_channel?: boolean
    /**
     * Audio channel count; only effective when
     * enable_separate_recognition_per_channel is on. 1 or 2, defaults to 1.
     */
    audio_channel_count?: 1 | 2
    /**
     * Maximum silence inside a sentence, in ms; only effective when
     * enable_sentence_info is on. Defaults to 800.
     */
    max_sentence_silence?: number
    custom_words?: string[]
    /** Whether to enable channel separation. */
    enable_channel_split?: boolean
    /** Whether to enable DDC (dual-channel noise reduction). */
    enable_ddc?: boolean
    /** Whether to enable speaker separation. */
    enable_speaker_info?: boolean
    /** Whether to enable punctuation prediction (short form). */
    enable_punc?: boolean
    /** Whether to enable text normalization (short form). */
    enable_itn?: boolean
    /** Whether to enable VAD-based sentence segmentation. */
    vad_segment?: boolean
    /** Whether to return utterance-level results. */
    show_utterances?: boolean
    corpus?: {
      boosting_table_name?: string
      correct_table_name?: string
      context?: string
    }
  }
}
// const main = async () => {
// const base64Audio = wavToBase64(audioPath);
// const auc = new Asr({
// appid: config.VOLCENGINE_AUC_APPID,
// token: config.VOLCENGINE_AUC_TOKEN,
// });
// const result = await auc.getText({ audio: { data: base64Audio } });
// console.log(util.inspect(result, { showHidden: false, depth: null, colors: true }))
// }
// main();

View File

@@ -1,4 +1,5 @@
import './routes/index.ts';
import './aura/index.ts';
import { app } from './app.ts';
import type { App } from '@kevisual/router';
import { User } from './models/user.ts';

View File

@@ -62,6 +62,7 @@ export class MarkModel extends Model {
declare fileList: MarkFile[]; // 文件管理
declare uname: string; // 用户的名称, 或者着别名
declare markedAt: Date; // 标记时间
declare createdAt: Date;
declare updatedAt: Date;
declare version: number;
@@ -290,6 +291,11 @@ export const MarkMInit = async <T = any>(opts: MarkInitOpts<T>, sync?: Opts) =>
type: DataTypes.INTEGER, // 更新刷新版本,多人协作
defaultValue: 1,
},
markedAt: {
type: DataTypes.DATE,
allowNull: true,
comment: '标记时间',
},
uid: {
type: DataTypes.UUID,
allowNull: true,