generated from tailored/router-template
update
This commit is contained in:
1324
pnpm-lock.yaml
generated
1324
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
123
src/asr/provider/volcengine/auc.ts
Normal file
123
src/asr/provider/volcengine/auc.ts
Normal file
@@ -0,0 +1,123 @@
|
||||
import crypto from "node:crypto"
|
||||
|
||||
/** Endpoint of the "flash" recognition API; the default `url` of `Asr`. */
export const FlashURL = "https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash"

/** Endpoint of the submit API; `Asr` switches to it for every non-flash type. */
export const AsrBaseURL = 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/submit'

/** Resource id that `Asr.header()` sends as `X-Api-Resource-Id` for non-flash types. */
export const AsrBase = 'volc.bigasr.auc'

/** Resource id that `Asr.header()` sends as `X-Api-Resource-Id` for the flash type. */
export const AsrTurbo = 'volc.bigasr.auc_turbo'
|
||||
|
||||
/** Produces a fresh random UUID string using Node's `crypto.randomUUID()`. */
const uuid = () => {
  return crypto.randomUUID()
}
|
||||
|
||||
/** Constructor options for `Asr`. Every field is optional at the type level. */
type AsrOptions = {
  /** Request endpoint. */
  url?: string
  /** Application id; `Asr` sends it as the `X-Api-App-Key` header. */
  appid?: string
  /** Access token; `Asr` sends it as the `X-Api-Access-Key` header. */
  token?: string
  /** Recognition mode; `Asr` falls back to 'flash' when omitted. */
  type?: AsrType
}
|
||||
|
||||
/**
 * Recognition mode accepted by `Asr`.
 *
 * `Asr` treats 'flash' specially (flash endpoint, turbo resource id); every
 * other value is routed to the submit endpoint with the base resource id.
 */
type AsrType =
  | 'flash'
  | 'standard'
  | 'turbo'
|
||||
/**
 * Small HTTP client for the Volcengine big-model audio-file recognition API.
 *
 * It builds the authentication headers, validates that the request carries
 * audio, and POSTs the JSON body to the configured endpoint.
 *
 * NOTE(review): only a single POST is issued per call; no result polling is
 * implemented here — confirm that is sufficient for non-flash types.
 */
export class Asr {
  /** Endpoint that `submit` POSTs to. */
  url: string = FlashURL
  /** Application id, sent as `X-Api-App-Key`. */
  appid: string = ""
  /** Access token, sent as `X-Api-Access-Key`. */
  token: string = ""
  /** Recognition mode; selects the default endpoint and the resource id. */
  type: AsrType = 'flash'

  /**
   * @param options Credentials, mode and (optionally) an endpoint override.
   * @throws Error when `appid` or `token` is missing or empty.
   */
  constructor(options: AsrOptions = {}) {
    this.appid = options.appid || ""
    this.token = options.token || ""
    this.type = options.type || 'flash'
    // An explicitly supplied endpoint wins; otherwise the endpoint follows the
    // mode (flash endpoint for 'flash', submit endpoint for everything else).
    this.url = options.url || (this.type === 'flash' ? FlashURL : AsrBaseURL)
    if (!this.appid || !this.token) {
      // NOTE(review): the names in this message do not match the
      // VOLCENGINE_AUC_* config keys used by the sibling test — confirm.
      throw new Error("VOLCENGINE_Asr_APPID or VOLCENGINE_Asr_TOKEN is not set")
    }
  }

  /**
   * Builds the request headers. A new request id is generated on every call.
   *
   * NOTE(review): only 'flash' maps to the turbo resource id; 'turbo' and
   * 'standard' both map to the base resource id — confirm this is intended.
   *
   * @returns The `X-Api-*` header map for one request.
   */
  header() {
    const model = this.type === 'flash' ? AsrTurbo : AsrBase
    return {
      "X-Api-App-Key": this.appid,
      "X-Api-Access-Key": this.token,
      "X-Api-Resource-Id": model,
      "X-Api-Request-Id": uuid(),
      "X-Api-Sequence": "-1",
    }
  }

  /**
   * POSTs the recognition request as JSON to `this.url`.
   *
   * @param body Request payload; must carry `audio.url` or `audio.data`.
   * @returns The pending `fetch` response.
   * @throws Error when neither `audio.url` nor `audio.data` is provided.
   */
  submit(body: AsrRequest) {
    if (!body.audio || (!body.audio.url && !body.audio.data)) {
      throw new Error("audio.url or audio.data is required")
    }
    // Shallow copy so the caller's object is not the one being serialized.
    const data: AsrRequest = {
      ...body,
    }
    return fetch(this.url, { method: "POST", headers: this.header(), body: JSON.stringify(data) })
  }

  /**
   * Submits the request and parses the response body as JSON.
   *
   * @param body Request payload, validated by `submit`.
   * @returns The parsed JSON body; its shape is not validated here.
   */
  async getText(body: AsrRequest) {
    const res = await this.submit(body)
    return res.json()
  }
}
|
||||
|
||||
/** Shape of the JSON body returned by the recognition endpoint. */
export type AsrResponse = {
  audio_info: {
    /**
     * Audio duration, in milliseconds.
     */
    duration: number;
  };
  result: {
    additions: {
      duration: string;
    };
    /** Full recognized text. */
    text: string;
    /** Per-utterance segments with their own timing and words. */
    utterances: Array<{
      end_time: number;
      start_time: number;
      text: string;
      words: Array<{
        confidence: number;
        end_time: number;
        start_time: number;
        text: string;
      }>;
    }>;
  };
}
|
||||
/** JSON payload accepted by `Asr.submit` / `Asr.getText`. */
export interface AsrRequest {
  user?: {
    uid: string;
  };
  /** Audio source; `Asr.submit` requires `url` or `data` to be set. */
  audio: {
    url?: string;
    data?: string;
    format?: 'wav' | 'pcm' | 'mp3' | 'ogg';
    /** raw or opus; defaults to raw (pcm). */
    codec?: 'raw' | 'opus';
    /** Sample rate; 8000 or 16000 supported, defaults to 16000. */
    rate?: 8000 | 16000;
    /** Number of channels; 1 or 2 supported, defaults to 1. */
    channel?: 1 | 2;
  };

  request?: {
    /** Recognition model name, e.g. "bigmodel". */
    model_name?: string;
    /** Whether to enable word-level timestamps; defaults to false. */
    enable_words?: boolean;
    /** Whether to enable sentence-level timestamps; defaults to false. */
    enable_sentence_info?: boolean;
    /** Whether to enable utterance-level timestamps; defaults to true. */
    enable_utterance_info?: boolean;
    /** Whether to enable punctuation prediction; defaults to true. */
    enable_punctuation_prediction?: boolean;
    /** Whether to enable text normalization; defaults to true. */
    enable_inverse_text_normalization?: boolean;
    /** Whether to recognize each channel separately; defaults to false. */
    enable_separate_recognition_per_channel?: boolean;
    /**
     * Number of audio channels; only effective when
     * enable_separate_recognition_per_channel is on. 1 or 2, defaults to 1.
     */
    audio_channel_count?: 1 | 2;
    /**
     * Maximum silence within a sentence, in ms; only effective when
     * enable_sentence_info is on. Defaults to 800.
     */
    max_sentence_silence?: number;
    custom_words?: string[];
    /** Whether to enable channel splitting. */
    enable_channel_split?: boolean;
    /** Whether to enable DDC (dual-channel noise reduction). */
    enable_ddc?: boolean;
    /** Whether to enable speaker separation. */
    enable_speaker_info?: boolean;
    /** Whether to enable punctuation prediction (short form). */
    enable_punc?: boolean;
    /** Whether to enable text normalization (short form). */
    enable_itn?: boolean;
    /** Whether to enable VAD-based sentence segmentation. */
    vad_segment?: boolean;
    /** Whether to return utterance-level results. */
    show_utterances?: boolean;
    corpus?: {
      boosting_table_name?: string;
      correct_table_name?: string;
      context?: string;
    };
  };
}
|
||||
21
src/asr/provider/volcengine/test/auc.ts
Normal file
21
src/asr/provider/volcengine/test/auc.ts
Normal file
@@ -0,0 +1,21 @@
|
||||
import { audioPath, config, sleep } from './common.ts';
|
||||
|
||||
import { Asr } from '../auc.ts';
|
||||
import fs from 'fs';
|
||||
import util from 'node:util';
|
||||
/**
 * Reads a file synchronously and returns its contents base64-encoded.
 *
 * @param filePath Path of the file to read.
 * @returns The file's bytes as a base64 string.
 */
const wavToBase64 = (filePath: string) => {
  const data = fs.readFileSync(filePath);
  return data.toString('base64');
};
|
||||
|
||||
/**
 * Manual smoke test: base64-encodes the sample audio file, sends it through
 * `Asr.getText` using the credentials from `config`, and prints the parsed
 * response in full depth.
 */
const main = async () => {
  const base64Audio = wavToBase64(audioPath);
  const auc = new Asr({
    appid: config.VOLCENGINE_AUC_APPID,
    token: config.VOLCENGINE_AUC_TOKEN,
  });
  const result = await auc.getText({ audio: { data: base64Audio } });
  // depth: null prints nested objects in full instead of collapsing them.
  console.log(util.inspect(result, { showHidden: false, depth: null, colors: true }))
}
|
||||
|
||||
// Run the smoke test; report a failure explicitly instead of leaving the
// returned promise floating.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user