feat: 更新语音转文字功能,支持从音频链接获取音频数据并转换为base64;添加错误处理和权限验证

This commit is contained in:
2026-03-12 02:45:46 +08:00
parent 99141a926e
commit 10874917f2
2 changed files with 103 additions and 13 deletions

View File

@@ -1,20 +1,109 @@
import { app } from '@/app.ts' import { app, oss } from '@/app.ts'
import { asr } from './modules/index.ts' import { asr } from './modules/index.ts'
import z from 'zod'
import { baseURL } from '@/modules/domain.ts'
import { getObjectByPathname } from '@/modules/fm-manager/index.ts'
/**
 * Transcribe base64-encoded audio through the shared `asr` client.
 *
 * The audio is declared to the recognizer as `wav`, rate 16000, channel 1,
 * and every `enable_*` request flag is switched on.
 *
 * @param opts.base64Data base64 string carrying the audio payload
 * @returns `text` — the recognized text, or `''` when the response has none —
 *          together with the untouched recognizer response in `result`
 */
export const createAsr = async (opts: { base64Data: string }) => {
  // Describe the payload for the recognizer.
  const audio = {
    data: opts.base64Data,
    format: 'wav' as any,
    rate: 16000,
    channel: 1,
  }
  // Output options requested from the recognizer.
  const request = {
    enable_words: true,
    enable_sentence_info: true,
    enable_utterance_info: true,
    enable_punctuation_prediction: true,
    enable_inverse_text_normalization: true,
  }
  const result = await asr.getText({ audio, request })
  const text = result.result?.text || ''
  return { text, result }
}
// Route `asr` / `text`, registered behind the 'auth' middleware: reads
// `base64Data` from ctx.query, rejects an empty value with a 400, passes it
// to createAsr and responds with `{ text }`.
// NOTE(review): this span is a side-by-side diff rendering — most lines show
// the pre-change text followed by the post-change text, so it does not parse
// as-is. The summary above describes the right-hand (post-change) column; the
// left-hand column read `base64Audio` and returned the raw asr.getText result.
app.route({ app.route({
path: 'asr', path: 'asr',
key: 'text', key: 'text',
middleware: ['auth'], middleware: ['auth'],
description: '语音转文字将base64的音频数据转换为文字, 参数: base64Audio 为base64编码的音频数据', description: '语音转文字将base64的音频数据转换为文字, 参数: base64Data 为base64编码的音频数据',
metadata: {
args: {
base64Data: z.string().describe('base64编码的音频数据').nonempty('base64Data参数不能为空'),
}
}
}).define(async (ctx) => { }).define(async (ctx) => {
const base64Audio = ctx.query.base64Audio as string const base64Data = ctx.query.base64Data as string
if (!base64Audio) { if (!base64Data) {
ctx.throw('Missing base64Audio parameter') ctx.throw(400, 'base64Data参数不能为空')
} }
const result = await asr.getText({ const result = await createAsr({ base64Data })
audio: { ctx.body = {
data: base64Audio text: result.text
} }
})
ctx.body = result
}) })
.addTo(app)
/**
 * Route `asr` / `link`, registered behind the 'auth' middleware: transcribe
 * the audio that a URL points at.
 *
 * - URLs that start with `baseURL` or with `/` are resolved to a pathname and
 *   read from object storage; the first path segment must equal the
 *   authenticated user's username, otherwise the request is rejected with 403.
 * - Other URLs starting with `http` are downloaded via fetchAudioAsBase64.
 * - Anything else is rejected with 400.
 *
 * Responds with `{ text }` taken from createAsr.
 */
app.route({
  path: 'asr',
  key: 'link',
  middleware: ['auth'],
  description: '语音转文字,将音频链接的音频数据转换为文字, 参数: url 为音频链接',
  metadata: {
    args: {
      url: z.string().describe('音频链接').nonempty('url参数不能为空'),
    }
  }
}).define(async (ctx) => {
  const tokenUser = ctx.state.tokenUser;
  const url = ctx.query.url as string
  if (!url) {
    ctx.throw(400, 'url参数不能为空')
  }
  let base64Data: string;
  if (url.startsWith(baseURL) || url.startsWith('/')) {
    // Only the pathname is used from here on; host and query are discarded.
    const pathname = new URL(url, baseURL).pathname;
    const [username] = pathname.split('/').filter(Boolean)
    if (username !== tokenUser.username) {
      ctx.throw(403, '没有权限访问该音频链接')
    }
    let data: Awaited<ReturnType<typeof oss.getObject>>;
    try {
      console.log('fetch audio from minio with objectName', pathname.slice(1))
      const objectName = getObjectByPathname({ pathname })
      data = await oss.getObject(objectName.objectName)
    } catch (e: unknown) {
      const err = e as { name?: string; $metadata?: { httpStatusCode?: number } } | null | undefined
      if (err?.name === 'NoSuchKey' || err?.$metadata?.httpStatusCode === 404) {
        ctx.throw(404, '音频文件不存在')
      }
      // Any other storage failure must surface as itself; swallowing it would
      // leave `data` unset and fail below with an unrelated TypeError.
      throw e
    }
    if (!data.Body) {
      ctx.throw(404, '音频文件内容为空')
    }
    const bytes = await data.Body.transformToByteArray()
    base64Data = Buffer.from(bytes).toString('base64')
  } else if (url.startsWith('http')) {
    // NOTE(review): this fetches a caller-supplied URL from the server side
    // with no host restriction (SSRF risk) — confirm whether an allow-list or
    // a private-address block is required.
    base64Data = await fetchAudioAsBase64(url)
  } else {
    ctx.throw(400, 'url参数必须是有效的链接')
    // Makes the control flow explicit: nothing below runs without audio data.
    return
  }
  const result = await createAsr({ base64Data })
  ctx.body = {
    text: result.text
  }
})
  .addTo(app)
/**
 * Download the resource at `url` and return its bytes as a base64 string.
 *
 * @param url address passed straight to `fetch`
 * @returns base64 encoding of the full response body
 * @throws Error when the response status is not OK
 */
const fetchAudioAsBase64 = async (url: string): Promise<string> => {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch audio from URL: ${response.statusText}`);
  }
  const arrayBuffer = await response.arrayBuffer();
  // Encode through Buffer: spreading every byte into String.fromCharCode
  // passes one argument per byte and overflows the call stack on payloads of
  // ordinary audio size.
  return Buffer.from(arrayBuffer).toString('base64');
};

View File

@@ -4,4 +4,5 @@ import { auraConfig } from '../../config.ts'
// Exported `asr` client: an Asr instance whose appid and token are read from
// auraConfig.
// NOTE(review): side-by-side diff rendering — `type: 'flash'` appears only in
// the post-change column. What 'flash' selects is not visible here; confirm
// against the Asr class.
export const asr = new Asr({ export const asr = new Asr({
appid: auraConfig.VOLCENGINE_AUC_APPID, appid: auraConfig.VOLCENGINE_AUC_APPID,
token: auraConfig.VOLCENGINE_AUC_TOKEN, token: auraConfig.VOLCENGINE_AUC_TOKEN,
type: 'flash'
}) })