From 10874917f2d58939c8a617826b3e970b1653baf5 Mon Sep 17 00:00:00 2001
From: abearxiong
Date: Thu, 12 Mar 2026 02:45:46 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=9B=B4=E6=96=B0=E8=AF=AD=E9=9F=B3?=
 =?UTF-8?q?=E8=BD=AC=E6=96=87=E5=AD=97=E5=8A=9F=E8=83=BD=EF=BC=8C=E6=94=AF?=
 =?UTF-8?q?=E6=8C=81=E4=BB=8E=E9=9F=B3=E9=A2=91=E9=93=BE=E6=8E=A5=E8=8E=B7?=
 =?UTF-8?q?=E5=8F=96=E9=9F=B3=E9=A2=91=E6=95=B0=E6=8D=AE=E5=B9=B6=E8=BD=AC?=
 =?UTF-8?q?=E6=8D=A2=E4=B8=BAbase64=EF=BC=9B=E6=B7=BB=E5=8A=A0=E9=94=99?=
 =?UTF-8?q?=E8=AF=AF=E5=A4=84=E7=90=86=E5=92=8C=E6=9D=83=E9=99=90=E9=AA=8C?=
 =?UTF-8?q?=E8=AF=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/aura/asr/index.ts         | 141 ++++++++++++++++++++++++++++++----
 src/aura/asr/modules/index.ts |   1 +
 2 files changed, 129 insertions(+), 13 deletions(-)

diff --git a/src/aura/asr/index.ts b/src/aura/asr/index.ts
index 9557369..e81ebe3 100644
--- a/src/aura/asr/index.ts
+++ b/src/aura/asr/index.ts
@@ -1,20 +1,135 @@
-import { app } from '@/app.ts'
+import { app, oss } from '@/app.ts'
 import { asr } from './modules/index.ts'
+import z from 'zod'
+import { baseURL } from '@/modules/domain.ts'
+import { getObjectByPathname } from '@/modules/fm-manager/index.ts'
+
+/**
+ * Transcribes base64-encoded audio with `asr.getText`.
+ *
+ * The audio is submitted as `wav`, rate 16000, single channel, with word,
+ * sentence and utterance info, punctuation prediction and inverse text
+ * normalization enabled.
+ *
+ * @param opts.base64Data base64-encoded audio payload
+ * @returns the recognized text (empty string when the response has none)
+ *   together with the raw `asr.getText` response
+ */
+export const createAsr = async (opts: { base64Data: string }) => {
+  const { base64Data } = opts
+  const result = await asr.getText({
+    audio: {
+      data: base64Data,
+      format: 'wav' as any,
+      rate: 16000,
+      channel: 1
+    },
+    request: {
+      enable_words: true,
+      enable_sentence_info: true,
+      enable_utterance_info: true,
+      enable_punctuation_prediction: true,
+      enable_inverse_text_normalization: true
+    }
+  })
+  return {
+    text: result.result?.text || '',
+    result
+  };
+}
+
+// Route asr/text: transcribes the base64 audio given in the `base64Data` query parameter.
 app.route({
   path: 'asr',
   key: 'text',
   middleware: ['auth'],
-  description: '语音转文字,将base64的音频数据转换为文字, 参数: base64Audio, 为base64编码的音频数据',
-}).define(async (ctx) => {
-  const base64Audio = ctx.query.base64Audio as string
-  if (!base64Audio) {
-    ctx.throw('Missing base64Audio parameter')
-  }
-  const result = await asr.getText({
-    audio: {
-      data: base64Audio
+  description: '语音转文字,将base64的音频数据转换为文字, 参数: base64Data 为base64编码的音频数据',
+  metadata: {
+    args: {
+      base64Data: z.string().describe('base64编码的音频数据').nonempty('base64Data参数不能为空'),
     }
-  })
-  ctx.body = result
+  }
+}).define(async (ctx) => {
+  const base64Data = ctx.query.base64Data as string
+  if (!base64Data) {
+    ctx.throw(400, 'base64Data参数不能为空')
+  }
+  const result = await createAsr({ base64Data })
+  ctx.body = {
+    text: result.text
+  }
 })
-  .addTo(app)
\ No newline at end of file
+  .addTo(app)
+
+// Route asr/link: transcribes the audio behind the `url` query parameter.
+app.route({
+  path: 'asr',
+  key: 'link',
+  middleware: ['auth'],
+  description: '语音转文字,将音频链接的音频数据转换为文字, 参数: url 为音频链接',
+  metadata: {
+    args: {
+      url: z.string().describe('音频链接').nonempty('url参数不能为空'),
+    }
+  }
+}).define(async (ctx) => {
+  const tokenUser = ctx.state.tokenUser;
+  const url = ctx.query.url as string
+  if (!url) {
+    ctx.throw(400, 'url参数不能为空')
+  }
+  let base64Data = '';
+  if (url.startsWith(baseURL) || url.startsWith('/')) {
+    // Link into this service's own storage: the first path segment must be
+    // the caller's username.
+    const pathname = new URL(url, baseURL).pathname;
+    const [username] = pathname.split('/').filter(Boolean)
+    if (username !== tokenUser.username) {
+      ctx.throw(403, '没有权限访问该音频链接')
+    }
+    let data: Awaited<ReturnType<typeof oss.getObject>>;
+    try {
+      console.log('fetch audio from minio with objectName', pathname.slice(1))
+      const objectName = getObjectByPathname({ pathname })
+      data = await oss.getObject(objectName.objectName)
+    } catch (e: any) {
+      if (e?.name === 'NoSuchKey' || e?.$metadata?.httpStatusCode === 404) {
+        ctx.throw(404, '音频文件不存在')
+      }
+      // Any other storage failure must propagate; swallowing it would leave
+      // `data` undefined and crash on `data.Body` below.
+      throw e
+    }
+    if (!data.Body) {
+      ctx.throw(404, '音频文件内容为空')
+    }
+    const bytes = await data.Body.transformToByteArray()
+    base64Data = Buffer.from(bytes).toString('base64')
+  } else if (url.startsWith('http')) {
+    // NOTE(review): this fetches a caller-supplied URL from the server (SSRF
+    // surface) — confirm whether target hosts need to be restricted.
+    base64Data = await fetchAudioAsBase64(url)
+  } else {
+    ctx.throw(400, 'url参数必须是有效的链接')
+  }
+  const result = await createAsr({ base64Data })
+  ctx.body = {
+    text: result.text
+  }
+})
+  .addTo(app)
+
+/**
+ * Downloads the audio at `url` and returns its bytes as a base64 string.
+ *
+ * @throws Error when the response status is not OK
+ */
+const fetchAudioAsBase64 = async (url: string): Promise<string> => {
+  const response = await fetch(url);
+  if (!response.ok) {
+    throw new Error(`Failed to fetch audio from URL: ${response.statusText}`);
+  }
+  // Buffer encodes the bytes directly; spreading them into String.fromCharCode
+  // passes one argument per byte and throws RangeError on large audio files.
+  return Buffer.from(await response.arrayBuffer()).toString('base64');
+};
\ No newline at end of file
diff --git a/src/aura/asr/modules/index.ts b/src/aura/asr/modules/index.ts
index d9c896a..e91fc6b 100644
--- a/src/aura/asr/modules/index.ts
+++ b/src/aura/asr/modules/index.ts
@@ -4,4 +4,5 @@ import { auraConfig } from '../../config.ts'
 export const asr = new Asr({
   appid: auraConfig.VOLCENGINE_AUC_APPID,
   token: auraConfig.VOLCENGINE_AUC_TOKEN,
+  type: 'flash'
 })
\ No newline at end of file