/** Construction options for {@link AliAsrServer}. */
type AliAsrServerOptions = {
  /** Recognition endpoint. Defaults to the cn-shanghai gateway URL used in the constructor. */
  baseUrl?: string;
  /** Sent as the `appkey` query parameter. */
  appkey: string;
  /** Sent as the `X-NLS-Token` request header. */
  token: string;
  /** Sent as the `format` query parameter when set (e.g. `'pcm'`, `'mp3'`). */
  format?: string;
  /** Sent as the `sample_rate` query parameter when set (e.g. `'16000'`). */
  sampleRate?: string;
  /** Adds `enable_punctuation_prediction=true` to the query. Defaults to `true`. */
  enablePunctuationPrediction?: boolean;
  /** Adds `enable_inverse_text_normalization=true` to the query. Defaults to `true`. */
  enableInverseTextNormalization?: boolean;
  /** Adds `enable_voice_detection=true` to the query. Defaults to `false`. */
  enableVoiceDetection?: boolean;
};

/**
 * JSON body returned by the recognition endpoint.
 *
 * Only `status` and `result` are read by this module; any other fields are
 * passed through to the caller untouched.
 */
type AliAsrResponse = {
  status: number;
  result?: string;
  [key: string]: unknown;
};

/**
 * Thin client that POSTs raw audio bytes to the Aliyun ASR HTTP endpoint.
 *
 * @example
 * ```ts
 * const asrServer = new AliAsrServer({
 *   appkey: '<your appkey>',
 *   token: '<your service auth token>',
 *   format: 'pcm',
 *   sampleRate: '16000',
 *   enablePunctuationPrediction: true,
 *   enableInverseTextNormalization: true,
 *   enableVoiceDetection: false,
 * });
 * const audio = await fs.readFile('/path/to/nls-sample-16k.wav');
 * await asrServer.processAudio(audio);
 * ```
 */
export class AliAsrServer {
  private readonly baseUrl: string;
  private readonly appkey: string;
  private readonly token: string;
  private readonly format?: string;
  private readonly sampleRate?: string;
  private readonly enablePunctuationPrediction: boolean;
  private readonly enableInverseTextNormalization: boolean;
  private readonly enableVoiceDetection: boolean;

  constructor(opts?: AliAsrServerOptions) {
    const {
      baseUrl = 'https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/asr',
      appkey = '',
      token = '',
      format,
      sampleRate,
      enablePunctuationPrediction = true,
      enableInverseTextNormalization = true,
      enableVoiceDetection = false,
    } = opts || {};
    this.baseUrl = baseUrl;
    this.appkey = appkey;
    this.token = token;
    this.format = format;
    this.sampleRate = sampleRate;
    this.enablePunctuationPrediction = enablePunctuationPrediction;
    this.enableInverseTextNormalization = enableInverseTextNormalization;
    this.enableVoiceDetection = enableVoiceDetection;
  }

  /**
   * Builds the request URL from the base URL and the configured options.
   *
   * Optional parameters are only added when set / enabled. If `baseUrl`
   * already carries a query string, the parameters are appended with `&`
   * instead of starting a second `?` section.
   *
   * @returns The full URL including the encoded query string.
   */
  buildRequestUrl(): string {
    const params = new URLSearchParams();
    params.append('appkey', this.appkey);
    if (this.format) {
      params.append('format', this.format);
    }
    if (this.sampleRate) {
      params.append('sample_rate', this.sampleRate);
    }
    if (this.enablePunctuationPrediction) {
      params.append('enable_punctuation_prediction', 'true');
    }
    if (this.enableInverseTextNormalization) {
      params.append('enable_inverse_text_normalization', 'true');
    }
    if (this.enableVoiceDetection) {
      params.append('enable_voice_detection', 'true');
    }

    const separator = this.baseUrl.includes('?') ? '&' : '?';
    return `${this.baseUrl}${separator}${params.toString()}`;
  }

  /**
   * Sends the audio bytes to the recognition endpoint.
   *
   * Failures are logged and reported as `null`; this method does not reject
   * for HTTP errors, non-success status codes, or network/parse errors.
   *
   * @param audioContent Raw audio bytes, sent as `application/octet-stream`.
   * @returns The parsed response body when `status === 20000000`, otherwise `null`.
   */
  async processAudio(audioContent: Buffer): Promise<AliAsrResponse | null> {
    try {
      // Request headers: the token travels in a header, not in the URL.
      const headers = {
        'X-NLS-Token': this.token,
        'Content-Type': 'application/octet-stream',
      };

      const requestUrl = this.buildRequestUrl();

      const response = await fetch(requestUrl, {
        method: 'POST',
        headers,
        body: audioContent,
      });

      if (!response.ok) {
        console.log(`The audio file recognized failed, http code: ${response.status}`);
        const v = await response.text();
        console.log('The audio file recognized response:', v);
        return null;
      }

      const body: AliAsrResponse = await response.json();

      // 20000000 is the only status value this client treats as success.
      if (body.status === 20000000) {
        console.log('The audio file recognized result:');
        console.log(body);
        console.log('result: ' + body.result);
        console.log('The audio file recognized succeed!');
        return body;
      }

      console.log('The audio file recognized failed!');
      console.log(body);
      return null;
    } catch (error: unknown) {
      // Narrow before touching `.code`; a thrown value is not guaranteed to be an object.
      const code =
        typeof error === 'object' && error !== null && 'code' in error
          ? (error as { code?: unknown }).code
          : undefined;
      if (code === 'ENOENT') {
        console.log('The audio file does not exist!');
      } else {
        console.log('Error during audio processing:', error);
      }
      return null;
    }
  }
}
/** Fields of the `CreateToken` response that `AliCommon.getToken` reads. */
interface TokenResponse {
  Token: {
    Id: string;
    /** Multiplied by 1000 before being compared with `Date.now()`. */
    ExpireTime: number;
  };
}

/**
 * Credentials for {@link AliCommon}. Either field may be omitted, in which
 * case the constructor falls back to `ALIYUN_AK_ID` / `ALIYUN_AK_SECRET`.
 */
type AliCommonOptions = {
  accessKeyId?: string;
  accessKeySecret?: string;
};

/** Fetches and caches the access token used by the Aliyun speech services. */
export class AliCommon {
  /** Refresh this long before the reported expiry so a token is not handed out moments before it lapses. */
  private static readonly REFRESH_SKEW_MS = 60 * 1000;

  private readonly accessKeyId: string;
  private readonly accessKeySecret: string;
  private readonly endpoint: string;
  private readonly apiVersion: string;
  /** Last token obtained; empty until the first successful `getToken` call. */
  token = '';
  /** Expiry of `token` in epoch milliseconds; `0` until a token is cached. */
  expireTime = 0;

  /**
   * @param opts Explicit credentials. Empty or missing values fall back to the
   *   `ALIYUN_AK_ID` / `ALIYUN_AK_SECRET` environment variables, then to `''`.
   */
  constructor(opts?: AliCommonOptions) {
    // `||` (not `??`) is deliberate: an empty string should also fall through to the environment.
    this.accessKeyId = opts?.accessKeyId || process.env.ALIYUN_AK_ID || '';
    this.accessKeySecret = opts?.accessKeySecret || process.env.ALIYUN_AK_SECRET || '';
    // NOTE(review): plain-HTTP endpoint — confirm whether the https:// variant should be used instead.
    this.endpoint = 'http://nls-meta.cn-shanghai.aliyuncs.com';
    this.apiVersion = '2019-02-28';
  }

  /**
   * Returns a cached token while it is still comfortably valid, otherwise
   * requests a new one via the `CreateToken` action and caches it.
   *
   * @returns The token id.
   * @throws Error when the response does not contain `Token.Id` / `Token.ExpireTime`;
   *   errors raised by the RPC client are propagated unchanged.
   */
  async getToken(): Promise<string> {
    if (this.token && this.expireTime - AliCommon.REFRESH_SKEW_MS > Date.now()) {
      return this.token;
    }
    const client = new RPCClient({
      accessKeyId: this.accessKeyId,
      accessKeySecret: this.accessKeySecret,
      endpoint: this.endpoint,
      apiVersion: this.apiVersion,
    });

    const result = await client.request<TokenResponse>('CreateToken', {});
    const issued = result?.Token;
    if (!issued || typeof issued.Id !== 'string' || typeof issued.ExpireTime !== 'number') {
      throw new Error('CreateToken response did not contain a usable Token');
    }
    this.token = issued.Id;
    this.expireTime = issued.ExpireTime * 1000;
    return issued.Id;
  }
}
/**
 * Manual smoke test: reads the sample audio file at `videoTestPath` and sends
 * it to the Aliyun recognizer as MP3.
 *
 * @throws Error when `ALI_ASR_APP_KEY` or `ALI_ASR_TOKEN` is not set, so the
 *   script fails fast instead of sending an unauthenticated request.
 */
async function main(): Promise<void> {
  const appkey = process.env.ALI_ASR_APP_KEY;
  const token = process.env.ALI_ASR_TOKEN;
  if (!appkey || !token) {
    throw new Error('ALI_ASR_APP_KEY and ALI_ASR_TOKEN must be set');
  }

  const asrServer = new AliAsrServer({
    appkey,
    token,
    format: 'mp3',
    // format: 'wav',
  });

  const audioContent = await fs.readFile(videoTestPath);
  await asrServer.processAudio(audioContent);
}

// Run the script entry point; any failure is printed rather than left unhandled.
main().catch(console.error);