From 776e0800e96c6908972a71379ca96fedbf479701 Mon Sep 17 00:00:00 2001 From: abearxiong Date: Tue, 20 May 2025 00:39:21 +0800 Subject: [PATCH] tts for cosyvoice and funasr and aliyun --- package.json | 1 + src/asr/provider/aliyun/test/get-text.ts | 4 +- src/asr/provider/funasr/test/get-text.ts | 4 +- src/tts/provider/cosyvoice/test/tts.ts | 28 +++++++++++++ src/tts/provider/cosyvoice/tts.ts | 53 ++++++++++++++++++++++++ 5 files changed, 86 insertions(+), 4 deletions(-) create mode 100644 src/tts/provider/cosyvoice/test/tts.ts create mode 100644 src/tts/provider/cosyvoice/tts.ts diff --git a/package.json b/package.json index 2324d72..a9cf4b8 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,7 @@ "access": "public" }, "dependencies": { + "@gradio/client": "^1.14.2", "@kevisual/router": "0.0.10", "@kevisual/use-config": "^1.0.10", "@kevisual/video": "^0.0.1", diff --git a/src/asr/provider/aliyun/test/get-text.ts b/src/asr/provider/aliyun/test/get-text.ts index 3854822..8797294 100644 --- a/src/asr/provider/aliyun/test/get-text.ts +++ b/src/asr/provider/aliyun/test/get-text.ts @@ -3,8 +3,8 @@ import fs from 'fs/promises'; import path from 'path'; // const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav'); // const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav'); -const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3'); -// const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav'); +// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3'); +const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav'); const name = 'output-1746007775571.mp3'; const videoTestPath2 = path.join(process.cwd(), 'build', name); diff --git a/src/asr/provider/funasr/test/get-text.ts b/src/asr/provider/funasr/test/get-text.ts index 2a0952d..b7be4df 100644 --- a/src/asr/provider/funasr/test/get-text.ts +++ b/src/asr/provider/funasr/test/get-text.ts @@ -5,8 +5,8 @@ import fs from 'fs'; // const 
videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav'); // const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav'); -const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3'); -// const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav'); +// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3'); +const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav'); const name = 'output-1746007775571.mp3'; const videoTestPath2 = path.join(process.cwd(), 'build', name); diff --git a/src/tts/provider/cosyvoice/test/tts.ts b/src/tts/provider/cosyvoice/test/tts.ts new file mode 100644 index 0000000..3e4e367 --- /dev/null +++ b/src/tts/provider/cosyvoice/test/tts.ts @@ -0,0 +1,28 @@ +import { Client } from '@gradio/client'; +import path from 'node:path'; +import fs from 'node:fs'; + +// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav'); +// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav'); +// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3'); +const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav'); +const name = 'output-1746007775571.mp3'; +const videoTestPath2 = path.join(process.cwd(), 'build', name); +const exampleAudio = fs.readFileSync(videoTestPath); +// const exampleAudio = await response_0.blob(); + +const client = await Client.connect('http://192.168.31.220:50000/'); +const result = await client.predict('/generate_audio', { + tts_text: '梦里的一天结束了一个新的轮m,回梦里的一天结束了一个新的轮回梦里的一,,天结束了一个新的轮回,梦里的一天结束了一个新的轮回', + mode_checkbox_group: '3s极速复刻', + sft_dropdown: '', + prompt_text: '在一无所知中,梦里的一天结束了一个新的轮回,便会开始。', + prompt_wav_upload: exampleAudio, + prompt_wav_record: null, + instruct_text: '', + seed: 3, + stream: false, + speed: 1, +}); + +console.log(result.data); diff --git a/src/tts/provider/cosyvoice/tts.ts b/src/tts/provider/cosyvoice/tts.ts new file mode 100644 index 0000000..ffcfe62 --- /dev/null +++ 
b/src/tts/provider/cosyvoice/tts.ts
@@ -0,0 +1,85 @@
+import { Client } from '@gradio/client';
+
+/** Connection settings for a CosyVoice Gradio server. */
+type CosyVoiceTTSOptions = {
+  /** Base URL of the Gradio app to connect to. */
+  url: string;
+};
+
+/**
+ * Payload for the `/generate_audio` endpoint.
+ * The keys are handed to `client.predict` unchanged, so they must not be renamed.
+ */
+type AudioOptions = {
+  tts_text: string;
+  mode_checkbox_group: string;
+  sft_dropdown: string;
+  prompt_text: string;
+  prompt_wav_upload?: unknown;
+  prompt_wav_record: unknown;
+  instruct_text: string;
+  seed: number;
+  stream: boolean;
+  speed: number;
+};
+
+/**
+ * Small wrapper around a Gradio client that calls `/generate_audio`.
+ *
+ * The connection is opened lazily: call `init()` yourself, or let
+ * `generateAudio()` connect on first use.
+ */
+export class CosyVoiceTTS {
+  // Unset until init() has connected.
+  private client?: Client;
+  private url: string;
+  /** Set to true after `init()` has connected. */
+  isInit = false;
+
+  /**
+   * @param opts Optional settings; a missing or empty `url` falls back to
+   *   `http://localhost:50000/`.
+   */
+  constructor(opts?: CosyVoiceTTSOptions) {
+    this.url = opts?.url || 'http://localhost:50000/';
+  }
+
+  /**
+   * Connects to the configured URL and stores the client.
+   * @returns `true` once connected; a failed connection rejects instead.
+   */
+  async init() {
+    this.client = await Client.connect(this.url);
+    this.isInit = true;
+    return true;
+  }
+
+  /**
+   * Calls `/generate_audio` and returns whatever `client.predict` resolves to.
+   * @param opts Fields overriding the sample defaults defined in the body.
+   */
+  generateAudio = async (opts?: Partial<AudioOptions>) => {
+    if (!this.isInit || !this.client) {
+      await this.init();
+    }
+    const client = this.client;
+    if (!client) {
+      throw new Error('CosyVoiceTTS: client is not connected');
+    }
+    const data: AudioOptions = {
+      tts_text: '梦里的一天结束了一个新的轮m,回梦里的一天结束了一个新的轮回梦里的一,,天结束了一个新的轮回,梦里的一天结束了一个新的轮回',
+      mode_checkbox_group: '3s极速复刻',
+      sft_dropdown: '',
+      prompt_text: '在一无所知中,梦里的一天结束了一个新的轮回,便会开始。',
+      // prompt_wav_upload has no default; pass it through opts when needed.
+      prompt_wav_record: null,
+      instruct_text: '',
+      seed: 3,
+      stream: false,
+      speed: 1,
+      ...opts, // caller-supplied fields win over the defaults above
+    };
+    const result = await client.predict('/generate_audio', data);
+    return result;
+  };
+}