TTS for CosyVoice, FunASR, and Aliyun

This commit is contained in:
熊潇 2025-05-20 00:39:21 +08:00
parent 54da76bf9d
commit 776e0800e9
5 changed files with 86 additions and 4 deletions

View File

@ -36,6 +36,7 @@
"access": "public"
},
"dependencies": {
"@gradio/client": "^1.14.2",
"@kevisual/router": "0.0.10",
"@kevisual/use-config": "^1.0.10",
"@kevisual/video": "^0.0.1",

View File

@ -3,8 +3,8 @@ import fs from 'fs/promises';
import path from 'path';
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
// const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
const name = 'output-1746007775571.mp3';
const videoTestPath2 = path.join(process.cwd(), 'build', name);

View File

@ -5,8 +5,8 @@ import fs from 'fs';
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
// const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
const name = 'output-1746007775571.mp3';
const videoTestPath2 = path.join(process.cwd(), 'build', name);

View File

@ -0,0 +1,28 @@
import { Client } from '@gradio/client';
import path from 'node:path';
import fs from 'node:fs';
// Manual smoke test: sends a reference recording plus some text to the
// `/generate_audio` endpoint of a Gradio server and prints the returned data.
// Requires the server below to be reachable and the audio file to exist.

// Alternative reference recordings, kept for quick switching:
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');

// Read the recording before connecting so a missing file fails without touching the network.
const exampleAudio = fs.readFileSync(videoTestPath);

const client = await Client.connect('http://192.168.31.220:50000/');
const result = await client.predict('/generate_audio', {
  tts_text: '梦里的一天结束了一个新的轮m,回梦里的一天结束了一个新的轮回梦里的一,,天结束了一个新的轮回,梦里的一天结束了一个新的轮回',
  mode_checkbox_group: '3s极速复刻',
  sft_dropdown: '',
  prompt_text: '在一无所知中,梦里的一天结束了一个新的轮回,便会开始。',
  // The recording is uploaded as raw file bytes.
  prompt_wav_upload: exampleAudio,
  prompt_wav_record: null,
  instruct_text: '',
  seed: 3,
  stream: false,
  speed: 1,
});
console.log(result.data);

View File

@ -0,0 +1,53 @@
import { Client } from '@gradio/client';
/** Construction options for `CosyVoiceTTS`. */
type CosyVoiceTTSOptions = {
  /** Base URL of the Gradio server to connect to. */
  url: string;
};
/**
 * Named parameters sent to the `/generate_audio` endpoint.
 *
 * Field names are part of the wire format and must not be renamed. This is
 * deliberately a `type` alias rather than an `interface`: only aliases are
 * implicitly assignable to index-signature types such as
 * `Record<string, unknown>`, which matters when the object is handed to a
 * generic "bag of parameters" API.
 *
 * NOTE(review): the meaning of each field is defined by the server; the
 * descriptions below are inferred from the parameter names — confirm against
 * the server's API description.
 */
type AudioOptions = {
  /** Presumably the text to synthesize. */
  tts_text: string;
  /** Presumably the inference mode, selected by its display label. */
  mode_checkbox_group: string;
  /** Presumably a preset speaker choice; the empty string is used when none is given. */
  sft_dropdown: string;
  /** Presumably the transcript of the reference (prompt) audio. */
  prompt_text: string;
  /** Optional uploaded reference audio; the payload type is not constrained here. */
  prompt_wav_upload?: unknown;
  /** Recorded reference audio, or `null` when none is supplied. */
  prompt_wav_record: unknown;
  /** Presumably a free-text instruction for the model. */
  instruct_text: string;
  /** Seed value forwarded to the server. */
  seed: number;
  /** Streaming flag forwarded to the server. */
  stream: boolean;
  /** Speed value forwarded to the server. */
  speed: number;
};
/**
 * Small wrapper around a Gradio client that calls the `/generate_audio`
 * endpoint of a CosyVoice server.
 *
 * The connection is opened lazily: `generateAudio` calls `init` on first use,
 * and `init` may also be called up front.
 */
export class CosyVoiceTTS {
  /** Connected Gradio client; stays `undefined` until `init` has succeeded. */
  private client: Client | undefined;
  /** Base URL handed to `Client.connect`. */
  private url: string;
  /** Connection attempt currently in flight, shared by concurrent `init` callers. */
  private pendingInit: Promise<boolean> | undefined;
  /** `true` once a connection has been established. */
  isInit = false;

  /**
   * @param opts Optional settings. When `url` is missing or empty,
   *   `http://localhost:50000/` is used.
   */
  constructor(opts?: CosyVoiceTTSOptions) {
    // `||` rather than `??` on purpose: an empty string is not a usable URL.
    this.url = opts?.url || 'http://localhost:50000/';
  }

  /**
   * Connects to the server and stores the client on this instance.
   *
   * Calls made while an attempt is still in flight share that attempt instead
   * of opening a second connection. A call made after the attempt has settled
   * connects again, so a failed attempt can be retried.
   *
   * @returns A promise resolving to `true` once connected.
   * @throws Rejects with whatever `Client.connect` rejects with.
   */
  init(): Promise<boolean> {
    if (this.pendingInit) {
      return this.pendingInit;
    }
    const attempt = this.connect();
    this.pendingInit = attempt;
    // Only clear the slot if it still holds this attempt; handling both
    // outcomes here also keeps this side chain from raising an unhandled rejection.
    const release = (): void => {
      if (this.pendingInit === attempt) {
        this.pendingInit = undefined;
      }
    };
    void attempt.then(release, release);
    return attempt;
  }

  /** Opens the connection and records it; `isInit` flips only on success. */
  private async connect(): Promise<boolean> {
    this.client = await Client.connect(this.url);
    this.isInit = true;
    return true;
  }

  /**
   * Calls `/generate_audio`, connecting first if necessary.
   *
   * @param opts Overrides for the built-in default parameters. Entries whose
   *   value is `undefined` are ignored so they cannot erase a default; `null`
   *   is kept because it is a meaningful value for the server.
   * @returns The result object returned by the client's `predict` call.
   * @throws If connecting fails, if no client is available afterwards, or if
   *   the `predict` call rejects.
   */
  generateAudio = async (opts?: Partial<AudioOptions>) => {
    if (!this.isInit) {
      await this.init();
    }
    const client = this.client;
    if (!client) {
      throw new Error('CosyVoiceTTS: no client available; init() must succeed before generating audio');
    }
    // `Partial<>` permits explicit `undefined`; spreading those would overwrite defaults.
    const overrides = Object.fromEntries(Object.entries(opts ?? {}).filter(([, value]) => value !== undefined));
    const data: AudioOptions = {
      tts_text: '梦里的一天结束了一个新的轮m,回梦里的一天结束了一个新的轮回梦里的一,,天结束了一个新的轮回,梦里的一天结束了一个新的轮回',
      mode_checkbox_group: '3s极速复刻',
      sft_dropdown: '',
      prompt_text: '在一无所知中,梦里的一天结束了一个新的轮回,便会开始。',
      // prompt_wav_upload has no default; callers supply the reference audio via `opts`.
      prompt_wav_record: null,
      instruct_text: '',
      seed: 3,
      stream: false,
      speed: 1,
      ...overrides,
    };
    const result = await client.predict('/generate_audio', data);
    return result;
  };
}