Compare commits

...

2 Commits

Author SHA1 Message Date
54da76bf9d Aliyun one-sentence speech recognition (阿里云一句话识别) 2025-05-19 01:44:24 +08:00
a1df51f56b fix funasr 2025-05-19 01:01:38 +08:00
11 changed files with 386 additions and 129 deletions

.gitignore vendored
View File

@@ -1,21 +1,69 @@
node_modules
dist
app.config.json5
apps.config.json
deploy.tar.gz
cache-file
/apps
logs
# mac
.DS_Store
.env*
!.env.example
!.env*example
dist
build
logs
.turbo
pack-dist
# astro
.astro
# next
.next
# nuxt
.nuxt
# vercel
.vercel
# vuepress
.vuepress/dist
# coverage
coverage/
# typescript
*.tsbuildinfo
# debug logs
*.log
*.tmp
# vscode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
# idea
.idea
# system
Thumbs.db
ehthumbs.db
Desktop.ini
# temp files
*.tmp
*.temp
# local development
*.local
public/r
.pnpm-store
models
videos/tts_mix.wav
videos/tts_mix.mp3

View File

@@ -49,6 +49,8 @@
"vosk": "^0.3.39"
},
"devDependencies": {
"@alicloud/pop-core": "^1.8.0",
"@kevisual/logger": "^0.0.3",
"@kevisual/types": "^0.0.6",
"@kevisual/use-config": "^1.0.10",
"@rollup/plugin-alias": "^5.1.1",

View File

@@ -0,0 +1,131 @@
type AliAsrServerOptions = {
baseUrl?: string;
appkey: string;
token: string;
format?: string;
sampleRate?: string;
enablePunctuationPrediction?: boolean;
enableInverseTextNormalization?: boolean;
enableVoiceDetection?: boolean;
};
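// Minimal client for the Aliyun NLS one-sentence recognition RESTful API: builds the request URL
// and POSTs raw audio bytes with the token in the X-NLS-Token header.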
export class AliAsrServer {
private baseUrl: string;
private appkey: string;
private token: string;
private format: string;
private sampleRate: string;
private enablePunctuationPrediction: boolean;
private enableInverseTextNormalization: boolean;
private enableVoiceDetection: boolean;
constructor(opts?: AliAsrServerOptions) {
const {
baseUrl = 'https://nls-gateway-cn-shanghai.aliyuncs.com/stream/v1/asr',
appkey = '',
token = '',
format = '',
sampleRate = '',
enablePunctuationPrediction = true,
enableInverseTextNormalization = true,
enableVoiceDetection = false,
} = opts || {};
this.baseUrl = baseUrl;
this.appkey = appkey;
this.token = token;
this.format = format;
this.sampleRate = sampleRate;
this.enablePunctuationPrediction = enablePunctuationPrediction;
this.enableInverseTextNormalization = enableInverseTextNormalization;
this.enableVoiceDetection = enableVoiceDetection;
}
buildRequestUrl(): string {
const params = new URLSearchParams();
params.append('appkey', this.appkey);
this.format && params.append('format', this.format);
this.sampleRate && params.append('sample_rate', this.sampleRate);
if (this.enablePunctuationPrediction) {
params.append('enable_punctuation_prediction', 'true');
}
if (this.enableInverseTextNormalization) {
params.append('enable_inverse_text_normalization', 'true');
}
if (this.enableVoiceDetection) {
params.append('enable_voice_detection', 'true');
}
return `${this.baseUrl}?${params.toString()}`;
}
async processAudio(audioContent: Buffer): Promise<any> {
try {
// Set request headers
const headers = {
'X-NLS-Token': this.token,
'Content-Type': 'application/octet-stream',
};
// Build the request URL
const requestUrl = this.buildRequestUrl();
// Send the request
const response = await fetch(requestUrl, {
method: 'POST',
headers,
body: audioContent,
});
// Handle the response
if (!response.ok) {
console.log(`Audio recognition failed, HTTP status code: ${response.status}`);
const v = await response.text();
console.log('Recognition failure response body:', v);
return null;
}
// Parse the response body
// console.log('The audio file recognized response:', v);
const body = await response.json();
if (body.status === 20000000) {
console.log('Audio recognition result:');
console.log(body);
console.log('result: ' + body.result);
console.log('Audio recognition succeeded!');
return body;
} else {
console.log('Audio recognition failed!');
console.log(body);
return null;
}
} catch (error) {
if (error.code === 'ENOENT') {
console.log('The audio file does not exist!');
} else {
console.log('Error during audio processing:', error);
}
return null;
}
}
}
// // Usage example
// async function main() {
// const asrServer = new AliAsrServer({
// appkey: '<fill in your appkey>',
// token: '<fill in your NLS service token>',
// format: 'pcm',
// sampleRate: '16000',
// enablePunctuationPrediction: true,
// enableInverseTextNormalization: true,
// enableVoiceDetection: false,
// });
// // processAudio expects a Buffer, so read the file first (e.g. with fs/promises).
// const audioFile = '/path/to/nls-sample-16k.wav';
// const audioContent = await fs.readFile(audioFile);
// await asrServer.processAudio(audioContent);
// }
// // Run the main function
// main().catch(console.error);
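// For reference, a successful response body (status 20000000) has roughly this shape
// (values are placeholders, not real output):
// { "task_id": "<task id>", "result": "<recognized text>", "status": 20000000, "message": "SUCCESS" }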

View File

@@ -0,0 +1,42 @@
import RPCClient from '@alicloud/pop-core';
interface TokenResponse {
Token: {
Id: string;
ExpireTime: number;
};
}
type AliCommonOptions = {
accessKeyId: string;
accessKeySecret: string;
};
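// Exchanges an Aliyun AccessKey pair for a temporary NLS token via the CreateToken API and caches it until it expires.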
export class AliCommon {
private accessKeyId: string;
private accessKeySecret: string;
private endpoint: string;
private apiVersion: string;
token = '';
expireTime = 0;
constructor(opts?: AliCommonOptions) {
this.accessKeyId = opts?.accessKeyId || process.env.ALIYUN_AK_ID || '';
this.accessKeySecret = opts?.accessKeySecret || process.env.ALIYUN_AK_SECRET || '';
this.endpoint = 'http://nls-meta.cn-shanghai.aliyuncs.com';
this.apiVersion = '2019-02-28';
}
async getToken() {
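// Return the cached token while it is still valid; CreateToken's ExpireTime is a Unix timestamp in seconds.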
if (this.token && this.expireTime > Date.now()) {
return this.token;
}
const client = new RPCClient({
accessKeyId: this.accessKeyId,
accessKeySecret: this.accessKeySecret,
endpoint: this.endpoint,
apiVersion: this.apiVersion,
});
const result = await client.request<TokenResponse>('CreateToken', {});
this.token = result.Token.Id;
this.expireTime = result.Token.ExpireTime * 1000;
return result.Token.Id;
}
}
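For orientation, a minimal sketch of how the two new classes can be combined. The environment variable names and the sample file path come from this diff; the import paths and the wiring itself are assumptions, not code from the commit:

import fs from 'fs/promises';
import { AliCommon } from './base.ts';
import { AliAsrServer } from './aliyun-asr-server.ts';

async function recognize(file: string) {
  // AliCommon falls back to ALIYUN_AK_ID / ALIYUN_AK_SECRET from the environment and caches the token.
  const common = new AliCommon();
  const token = await common.getToken();
  // Hand the short-lived token and the appkey to the one-sentence recognition client.
  const asr = new AliAsrServer({
    appkey: process.env.ALI_ASR_APP_KEY || '',
    token,
    format: 'mp3',
  });
  const audio = await fs.readFile(file);
  return asr.processAudio(audio);
}

recognize('videos/tts_mix.mp3').catch(console.error);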

View File

@@ -0,0 +1,25 @@
import { AliAsrServer } from '../aliyun-asr-server.ts';
import fs from 'fs/promises';
import path from 'path';
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
// const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
const name = 'output-1746007775571.mp3';
const videoTestPath2 = path.join(process.cwd(), 'build', name);
// Usage example
async function main() {
const asrServer = new AliAsrServer({
appkey: process.env.ALI_ASR_APP_KEY,
token: process.env.ALI_ASR_TOKEN,
format: 'mp3',
// format: 'wav',
});
const audioContent = await fs.readFile(videoTestPath);
await asrServer.processAudio(audioContent);
}
// Run the main function
main().catch(console.error);

View File

@@ -0,0 +1,10 @@
import dotenv from 'dotenv';
dotenv.config();
import { AliCommon } from '../base.ts';
const aliCommon = new AliCommon({
accessKeyId: process.env.ALIYUN_AK_ID,
accessKeySecret: process.env.ALIYUN_AK_SECRET,
});
aliCommon.getToken().then(console.log);

View File

@@ -3,40 +3,63 @@ import net from 'net';
import path from 'path';
import fs from 'fs';
const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
const ws = new VideoWS({
// url: 'wss://192.168.31.220:10095',
url: 'wss://funasr.xiongxiao.me',
isFile: true,
onConnect: async () => {
console.log('onConnect');
const data = fs.readFileSync(videoTestPath);
let sampleBuf = new Uint8Array(data);
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
// const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
const name = 'output-1746007775571.mp3';
const videoTestPath2 = path.join(process.cwd(), 'build', name);
var chunk_size = 960; // for asr chunk_size [5, 10, 5]
let totalsend = 0;
let len = 0;
ws.start();
while (sampleBuf.length >= chunk_size) {
const sendBuf = sampleBuf.slice(0, chunk_size);
totalsend = totalsend + sampleBuf.length;
sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length);
if (len === 100) {
// ws.stop();
// ws.start();
await new Promise((resolve) => setTimeout(resolve, 1000));
}
ws.send(sendBuf);
len++;
}
ws.stop();
console.log('len', len);
const url = 'wss://funasr.xiongxiao.me';
// const ws = new VideoWS({
// // url: 'wss://192.168.31.220:10095',
// url: 'wss://funasr.xiongxiao.me',
// isFile: true,
// // mode: 'offline',
// wav_format: 'mp3',
// onConnect: async () => {
// console.log('onConnect');
// const data = fs.readFileSync(videoTestPath);
// let sampleBuf = new Uint8Array(data);
// var chunk_size = 960; // for asr chunk_size [5, 10, 5]
// let totalsend = 0;
// let len = 0;
// ws.start();
// while (sampleBuf.length >= chunk_size) {
// const sendBuf = sampleBuf.slice(0, chunk_size);
// totalsend = totalsend + sampleBuf.length;
// sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length);
// if (len === 100) {
// // ws.stop();
// // ws.start();
// // await new Promise((resolve) => setTimeout(resolve, 1000));
// }
// await new Promise((resolve) => setTimeout(resolve, 10));
// ws.send(sendBuf);
// len++;
// }
// await new Promise((resolve) => setTimeout(resolve, 1000));
// ws.stop();
// console.log('len', len);
// },
// });
// const server = net.createServer((socket) => {
// socket.on('data', (data) => {
// console.log('data', data);
// });
// });
// server.listen(10096);
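// Push two audio files through sendBuffer over the FunASR websocket, declaring mp3 as the wav_format.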
const ws2 = new VideoWS({
url: url,
onConnect: async () => {
const data = fs.readFileSync(videoTestPath);
await ws2.sendBuffer(data, { wav_format: 'mp3' });
await new Promise((resolve) => setTimeout(resolve, 1000));
const data2 = fs.readFileSync(videoTestPath2);
await ws2.sendBuffer(data2, { wav_format: 'mp3' });
},
});
const server = net.createServer((socket) => {
socket.on('data', (data) => {
console.log('data', data);
});
});
server.listen(10096);

View File

@@ -1,5 +1,7 @@
// import WebSocket from 'ws';
import { initWs } from '../../../ws-adapter/index.ts';
import { logger } from '@/logger/index.ts';
import { WSServer } from '../../provider/ws-server.ts';
export type VideoWSOptions = {
url?: string;
@@ -8,10 +10,22 @@ export type VideoWSOptions = {
mode?: VideoWsMode;
isFile?: boolean;
onConnect?: () => void;
wav_format?: string;
};
export const videoWsMode = ['2pass', 'online', 'offline'] as const;
type VideoWsMode = (typeof videoWsMode)[number];
type OpenRequest = {
chunk_size: number[];
wav_name: string;
is_speaking: boolean;
chunk_interval: number;
// Inverse text normalization (ITN):
itn: boolean;
mode: VideoWsMode;
wav_format?: string;
audio_fs?: number;
hotwords?: string;
};
export type VideoWsResult = {
isFinal: boolean;
mode: VideoWsMode;
@@ -21,48 +35,21 @@ export type VideoWsResult = {
wav_name: string;
};
export class VideoWS {
ws: WebSocket;
export class VideoWS extends WSServer {
itn?: boolean;
mode?: VideoWsMode;
isFile?: boolean;
onConnect?: () => void;
wav_format?: string;
constructor(options?: VideoWSOptions) {
super({ url: options?.url, ws: options?.ws, onConnect: options?.onConnect });
this.itn = options?.itn || false;
this.itn = options?.itn || false;
this.mode = options?.mode || 'online';
this.isFile = options?.isFile || false;
this.initWs(options);
}
async initWs(options: VideoWSOptions) {
if (options?.ws) {
this.ws = options.ws;
} else {
this.ws = await initWs(options.url);
}
this.onConnect = options?.onConnect || (() => {});
this.ws.onopen = this.onOpen.bind(this);
this.ws.onmessage = this.onMessage.bind(this);
this.ws.onerror = this.onError.bind(this);
this.ws.onclose = this.onClose.bind(this);
this.wav_format = options?.wav_format;
}
async onOpen() {
this.onConnect();
}
async start() {
let isFileMode = this.isFile;
async start(opts?: Partial<OpenRequest>) {
const chunk_size = new Array(5, 10, 5);
type OpenRequest = {
chunk_size: number[];
wav_name: string;
is_speaking: boolean;
chunk_interval: number;
itn: boolean;
mode: VideoWsMode;
wav_format?: string;
audio_fs?: number;
hotwords?: string;
};
const request: OpenRequest = {
chunk_size: chunk_size,
wav_name: 'h5', //
@@ -70,17 +57,16 @@ export class VideoWS {
chunk_interval: 10,
itn: this.itn,
mode: this.mode || 'online',
...opts,
};
console.log('request', request);
if (isFileMode) {
const file_ext = 'wav';
const file_sample_rate = 16000;
request.wav_format = file_ext;
if (file_ext == 'wav') {
request.wav_format = 'PCM';
request.audio_fs = file_sample_rate;
}
const file_sample_rate = 16000;
request.wav_format = request.wav_format || this.wav_format || 'wav';
if ('wav' == request.wav_format) {
request.wav_format = 'PCM';
request.audio_fs = file_sample_rate;
}
console.log('request', request);
this.ws.send(JSON.stringify(request));
}
async stop() {
@@ -99,7 +85,28 @@ export class VideoWS {
this.ws.send(data);
}
}
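// sendBuffer: convenience helper that runs a full start -> chunked send -> stop cycle for one audio buffer.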
async sendBuffer(data: Buffer, opts?: { isFile?: boolean; wav_format?: string }) {
const { wav_format = 'wav' } = opts || {};
if (this.ws && this.ws.readyState === WebSocket.OPEN) {
let sampleBuf = new Uint8Array(data);
const ws = this;
const chunk_size = 960; // for asr chunk_size [5, 10, 5]
let totalsend = 0;
let len = 0;
ws.start({ wav_format });
// Stream the buffer in fixed-size chunks, pausing 10 ms between sends.
while (sampleBuf.length >= chunk_size) {
const sendBuf = sampleBuf.slice(0, chunk_size);
totalsend = totalsend + sendBuf.length;
sampleBuf = sampleBuf.slice(chunk_size, sampleBuf.length);
await new Promise((resolve) => setTimeout(resolve, 10));
ws.send(sendBuf);
len++;
}
// Flush the trailing bytes that are smaller than one chunk before sending the stop frame.
if (sampleBuf.length > 0) {
ws.send(sampleBuf);
}
ws.stop();
}
}
async onMessage(event: MessageEvent) {
super.onMessage(event);
const data = event.data;
try {
const result = JSON.parse(data.toString());

View File

@@ -1,37 +1,6 @@
import { pino } from 'pino';
import { useConfig } from '@kevisual/use-config/env';
import { Logger } from '@kevisual/logger/node';
const config = useConfig();
export const logger = pino({
level: config.LOG_LEVEL || 'info',
transport: {
target: 'pino-pretty',
options: {
colorize: true,
translateTime: 'SYS:standard',
ignore: 'pid,hostname',
},
},
serializers: {
error: pino.stdSerializers.err,
req: pino.stdSerializers.req,
res: pino.stdSerializers.res,
},
// base: {
// app: 'ai-videos',
// env: process.env.NODE_ENV || 'development',
// },
const level = process.env.LOG_LEVEL || 'info';
export const logger = new Logger({
level: level as any,
});
export const logError = (message: string, data?: any) => logger.error({ data }, message);
export const logWarning = (message: string, data?: any) => logger.warn({ data }, message);
export const logInfo = (message: string, data?: any) => logger.info({ data }, message);
export const logDebug = (message: string, data?: any) => logger.debug({ data }, message);
export const log = {
error: logError,
warn: logWarning,
info: logInfo,
debug: logDebug,
};

View File

@@ -1,9 +1,9 @@
import assert from 'assert';
import { logDebug, logInfo } from '../logger/index.ts';
import { logger } from '../logger/index.ts';
import { ChildProcessWithoutNullStreams, spawn } from 'child_process';
import recorders from '../recorder/recorders/index.ts';
import Stream from 'stream';
const logDebug = logger.debug;
export type RecordingOptions = {
/* Sample rate, defaults to 16000 */
sampleRate?: number;