Compare commits

..

6 Commits

Author SHA1 Message Date
5db0418cb8 更新 package.json 和 pnpm-lock.yaml,修改版本号并添加依赖;重构 ASR 模块,优化音频处理逻辑,新增 AliyunAucChat 类及测试用例 2025-12-22 01:49:48 +08:00
00e91e8b28 update 2025-12-21 19:43:16 +08:00
a9ec22bef0 更新 package.json 和 pnpm-lock.yaml,修改版本号并优化构建脚本 2025-12-21 19:07:50 +08:00
02543115b4 update 2025-12-21 19:05:50 +08:00
2f008c3c1f update 2025-12-21 18:58:45 +08:00
58b27b86fe Refactor ASR module and remove deprecated AliAsrServer
- Introduced AsrRelatime class for real-time ASR using WebSocket.
- Removed AliAsrServer and related files from the aliyun provider.
- Updated base class for ASR to use WSServer for WebSocket connections.
- Added new test cases for the updated ASR functionality.
- Cleaned up unused imports and files across the project.
- Adjusted TypeScript configuration for better module resolution.
- Implemented silence generation for audio streaming.
2025-12-21 18:56:32 +08:00
25 changed files with 1083 additions and 3784 deletions

56
.cnb.yml Normal file
View File

@@ -0,0 +1,56 @@
# .cnb.yml
# NOTE(review): indentation reconstructed to standard CNB pipeline nesting —
# the extracted page had lost all leading whitespace; verify against the repo file.
$:
  vscode:
    - docker:
        image: docker.cnb.cool/kevisual/dev-env:latest
      services:
        - vscode
        - docker
      imports: https://cnb.cool/kevisual/env/-/blob/main/env.yml
      # Tasks executed after the dev environment starts
      # stages:
      #   - name: pnpm install
      #     script: pnpm install
main:
  web_trigger_sync_to_gitea:
    - services:
        - docker
      imports:
        - https://cnb.cool/kevisual/env/-/blob/main/env.yml
      stages:
        # NOTE(review): this stage echoes GITEA_PASSWORD into the CI log,
        # which leaks a secret — consider removing or masking it.
        - name: 'show username'
          script: echo "GITEA_USERNAME is ${GITEA_USERNAME} and GITEA_PASSWORD is ${GITEA_PASSWORD}"
        - name: sync to gitea
          image: tencentcom/git-sync
          settings:
            target_url: https://git.xiongxiao.me/kevisual/video-tools.git
            auth_type: https
            username: "oauth2"
            password: ${GITEA_TOKEN}
            git_user: "abearxiong"
            git_email: "xiongxiao@xiongxiao.me"
            sync_mode: rebase
            branch: main
  web_trigger_sync_from_gitea:
    - services:
        - docker
      imports:
        - https://cnb.cool/kevisual/env/-/blob/main/env.yml
      stages:
        - name: '添加 gitea的origin'
          script: |
            git remote remove gitea 2>/dev/null || true
            git remote add gitea https://oauth2:${GITEA_TOKEN}@git.xiongxiao.me/kevisual/video-tools.git
        - name: '同步gitea代码到当前仓库'
          script: git pull gitea main
        - name: '提交到原本的origin'
          script: git push origin main
"**":
  web_trigger_test:
    - stages:
        - name: 执行任务
          script: echo "job"

11
.cnb/web_trigger.yml Normal file
View File

@@ -0,0 +1,11 @@
# .cnb/web_trigger.yml
branch:
  # Buttons below are shown on the detail page of branches matching the regex
  # (here: branches whose name starts with "main").
  - reg: "^main"
    buttons:
      - name: 同步代码到gitea
        desc: 同步代码到gitea
        event: web_trigger_sync_to_gitea
      - name: 同步gitea代码到当前仓库
        desc: 同步gitea代码到当前仓库
        event: web_trigger_sync_from_gitea

View File

@@ -1,21 +1,19 @@
{ {
"name": "@kevisual/video-tools", "name": "@kevisual/video-tools",
"version": "0.0.5", "version": "0.0.12",
"description": "", "description": "",
"main": "index.js", "main": "index.js",
"basename": "/root/video-tools", "basename": "/root/video-tools",
"app": { "app": {
"key": "video-tools",
"entry": "dist/app.mjs", "entry": "dist/app.mjs",
"type": "system-app" "type": "system-app"
}, },
"scripts": { "scripts": {
"build": "rollup -c rollup.config.mjs", "build": "npm publish",
"dev:bun": "bun run src/dev.ts --watch", "dev:bun": "bun run src/dev.ts --watch",
"test": "tsx test/**/*.ts", "test": "tsx test/**/*.ts",
"clean": "rm -rf dist", "clean": "rm -rf dist",
"pub": "npm run build && envision pack -p -u", "pub": "npm run build && envision pack -p -u"
"cmd": "tsx cmd/index.ts "
}, },
"keywords": [], "keywords": [],
"author": "abearxiong <xiongxiao@xiongxiao.me>", "author": "abearxiong <xiongxiao@xiongxiao.me>",
@@ -31,48 +29,31 @@
"access": "public" "access": "public"
}, },
"dependencies": { "dependencies": {
"@gradio/client": "^1.15.1", "@gradio/client": "^2.0.1",
"@kevisual/router": "0.0.21", "@kevisual/ai": "^0.0.19",
"@kevisual/use-config": "^1.0.17", "@kevisual/router": "0.0.48",
"@kevisual/use-config": "^1.0.21",
"@kevisual/video": "^0.0.2", "@kevisual/video": "^0.0.2",
"cookie": "^1.0.2",
"crypto-js": "^4.2.0", "crypto-js": "^4.2.0",
"dayjs": "^1.11.13", "dayjs": "^1.11.19",
"eventemitter3": "^5.0.1", "eventemitter3": "^5.0.1",
"formidable": "^3.5.4", "nanoid": "^5.1.6"
"lodash-es": "^4.17.21",
"nanoid": "^5.1.5"
}, },
"devDependencies": { "devDependencies": {
"@alicloud/pop-core": "^1.8.0", "@alicloud/pop-core": "^1.8.0",
"@kevisual/logger": "^0.0.4", "@kevisual/logger": "^0.0.4",
"@kevisual/types": "^0.0.10", "@kevisual/types": "^0.0.10",
"@kevisual/use-config": "^1.0.17", "@kevisual/use-config": "^1.0.21",
"@types/crypto-js": "^4.2.2", "@types/crypto-js": "^4.2.2",
"@types/formidable": "^3.4.5", "@types/node": "^25.0.3",
"@types/lodash-es": "^4.17.12",
"@types/node": "^22.15.29",
"@types/vosk": "^0.3.1",
"@types/ws": "^8.18.1", "@types/ws": "^8.18.1",
"commander": "^14.0.0", "dotenv": "^17.2.3",
"concurrently": "^9.1.2",
"cross-env": "^7.0.3",
"dotenv": "^16.5.0",
"inquire": "^0.4.8",
"ioredis": "^5.6.1",
"nodemon": "^3.1.10",
"pg": "^8.16.0",
"pm2": "^6.0.6",
"rimraf": "^6.0.1",
"sequelize": "^6.37.7",
"tape": "^5.9.0",
"tsx": "^4.19.4",
"typescript": "^5.8.3",
"ws": "npm:@kevisual/ws" "ws": "npm:@kevisual/ws"
}, },
"exports": { "exports": {
"./src/*": "./src/*", "./src/*": "./src/*",
"./asr": "./src/asr/index.ts",
"./examples/*": "./examples/*" "./examples/*": "./examples/*"
}, },
"packageManager": "pnpm@10.11.1" "packageManager": "pnpm@10.26.1"
} }

3860
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,6 @@
// Public entry point of the ASR module: re-export the Aliyun realtime ASR
// client under its Qwen alias, plus the generic WebSocket server wrapper.
export { AsrRelatime as QwenAsrRelatime } from "./provider/aliyun/base.ts";
export { WSServer } from "./ws.ts";

View File

@@ -0,0 +1,59 @@
import { BailianChat } from '@kevisual/ai'

/** Constructor options for {@link AliyunAucChat}. */
type Options = {
  /** DashScope (Bailian) API key. */
  token?: string
}

/** A chat message sent to the ASR model; content carries base64 audio parts. */
type TextMessages = {
  role?: 'system' | 'user' | 'assistant',
  content?: string | Array<{ audio: string }>
}

/** Message shape returned by the DashScope multimodal-generation endpoint. */
type ResponseMessage = {
  role?: string,
  content?: Array<{ text: string }>,
  annotations?: { emotion: string, language: string, type: string }[],
}

/**
 * Aliyun speech-recognition client built on the Bailian (DashScope) HTTP API.
 * Sends audio-bearing messages to a qwen ASR model and returns the
 * transcription message; the joined text is also stored in `this.responseText`.
 */
export class AliyunAucChat extends BailianChat {
  constructor(opts?: Options) {
    super({
      apiKey: opts?.token,
      baseURL: 'https://dashscope.aliyuncs.com/api/v1',
    })
  }
  /**
   * Transcribe the audio carried in `messages`.
   * @param messages chat messages whose content includes base64 audio data URLs
   * @param options optional model name (default 'qwen3-asr-flash') and extra DashScope parameters
   * @returns the response message (text parts + annotations)
   * @throws Error when the HTTP request fails (non-2xx status)
   */
  async getText(messages: TextMessages[], options?: { model?: string, parameters?: any }): Promise<ResponseMessage> {
    const model = options?.model || 'qwen3-asr-flash'
    const data = {
      model: model,
      input: {
        messages: messages,
      },
      parameters: {
        "incremental_output": true,
        "asr_options": {
          "enable_itn": false
        },
        // caller-supplied parameters override the defaults above
        ...options?.parameters
      },
      stream: false,
    }
    const response = await this.post(`${this.baseURL}/services/aigc/multimodal-generation/generation`, { data: data });
    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`Chat API request failed: ${response.status} ${response.statusText} - ${errorText}`);
    }
    const res = await response.json() as any;
    const choices = res.output?.choices || [];
    const choice = choices[0] || {};
    const message = choice.message || {};
    // record token usage from the DashScope usage block (0 when absent)
    this.prompt_tokens = res.usage?.prompt_tokens ?? 0;
    this.total_tokens = res.usage?.total_tokens ?? 0;
    this.completion_tokens = res.usage?.completion_tokens ?? 0;
    // concatenate all text parts of the returned content array
    const text = message.content?.map?.((item: any) => item.text).join('') || '';
    this.responseText = text;
    return message as ResponseMessage;
  }
}

View File

@@ -1,42 +1,154 @@
import RPCClient from '@alicloud/pop-core';
interface TokenResponse { import { WSServer, WSSOptions } from '../../../asr/ws.ts';
Token: { type Options = {
Id: string; model?: string;
ExpireTime: number; token?: string;
} & Partial<WSSOptions>
/**
* 阿里云实时语音识别服务
* new AsrRelatime({
* token: 'your_token',
* model: 'general_16k',
* enableServerVad: true,
* onConnect: async () => {
* await asr.sendSessionUpdate();
* }
* });
*/
export class AsrRelatime extends WSServer {
static baseURL = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
/**
* 是否启用服务端VAD功能true为VAD模式false为Manual模式
*/
enableServerVad: boolean = true;
constructor(options: Options) {
const { url: _, wsOptions: _wsOptions, ...rest } = options;
const wsOptions: WSSOptions['wsOptions'] = {
..._wsOptions,
headers: {
Authorization: `Bearer ${options.token}`,
'OpenAi-Beta': 'realtime=v1',
..._wsOptions?.headers
}
}; };
} const models = options.model || 'qwen3-asr-flash-realtime';
type AliCommonOptions = { const url = AsrRelatime.baseURL + `?model=${models}`;
accessKeyId: string; super({ ...rest, url, wsOptions, onConnect: options.onConnect });
accessKeySecret: string;
};
export class AliCommon {
private accessKeyId: string;
private accessKeySecret: string;
private endpoint: string;
private apiVersion: string;
token = '';
expireTime = 0;
constructor(opts?: AliCommonOptions) {
this.accessKeyId = opts?.accessKeyId || process.env.ALIYUN_AK_ID || '';
this.accessKeySecret = opts?.accessKeySecret || process.env.ALIYUN_AK_SECRET || '';
this.endpoint = 'http://nls-meta.cn-shanghai.aliyuncs.com';
this.apiVersion = '2019-02-28';
} }
async getToken() { async sendSessionUpdate() {
if (this.token && this.expireTime > Date.now()) { const { ws, enableServerVad } = this;
return this.token; const connected = await this.checkConnected()
if (!connected) {
this.reconnect({ timeout: 60 * 1000 });
return;
}
const event = {
event_id: 'event_123',
type: 'session.update',
session: {
modalities: ['text'],
input_audio_format: 'pcm',
sample_rate: 16000,
input_audio_transcription: {
language: 'zh'
},
turn_detection: null
}
};
if (enableServerVad) {
event.session.turn_detection = {
type: 'server_vad',
threshold: 0.2,
silence_duration_ms: 800
}
}
ws.send(JSON.stringify(event));
}
async start() {
this.sendSessionUpdate();
}
async sendBuffer(buffer: Buffer) {
const { ws, enableServerVad } = this;;
const connected = await this.checkConnected()
if (!connected) {
this.reconnect({ timeout: 60 * 1000 });
return;
} }
const client = new RPCClient({
accessKeyId: this.accessKeyId,
accessKeySecret: this.accessKeySecret,
endpoint: this.endpoint,
apiVersion: this.apiVersion,
});
const result = await client.request<TokenResponse>('CreateToken', {}); let offset = 0;
this.token = result.Token.Id; const bufferLength = Buffer.byteLength(buffer);
this.expireTime = result.Token.ExpireTime * 1000; const chunkSize = 3200; // 约0.1s的PCM16音频 // max lenghth 262144
return result.Token.Id; while (offset < bufferLength) {
const chunkBuffer = buffer.subarray(offset, offset + chunkSize);
offset += chunkSize;
const encoded = chunkBuffer.toString('base64');
const appendEvent = {
event_id: `event_${Date.now()}`,
type: 'input_audio_buffer.append',
audio: encoded
};
ws.send(JSON.stringify(appendEvent));
}
if (!enableServerVad) {
const commitEvent = {
event_id: 'event_789',
type: 'input_audio_buffer.commit'
};
ws.send(JSON.stringify(commitEvent));
}
}
async onMessage(event: MessageEvent) {
super.onMessage(event);
const data = event.data;
try {
const result = JSON.parse(data.toString());
const types = ['conversation.item.input_audio_transcription.text', 'conversation.item.input_audio_transcription.completed'];
const isEnd = this.isComplated(result.type, types[1]);
const isText = this.isComplated(result.type, types[0]);
if (isEnd && result?.transcript) {
const text = result.transcript;
this.emitter.emit('result', {
text: text,
raw: result
});
} else if (isText && result?.stash) {
this.emitter.emit('partial', {
text: result.stash,
raw: result
});
}
} catch (error) {
console.log('error', error);
}
}
/**
* 运行在node环境将浏览器发送的Float32格式音频数据转换为PCM16格式
* @param base64
* @returns
*/
async fixBrowerBuffer(base64: string): Promise<Buffer> {
let voice = Buffer.from(base64, 'base64');
// 浏览器发送的Float32格式音频数据需要转换为PCM16
const floatArray = new Float32Array(voice.buffer, voice.byteOffset, voice.length / 4);
const pcm16 = Buffer.alloc(floatArray.length * 2);
for (let i = 0; i < floatArray.length; i++) {
// 将浮点数 [-1.0, 1.0] 转换为 Int16 [-32768, 32767]
const sample = Math.max(-1, Math.min(1, floatArray[i]));
pcm16.writeInt16LE(sample < 0 ? sample * 0x8000 : sample * 0x7FFF, i * 2);
}
voice = pcm16;
return voice;
}
async onClose(event: CloseEvent) {
let { code } = event;
if (code === 1007) {
// reason: Idle timeout
code = 0; // 超时不重连
}
super.onClose({ ...event, code });
}
async sendBlank(buffer?: Buffer): Promise<void> {
this.sendBuffer(buffer || this.generateSilence(2) as Buffer);
} }
} }

View File

@@ -75,6 +75,7 @@ export class AliAsrServer {
const response = await fetch(requestUrl, { const response = await fetch(requestUrl, {
method: 'POST', method: 'POST',
headers, headers,
// @ts-ignore
body: audioContent, body: audioContent,
}); });

View File

@@ -0,0 +1,42 @@
import RPCClient from '@alicloud/pop-core';

/** Response shape of the NLS `CreateToken` API. */
interface TokenResponse {
  Token: {
    Id: string;
    /** Expiry as a unix timestamp in seconds. */
    ExpireTime: number;
  };
}
type AliCommonOptions = {
  accessKeyId: string;
  accessKeySecret: string;
};

/**
 * Credential helper for Aliyun NLS (intelligent speech).
 * Exchanges an access key pair for a short-lived service token and caches it
 * until shortly before expiry.
 */
export class AliCommon {
  private accessKeyId: string;
  private accessKeySecret: string;
  private endpoint: string;
  private apiVersion: string;
  // cached token and its expiry (ms since epoch); empty/0 until first fetch
  token = '';
  expireTime = 0;
  constructor(opts?: AliCommonOptions) {
    // fall back to environment variables when keys are not passed explicitly
    this.accessKeyId = opts?.accessKeyId || process.env.ALIYUN_AK_ID || '';
    this.accessKeySecret = opts?.accessKeySecret || process.env.ALIYUN_AK_SECRET || '';
    this.endpoint = 'http://nls-meta.cn-shanghai.aliyuncs.com';
    this.apiVersion = '2019-02-28';
  }
  /**
   * Return a valid NLS token, fetching a new one when the cached token is
   * missing or about to expire.
   */
  async getToken() {
    // refresh 60s early so callers never receive a token that expires mid-use
    const safetyMarginMs = 60 * 1000;
    if (this.token && this.expireTime - safetyMarginMs > Date.now()) {
      return this.token;
    }
    const client = new RPCClient({
      accessKeyId: this.accessKeyId,
      accessKeySecret: this.accessKeySecret,
      endpoint: this.endpoint,
      apiVersion: this.apiVersion,
    });
    const result = await client.request<TokenResponse>('CreateToken', {});
    this.token = result.Token.Id;
    this.expireTime = result.Token.ExpireTime * 1000; // API returns seconds
    return result.Token.Id;
  }
}

View File

@@ -0,0 +1 @@
通过access_id和access_key_secret使用阿里云智能语音服务进行语音识别。

View File

@@ -0,0 +1,25 @@
// Manual test script: one-shot transcription of a local audio file via AliAsrServer.
import { AliAsrServer } from '../aliyun-asr-server.ts';
import fs from 'fs/promises';
import path from 'path';
// Alternative sample inputs kept for quick switching:
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
// NOTE(review): `name` and `videoTestPath2` are unused here — kept for ad-hoc testing.
const name = 'output-1746007775571.mp3';
const videoTestPath2 = path.join(process.cwd(), 'build', name);
// Usage example
async function main() {
  // credentials come from the environment (ALI_ASR_APP_KEY / ALI_ASR_TOKEN)
  const asrServer = new AliAsrServer({
    appkey: process.env.ALI_ASR_APP_KEY,
    token: process.env.ALI_ASR_TOKEN,
    // NOTE(review): format is 'mp3' but the default input is a .wav — confirm intended.
    format: 'mp3',
    // format: 'wav',
  });
  const audioContent = await fs.readFile(videoTestPath);
  await asrServer.processAudio(audioContent);
}
// Run the entry point; surface any rejection on stderr.
main().catch(console.error);

View File

@@ -1,25 +1,54 @@
import { AliAsrServer } from '../aliyun-asr-server.ts'; import { AsrRelatime } from '../base.ts';
import fs from 'fs/promises'; import fs from 'fs/promises';
import path from 'path'; import path from 'path';
import net from 'net';
import dotenv from 'dotenv';
dotenv.config();
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav'); // const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav'); // const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3'); // const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav'); const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
const name = 'output-1746007775571.mp3'; const videoTestPath2 = path.join(process.cwd(), 'videos/asr_example2.wav');
const videoTestPath2 = path.join(process.cwd(), 'build', name); const videoBlankPath = path.join(process.cwd(), 'videos/blank.wav');
const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
// 使用示例 // 使用示例
async function main() { async function main() {
const asrServer = new AliAsrServer({ const ws = new AsrRelatime({
appkey: process.env.ALI_ASR_APP_KEY, token: process.env.BAILIAN_API_KEY,
token: process.env.ALI_ASR_TOKEN, onConnect: async () => {
format: 'mp3', ws.emitter.on('message', (event) => {
// format: 'wav', // console.log('message', event.data);
});
ws.emitter.on('result', ({ text }) => {
console.log('result:', text);
});
ws.emitter.on('close', (event) => {
const { code, reason, type } = event || {};
console.log('close envent', event);
console.log('Connection closed:', code)
if (typeof code === 'number' && code !== 0) {
ws.reconnect({ timeout: 1000 });
}
});
await ws.start();
// const audioContent = await fs.readFile(videoTestPath);
// ws.sendBuffer(audioContent);
// ws.sendBlank();
// ws.sendBuffer(await fs.readFile(videoTestPath2));
// ws.sendBlank();
},
}); });
const audioContent = await fs.readFile(videoTestPath); //
await asrServer.processAudio(audioContent);
} }
// 执行主函数 // 执行主函数
main().catch(console.error); main().catch(console.error);
const server = net.createServer((socket) => {
socket.on('data', (data) => {
console.log('data', data);
});
});
server.listen(10096);

View File

@@ -0,0 +1,35 @@
// Manual test script: transcribe an audio file via the AliyunAucChat (DashScope HTTP) client.
import { AliyunAucChat } from '../auc.ts';
import fs from 'fs/promises';
import path from 'path';
import net from 'net';
import dotenv from 'dotenv';
dotenv.config();
// Alternative sample inputs kept for quick switching:
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
const videoTestPath2 = path.join(process.cwd(), 'videos/asr_example2.wav');
const videoBlankPath = path.join(process.cwd(), 'videos/blank.wav');
// NOTE(review): `net`, `videoTestPath`, `videoBlankPath` and `sleep` are unused in this script.
const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
// Request payload: a single user turn carrying the audio as a base64 data URL (top-level await).
const messages = [
  {
    role: 'user' as const,
    content: [
      {
        audio: `data:audio/wav;base64,${(await fs.readFile(videoTestPath2)).toString('base64')}`,
      },
    ],
  },
];
async function main() {
  // API key comes from the environment (BAILIAN_API_KEY)
  const chat = new AliyunAucChat({
    token: process.env.BAILIAN_API_KEY,
  });
  // NOTE(review): `stream` is not part of getText's declared options type — confirm intended.
  const response = await chat.getText(messages, { stream: false, model: 'qwen3-asr-flash' });
  console.log('Final response:', response);
}
main().catch(console.error);

View File

@@ -0,0 +1,61 @@
// Manual test script: capture live microphone audio (via sox) and stream it
// to the Aliyun realtime ASR, while dumping the raw audio to a local WAV file.
import { AsrRelatime } from '../base.ts';
import path from 'node:path';
import net from 'net';
import { Recording } from '../../../../recorder/index.ts';
import Stream from 'stream';
import fs from 'node:fs'; // added: used to dump captured audio to disk
// 16kHz mono WAV recorder backed by the `sox` binary
const recorder = new Recording({
  sampleRate: 16000,
  channels: 1, // mono
  audioType: 'wav',
  threshold: 0,
  recorder: 'sox',
  silence: '1.0',
  endOnSilence: true,
});
const writeFilePath = path.join(process.cwd(), 'funasr_test.wav');
const fileStream = fs.createWriteStream(writeFilePath, { encoding: 'binary' });
// Candidate ASR endpoints kept for reference (currently unused — AsrRelatime uses its own baseURL).
const url = 'wss://funasr.xiongxiao.me';
const url3 = 'wss://pro.xiongxiao.me:10095';
const url4 = 'wss://121.4.112.18:10095'; // aliyun
const url5 = 'https://1.15.101.247:10095'; // pro
// NOTE(review): no token passed — the Authorization header will carry "Bearer undefined";
// confirm whether auth is supplied elsewhere (e.g. environment) for this test.
const ws = new AsrRelatime({
  onConnect: async () => {
    console.log('onConnect');
    ws.sendSessionUpdate();
    recorder.start();
    let len = 0;
    recorder.stream().on('data', (chunk) => {
      // ws.sendBuffer(chunk, { online: true });
      // console.log('Sending audio chunk:', chunk.length);
      ws.sendBuffer(chunk);
      fileStream.write(chunk); // added: also dump the raw audio chunk to funasr_test.wav
      len += chunk.length;
    });
    // stop after 10 seconds of capture, then exit the process
    setTimeout(() => {
      // ws.stop();
      fileStream.end(); // added: close the dump file
      setTimeout(() => {
        process.exit(0);
      }, 1000);
      console.log('len', len);
    }, 10 * 1000);
    ws.emitter.on('result', (event) => {
      console.log('result', event);
    });
  },
});
// Dummy TCP server: keeps the node event loop alive while recording.
const server = net.createServer((socket) => {
  socket.on('data', (data) => {
    console.log('data', data);
  });
});
server.listen(10097);

View File

@@ -1,6 +1,6 @@
// import WebSocket from 'ws'; // import WebSocket from 'ws';
import { EventEmitter } from 'eventemitter3'; import { EventEmitter } from 'eventemitter3';
import { WSServer, WSSOptions } from '../../provider/ws-server.ts'; import { WSServer, WSSOptions } from '../../ws.ts';
export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); export const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
export type VideoWSOptions = { export type VideoWSOptions = {
url?: string; url?: string;

View File

@@ -1,4 +1,4 @@
import { WSServer } from '../../provider/ws-server.ts'; import { WSServer } from '../../ws.ts';
import { nanoid } from 'nanoid'; import { nanoid } from 'nanoid';
export const uuid = () => nanoid(16); export const uuid = () => nanoid(16);

View File

@@ -1,98 +0,0 @@
import { EventEmitter } from 'eventemitter3';
import { initWs } from '../../ws-adapter/index.ts';
import type { ClientOptions } from 'ws';
export type WSSOptions = {
url: string;
ws?: WebSocket;
onConnect?: () => void;
wsOptions?: ClientOptions;
emitter?: EventEmitter;
};
export class WSServer {
ws: WebSocket;
onConnect?: () => void;
connected: boolean;
emitter: EventEmitter;
url: string;
wsOptions?: ClientOptions;
constructor(opts: WSSOptions) {
this.connected = false;
this.url = opts.url;
this.wsOptions = opts.wsOptions;
this.initWs(opts);
}
async initWs(opts: WSSOptions) {
if (opts.ws) {
this.ws = opts.ws;
}
this.emitter = opts.emitter || new EventEmitter();
this.ws = await initWs(opts.url, opts.wsOptions);
this.onConnect = opts?.onConnect || (() => {});
this.ws.onopen = this.onOpen.bind(this);
this.ws.onmessage = this.onMessage.bind(this);
this.ws.onerror = this.onError.bind(this);
this.ws.onclose = this.onClose.bind(this);
}
async reconnect() {
this.ws = await initWs(this.url, this.wsOptions);
this.ws.onopen = this.onOpen.bind(this);
this.ws.onmessage = this.onMessage.bind(this);
this.ws.onerror = this.onError.bind(this);
this.ws.onclose = this.onClose.bind(this);
}
/**
* 连接成功 ws 事件
*/
async onOpen() {
this.connected = true;
this?.onConnect?.();
this.emitter.emit('open');
}
/**
* 检查是否连接
* @returns
*/
async isConnected() {
if (this.connected) {
return true;
}
return new Promise((resolve) => {
this.emitter.once('open', () => {
resolve(true);
});
});
}
/**
* 收到消息 ws 事件
* @param event
*/
async onMessage(event: MessageEvent) {
// console.log('WSS onMessage', event);
this.emitter.emit('message', event);
}
/**
* ws 错误事件
* @param event
*/
async onError(event: Event) {
console.error('WSS onError');
this.emitter.emit('error', event);
}
/**
* ws 关闭事件
* @param event
*/
async onClose(event: CloseEvent) {
console.error('WSS onClose');
this.emitter.emit('close', event);
this.connected = false;
}
/**
* 关闭 ws 连接
*/
async close() {
if (this.ws.readyState === WebSocket.OPEN) {
this.ws.close();
}
}
}

237
src/asr/ws.ts Normal file
View File

@@ -0,0 +1,237 @@
import { EventEmitter } from 'eventemitter3';
import { initWs } from '../ws/index.ts';
import type { ClientOptions } from 'ws';
/** Options for constructing a {@link WSServer}. */
export type WSSOptions = {
  // target WebSocket URL
  url: string;
  // pre-created socket to adopt instead of opening a new one
  ws?: WebSocket;
  // invoked each time the connection opens
  onConnect?: () => void;
  // extra options forwarded to the node `ws` client
  wsOptions?: ClientOptions;
  // shared event emitter; a fresh one is created when omitted
  emitter?: EventEmitter;
};
interface WSServerInterface {
  // true when `type` marks the terminal event of a recognition result
  isComplated(type: string, endType?: string): boolean;
  // begin the session (no-op in the base class; overridden by subclasses)
  start(): Promise<void>;
}
/**
 * Generic WebSocket client wrapper used by the ASR providers.
 * Manages connection lifecycle (open/reconnect/close), re-emits socket
 * events on a local EventEmitter, and provides helpers for sending
 * silence frames to keep audio streams alive.
 */
export class WSServer implements WSServerInterface {
  ws: WebSocket;
  onConnect?: () => void;
  connected: boolean;
  connecting: boolean = false;
  emitter: EventEmitter;
  url: string;
  wsOptions?: ClientOptions;
  constructor(opts: WSSOptions) {
    this.connected = false;
    this.url = opts.url;
    this.wsOptions = opts.wsOptions;
    this.emitter = opts?.emitter || new EventEmitter();
    this.onConnect = opts?.onConnect || (() => { });
    this.initWs();
  }
  /** Bind the lifecycle callbacks to the current socket instance. */
  private bindSocketEvents() {
    this.ws.onopen = this.onOpen.bind(this);
    this.ws.onmessage = this.onMessage.bind(this);
    this.ws.onerror = this.onError.bind(this);
    this.ws.onclose = this.onClose.bind(this);
  }
  /**
   * Open (or adopt) the underlying WebSocket and wire up its event handlers.
   * @param opts optional overrides: an existing socket, emitter, or onConnect callback
   */
  async initWs(opts?: WSSOptions) {
    // allow callers to swap in their own emitter / connect callback
    // (fixes duplicated emitter check in the previous version)
    if (opts?.emitter) {
      this.emitter = opts.emitter;
    }
    if (opts?.onConnect) {
      this.onConnect = opts.onConnect;
    }
    this.connecting = true;
    // bug fix: a caller-supplied socket was previously assigned and then
    // immediately discarded by an unconditional initWs() call — honor it now
    this.ws = opts?.ws || await initWs(this.url, this.wsOptions);
    this.bindSocketEvents();
  }
  /**
   * Re-establish the connection unless one is already open or in flight.
   * @param opts.timeout optional delay (ms) before attempting to reconnect
   */
  async reconnect(opts?: { timeout?: number }) {
    if (this.connected || this.connecting) {
      return;
    }
    this.connecting = true;
    const timeout = opts?.timeout || 0;
    if (timeout > 0) {
      await new Promise((resolve) => setTimeout(resolve, timeout));
    }
    this.ws = await initWs(this.url, this.wsOptions);
    this.bindSocketEvents();
  }
  /** Begin the session — no-op in the base class, overridden by subclasses. */
  async start() {
    // do nothing
  }
  /**
   * ws open event: mark connected and notify listeners.
   */
  async onOpen() {
    this.connected = true;
    this.connecting = false;
    this?.onConnect?.();
    this.emitter.emit('open');
  }
  /**
   * Wait (up to 5s) for the connection to be established.
   * @returns true when connected, false on timeout
   */
  async isConnected() {
    if (this.connected) {
      return true;
    }
    return new Promise((resolve) => {
      // give the in-flight handshake 5 seconds before reporting failure
      const timeout = setTimeout(() => {
        resolve(false);
      }, 5000);
      this.emitter.once('open', () => {
        clearTimeout(timeout);
        resolve(true);
      });
    });
  }
  /**
   * ws message event: re-emitted on the local emitter.
   * @param event incoming message event
   */
  async onMessage(event: MessageEvent) {
    this.emitter.emit('message', event);
  }
  /**
   * ws error event: re-emitted on the local emitter.
   * @param event error event
   */
  async onError(event: Event) {
    this.emitter.emit('error', event);
  }
  /**
   * ws close event: emit 'close', drop all listeners shortly after, and
   * schedule a reconnect for abnormal (non-zero) close codes.
   * @param event close event carrying the close code
   */
  async onClose(event: CloseEvent) {
    this.emitter.emit('close', event);
    this.connected = false;
    // let 'close' listeners run first, then drop everything to avoid leaks
    setTimeout(() => {
      this.emitter.removeAllListeners();
    }, 100);
    const { code } = event;
    if (typeof code === 'number' && code !== 0) {
      this.reconnect({ timeout: 10000 });
    }
  }
  /**
   * Ensure there is a usable connection, reconnecting when necessary.
   * @returns true when the socket is (or becomes) connected
   */
  async checkConnected() {
    if (this.connecting) {
      return this.isConnected();
    }
    if (!this.connected) {
      await this.reconnect();
      return this.isConnected();
    }
    return true;
  }
  /**
   * Close the socket when it is currently open.
   */
  async close() {
    if (this.ws.readyState === WebSocket.OPEN) {
      this.ws.close();
    }
  }
  /** Whether `type` equals the configured terminal event type. */
  isComplated(type: string, endType = '') {
    return type === endType;
  }
  /**
   * Build a silent 16kHz / 16-bit / mono WAV buffer of the given duration.
   * @param durationSeconds silence duration in seconds
   * @param encoding 'buffer' (default) returns a Buffer; 'base64' returns a string
   * @returns the WAV audio as a Buffer or base64 string
   */
  generateSilence(durationSeconds: number, { encoding = 'buffer' }: { encoding?: 'buffer' | 'base64' } = {}): Buffer | string {
    const sampleRate = 16000; // 16kHz
    const bitDepth = 16;      // 16-bit samples
    const channels = 1;       // mono
    const samplesPerChannel = sampleRate * durationSeconds;
    const bytesPerSample = bitDepth / 8;
    const blockAlign = channels * bytesPerSample;
    const byteRate = sampleRate * blockAlign;
    const dataSize = samplesPerChannel * blockAlign;
    const fileSize = 36 + dataSize;
    // canonical 44-byte WAV header
    const header = Buffer.alloc(44);
    let offset = 0;
    // RIFF chunk descriptor
    header.write('RIFF', offset); offset += 4;
    header.writeUInt32LE(fileSize, offset); offset += 4;
    header.write('WAVE', offset); offset += 4;
    // fmt sub-chunk (PCM)
    header.write('fmt ', offset); offset += 4;
    header.writeUInt32LE(16, offset); offset += 4;       // fmt chunk size
    header.writeUInt16LE(1, offset); offset += 2;        // PCM format tag
    header.writeUInt16LE(channels, offset); offset += 2;
    header.writeUInt32LE(sampleRate, offset); offset += 4;
    header.writeUInt32LE(byteRate, offset); offset += 4;
    header.writeUInt16LE(blockAlign, offset); offset += 2;
    header.writeUInt16LE(bitDepth, offset); offset += 2;
    // data sub-chunk header + all-zero samples (silence)
    header.write('data', offset); offset += 4;
    header.writeUInt32LE(dataSize, offset);
    const buffer = Buffer.concat([header, Buffer.alloc(dataSize)]);
    if (encoding === 'base64') {
      return buffer.toString('base64');
    }
    return buffer;
  }
  /**
   * Send a blank (silence) payload to keep the stream alive.
   * @param buffer a raw Buffer to send instead of generated silence, or a
   *        mapper that turns the generated silence into the frame to send
   */
  async sendBlank(buffer?: Buffer | ((buffer: Buffer) => any)) {
    const isConnected = await this.checkConnected();
    if (!isConnected) {
      this.reconnect({ timeout: 1000 });
      return;
    }
    if (buffer && typeof buffer === 'function') {
      const blankBuffer = this.generateSilence(2);
      const value = await buffer(Buffer.from(blankBuffer));
      if (typeof value === 'string') {
        this.ws.send(value);
      } else {
        this.ws.send(JSON.stringify(value));
      }
      // bug fix: previously fell through and sent the raw silence a second time
      return;
    } else if (buffer && Buffer.isBuffer(buffer)) {
      this.ws.send(buffer);
      return;
    }
    // no buffer given: send 2 seconds of generated silence
    const blankBuffer = this.generateSilence(2);
    this.ws.send(blankBuffer);
  }
  /** Send a JSON marker frame indicating blank voice (no audio payload). */
  async sendBlankJson() {
    this.ws.send(JSON.stringify({ type: 'blankVoice' }));
  }
}

View File

@@ -1,7 +1,7 @@
import assert from 'assert'; import assert from 'assert';
import { logger } from '../logger/index.ts'; import { logger } from '../logger/index.ts';
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'; import { ChildProcessWithoutNullStreams, spawn } from 'child_process';
import recorders from '../recorder/recorders/index.ts'; import recorders from './recorders/index.ts';
import Stream from 'stream'; import Stream from 'stream';
export type RecordingOptions = { export type RecordingOptions = {
/* 采样率默认为16000 */ /* 采样率默认为16000 */

47
src/test/asr.ts Normal file
View File

@@ -0,0 +1,47 @@
// Manual test script: drive a local /ws/asr endpoint with sample audio through
// the WSServer wrapper, interleaving silence frames between two utterances.
import { WSServer } from "../asr/ws.ts";
import net from "net";
import fs from 'fs/promises';
import path from 'path';
import dotenv from 'dotenv';
dotenv.config();
// Alternative sample inputs kept for quick switching:
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/asr_example2.wav');
// const videoTestPath = path.join(process.cwd(), 'videos/tts_mix.mp3');
const videoTestPath = path.join(process.cwd(), 'videos/my_speech_text.wav');
const videoTestPath2 = path.join(process.cwd(), 'videos/asr_example2.wav');
const videoBlankPath = path.join(process.cwd(), 'videos/blank.wav');
// NOTE(review): `videoBlankPath` and `sleep` are unused in this script.
const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
const ws = new WSServer({
  url: "ws://localhost:51015/ws/asr?id=test",
  onConnect: async () => {
    console.log("WebSocket connected");
    ws.emitter.on("message", (data) => {
      // console.log("Received message:", data.data);
      const json = JSON.parse(data.data);
      // console.log('json', json);
      if (json && json.type === 'connected') {
        // handshake acknowledged — ask the server to initialise the ASR session
        ws.ws.send(JSON.stringify({ type: 'init' }));
      }
      if (json && json.type === 'asr' && json.code === 200) {
        // session ready — trigger the local 'asr' event to start streaming audio
        ws.emitter.emit('asr');
      }
    });
    ws.emitter.once('asr', async () => {
      const audioContent = await fs.readFile(videoTestPath);
      const audioContent2 = await fs.readFile(videoTestPath2);
      const base64Audio = audioContent.toString('base64');
      const value = { voice: base64Audio };
      ws.ws.send(JSON.stringify(value));
      console.log('slice 40', base64Audio.slice(0, 40));
      // separate the two utterances with generated-silence frames
      ws.sendBlank((buffer) => ({ type: 'blankVoice', voice: buffer.toString('base64') }));
      ws.ws.send(JSON.stringify({ voice: audioContent2.toString('base64') }));
      ws.sendBlank((buffer) => ({ type: 'blankVoice', voice: buffer.toString('base64') }));
    });
  }
});
// Dummy TCP listener: keeps the node event loop alive for the test.
net.createServer().listen(60000);

View File

@@ -24,7 +24,8 @@ export const initWs = async (url: string, options?: WebSocketOptions) => {
if (isBrowser) { if (isBrowser) {
ws = new WebSocket(url); ws = new WebSocket(url);
} else { } else {
const WebSocket = await import('ws').then((module) => module.default); const wsPakcages = 'ws' // 避免vite 自动会默认的在浏览器引入ws然后报错
const WebSocket = await import(wsPakcages).then((module) => module.default);
const { rejectUnauthorized, headers, ...rest } = options || {}; const { rejectUnauthorized, headers, ...rest } = options || {};
ws = new WebSocket(url, { ws = new WebSocket(url, {
rejectUnauthorized: rejectUnauthorized ?? true, rejectUnauthorized: rejectUnauthorized ?? true,
@@ -34,7 +35,7 @@ export const initWs = async (url: string, options?: WebSocketOptions) => {
} }
return ws; return ws;
}; };
interface EventEmitterOptions { export interface EventEmitterOptions {
/** /**
* Enables automatic capturing of promise rejection. * Enables automatic capturing of promise rejection.
*/ */

View File

@@ -1,33 +1,24 @@
{ {
"extends": "@kevisual/types/json/backend.json",
"compilerOptions": { "compilerOptions": {
"module": "nodenext", "module": "NodeNext",
"target": "esnext", "target": "esnext",
"noImplicitAny": false, "baseUrl": ".",
"outDir": "./dist",
"sourceMap": false,
"allowJs": true,
"newLine": "LF",
"baseUrl": "./",
"typeRoots": [ "typeRoots": [
"node_modules/@types", "./node_modules/@types",
"node_modules/@kevisual/types" "./node_modules/@kevisual/types/index.d.ts"
], ],
"declaration": true,
"noEmit": false,
"allowImportingTsExtensions": true,
"emitDeclarationOnly": true,
"moduleResolution": "NodeNext",
"experimentalDecorators": true,
"emitDecoratorMetadata": true,
"esModuleInterop": true,
"paths": { "paths": {
"@/*": [ "@/*": [
"src/*" "src/*"
],
"@agent/*": [
"agent/*"
] ]
} },
}, },
"include": [ "include": [
"src/**/*.ts", "src/**/*",
"agent/**/*",
], ],
"exclude": [],
} }