feat: add some other providers
This commit is contained in:
10
src/provider/chat-adapter/kimi.ts
Normal file
10
src/provider/chat-adapter/kimi.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
import { BaseChat, BaseChatOptions } from '../core/chat.ts';
|
||||
|
||||
/** Constructor options for {@link Kimi}: every `BaseChatOptions` field is optional. */
export type KimiOptions = Partial<BaseChatOptions>;

/**
 * Chat adapter named Kimi. It is a `BaseChat` whose endpoint defaults to
 * {@link Kimi.BASE_URL} when the caller does not provide one.
 */
export class Kimi extends BaseChat {
  /** Endpoint used when `options.baseURL` is missing or falsy. */
  static BASE_URL = 'https://api.moonshot.cn/v1/';

  /**
   * @param options Chat options; a falsy `baseURL` falls back to {@link Kimi.BASE_URL}.
   */
  constructor(options: KimiOptions) {
    super({
      ...(options as BaseChatOptions),
      baseURL: options.baseURL || Kimi.BASE_URL,
    });
  }
}
|
||||
10
src/provider/chat-adapter/zhipu.ts
Normal file
10
src/provider/chat-adapter/zhipu.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
import { BaseChat, BaseChatOptions } from '../core/chat.ts';
|
||||
|
||||
/** Constructor options for {@link Zhipu}: every `BaseChatOptions` field is optional. */
export type ZhipuOptions = Partial<BaseChatOptions>;

/**
 * Chat adapter named Zhipu. It is a `BaseChat` whose endpoint defaults to
 * {@link Zhipu.BASE_URL} when the caller does not provide one.
 */
export class Zhipu extends BaseChat {
  /** Endpoint used when `options.baseURL` is missing or falsy. */
  static BASE_URL = 'https://open.bigmodel.cn/api/paas/v4/';

  /**
   * @param options Chat options; a falsy `baseURL` falls back to {@link Zhipu.BASE_URL}.
   */
  constructor(options: ZhipuOptions) {
    super({
      ...(options as BaseChatOptions),
      baseURL: options.baseURL || Zhipu.BASE_URL,
    });
  }
}
|
||||
@@ -8,6 +8,8 @@ import { Volces } from './chat-adapter/volces.ts';
|
||||
import { DeepSeek } from './chat-adapter/deepseek.ts';
|
||||
import { ModelScope } from './chat-adapter/model-scope.ts';
|
||||
import { BailianChat } from './chat-adapter/dashscope.ts';
|
||||
import { Zhipu } from './chat-adapter/zhipu.ts';
|
||||
import { Kimi } from './chat-adapter/kimi.ts';
|
||||
|
||||
import { ChatMessage } from './core/type.ts';
|
||||
|
||||
@@ -18,6 +20,8 @@ export const VolcesProvider = Volces;
|
||||
export const DeepSeekProvider = DeepSeek;
|
||||
export const ModelScopeProvider = ModelScope;
|
||||
export const BailianProvider = BailianChat;
|
||||
export const ZhipuProvider = Zhipu;
|
||||
export const KimiProvider = Kimi;
|
||||
|
||||
export const ChatProviderMap = {
|
||||
Ollama: OllamaProvider,
|
||||
@@ -28,6 +32,8 @@ export const ChatProviderMap = {
|
||||
ModelScope: ModelScopeProvider,
|
||||
BaseChat: BaseChat,
|
||||
Bailian: BailianProvider,
|
||||
Zhipu: ZhipuProvider,
|
||||
Kimi: KimiProvider,
|
||||
};
|
||||
|
||||
type ProviderManagerConfig = {
|
||||
|
||||
@@ -9,6 +9,7 @@ import type {
|
||||
EmbeddingMessage,
|
||||
EmbeddingMessageComplete,
|
||||
} from './type.ts';
|
||||
import { AIUtils } from './utils/index.ts';
|
||||
|
||||
export type BaseChatOptions<T = Record<string, any>> = {
|
||||
/**
|
||||
@@ -32,14 +33,7 @@ export type BaseChatOptions<T = Record<string, any>> = {
|
||||
*/
|
||||
stream?: boolean;
|
||||
} & T;
|
||||
/**
 * Report whether the code appears to be running in a browser.
 *
 * @returns `true` when both `window` and `window.document` are defined;
 *          `false` otherwise, including when the probe itself throws.
 */
export const getIsBrowser = () => {
  try {
    // Check whether the window object exists.
    if (typeof window === 'undefined') {
      return false;
    }
    return typeof window.document !== 'undefined';
  } catch (e) {
    return false;
  }
};
|
||||
|
||||
export class BaseChat implements BaseChatInterface, BaseChatUsageInterface {
|
||||
/**
|
||||
* 默认baseURL
|
||||
@@ -53,32 +47,16 @@ export class BaseChat implements BaseChatInterface, BaseChatUsageInterface {
|
||||
* 默认apiKey
|
||||
*/
|
||||
apiKey: string;
|
||||
/**
|
||||
* 是否在浏览器中使用
|
||||
*/
|
||||
isBrowser: boolean;
|
||||
/**
|
||||
* openai实例
|
||||
*/
|
||||
openai: OpenAI;
|
||||
|
||||
prompt_tokens: number;
|
||||
total_tokens: number;
|
||||
completion_tokens: number;
|
||||
responseText: string;
|
||||
|
||||
utils: AIUtils;
|
||||
constructor(options: BaseChatOptions) {
|
||||
this.baseURL = options.baseURL;
|
||||
this.model = options.model;
|
||||
this.apiKey = options.apiKey;
|
||||
// @ts-ignore
|
||||
const DEFAULT_IS_BROWSER = getIsBrowser();
|
||||
this.isBrowser = options.isBrowser ?? DEFAULT_IS_BROWSER;
|
||||
// this.openai = new OpenAI({
|
||||
// apiKey: this.apiKey,
|
||||
// baseURL: this.baseURL,
|
||||
// dangerouslyAllowBrowser: options?.dangerouslyAllowBrowser ?? this.isBrowser,
|
||||
// });
|
||||
this.utils = new AIUtils();
|
||||
}
|
||||
post(url = '', opts: { headers?: Record<string, string>, data?: any } = {}) {
|
||||
let _url = url.startsWith('http') ? url : this.baseURL + url;
|
||||
|
||||
192
src/provider/core/utils/index.ts
Normal file
192
src/provider/core/utils/index.ts
Normal file
@@ -0,0 +1,192 @@
|
||||
/**
 * Stateless helpers for post-processing text returned by AI chat models.
 */
export class AIUtils {
  /**
   * Extract and parse JSON from a string.
   *
   * When the string contains a ```json fenced block, the content of the first
   * such block is parsed; otherwise the whole string is parsed.
   *
   * @param str String that contains JSON.
   * @returns The parsed value, or `null` when parsing fails. A literal JSON
   *          `null` is therefore indistinguishable from a parse failure.
   */
  extractJsonFromMarkdown(str: string): any | null {
    const match = str.match(/```json\s*([\s\S]*?)\s*```/);
    const jsonStr = match && match[1] ? match[1] : str;

    try {
      return JSON.parse(jsonStr);
    } catch {
      return null;
    }
  }

  /**
   * Extract the contents of fenced code blocks from Markdown.
   *
   * @param str Markdown text.
   * @param language Fence language to select (e.g. `ts`, `c++`). When omitted
   *                 or empty, every fenced block is returned.
   * @returns The trimmed contents of each matching block, or `null` when no
   *          block matches.
   */
  extractCodeFromMarkdown(str: string, language?: string): string | string[] | null {
    // The language is escaped so names such as "c++" are treated literally, and
    // the lookahead stops "js" from also matching a "json" (or "c" a "c++") fence.
    const fence = language
      ? new RegExp('```' + escapeRegExp(language) + '(?![\\w+#.-])\\s*([\\s\\S]*?)\\s*```', 'g')
      : /```[\w]*\s*([\s\S]*?)\s*```/g;

    const blocks = Array.from(str.matchAll(fence), (m) => m[1].trim());
    return blocks.length > 0 ? blocks : null;
  }

  /**
   * Tidy up whitespace in an AI response.
   *
   * @param str Raw response text.
   * @returns The text with trailing spaces/tabs removed from every line, runs
   *          of three or more newlines collapsed to two, and outer whitespace
   *          trimmed.
   */
  cleanResponse(str: string): string {
    // Strip line-end whitespace first: whitespace-only lines must become empty
    // lines before newline runs are collapsed, otherwise they survive as 3+
    // consecutive newlines.
    return str
      .replace(/[ \t]+$/gm, '')
      .replace(/\n{3,}/g, '\n\n')
      .trim();
  }

  /**
   * Collect tags mentioned in a response.
   *
   * Recognised forms: `#tag`, `[tag]` and `tags: [a, b]` (items separated by
   * `,` or `;`).
   *
   * @param str Response text.
   * @returns Unique tags in order of first appearance per pattern.
   */
  extractTags(str: string): string[] {
    const tagPatterns = [
      /#(\w+)/g, // #tag form
      /\[(\w+)\]/g, // [tag] form
      /tags?:\s*\[([^\]]+)\]/gi, // tags: [...] form
    ];

    const tags = new Set<string>();

    for (const pattern of tagPatterns) {
      for (const match of str.matchAll(pattern)) {
        if (!match[1]) continue;
        match[1]
          .split(/[,;]/)
          .map((t) => t.trim())
          .filter(Boolean)
          .forEach((tag) => tags.add(tag));
      }
    }

    return Array.from(tags);
  }

  /**
   * Find http/https URLs in free text.
   *
   * Trailing sentence punctuation and unbalanced closing brackets (such as the
   * `)` that ends a Markdown link) are not part of the returned URL.
   *
   * @param str Text to scan.
   * @returns The URLs in order of appearance; empty when none are found.
   */
  extractUrls(str: string): string[] {
    const matches = str.match(/https?:\/\/[^\s]+/g) ?? [];
    return matches
      .map(trimUrlTail)
      // Drop candidates that were nothing but a scheme followed by punctuation.
      .filter((url) => /^https?:\/\/./.test(url));
  }

  /**
   * Split long text into chunks of roughly `maxTokens` tokens.
   *
   * Paragraphs (separated by blank lines) are packed together while they fit;
   * a paragraph that is longer than the limit on its own is cut into pieces so
   * that no chunk exceeds the limit.
   *
   * @param text Source text.
   * @param maxTokens Maximum tokens per chunk (rough estimate: 1 token ≈ 4 characters).
   * @returns Non-empty, trimmed chunks.
   */
  chunkText(text: string, maxTokens: number = 1000): string[] {
    const chunkSize = maxTokens * 4; // rough estimate
    const separator = '\n\n';
    const chunks: string[] = [];
    let current = '';

    const flush = (): void => {
      const trimmed = current.trim();
      // Whitespace-only content would otherwise produce an empty chunk.
      if (trimmed) chunks.push(trimmed);
      current = '';
    };

    for (const paragraph of text.split(/\n\n+/)) {
      for (const piece of splitOversized(paragraph, chunkSize)) {
        // Count the separator too, so the joined chunk really stays in budget.
        if (current && current.length + separator.length + piece.length > chunkSize) {
          flush();
        }
        current += (current ? separator : '') + piece;
      }
    }
    flush();

    return chunks;
  }

  /**
   * Remove the model's reasoning section from a response.
   *
   * Handles `<thinking>…</thinking>` and `<think>…</think>` as well as the
   * square-bracket equivalents, case-insensitively.
   *
   * @param str Response text.
   * @returns The text without reasoning sections, trimmed.
   */
  removeThinkingTags(str: string): string {
    return str
      .replace(/<(think(?:ing)?)>[\s\S]*?<\/\1>/gi, '')
      .replace(/\[(think(?:ing)?)\][\s\S]*?\[\/\1\]/gi, '')
      .trim();
  }

  /**
   * Escape characters that are special inside template-style prompts.
   *
   * @param str Raw text.
   * @returns The text with backslashes, backticks and `$` prefixed by a backslash.
   */
  escapeForPrompt(str: string): string {
    return str
      .replace(/\\/g, '\\\\')
      .replace(/`/g, '\\`')
      .replace(/\$/g, '\\$');
  }

  /**
   * Roughly estimate the token count of a text.
   *
   * @param text Text to measure.
   * @returns Estimated tokens, using ~1.5 characters per token for characters
   *          in U+4E00–U+9FA5 and ~4 characters per token for everything else.
   */
  estimateTokens(text: string): number {
    const chineseChars = (text.match(/[\u4e00-\u9fa5]/g) || []).length;
    const otherChars = text.length - chineseChars;
    return Math.ceil(chineseChars / 1.5 + otherChars / 4);
  }

  /**
   * Parse `key: value` lines into an object.
   *
   * Both the ASCII colon and the full-width colon (U+FF1A) are accepted as the
   * separator; the first colon on a line splits key from value. Lines without
   * a key, a colon and a non-empty value are ignored. Later duplicates
   * overwrite earlier ones.
   *
   * @param str Response text.
   * @returns Map of trimmed keys to trimmed values.
   */
  extractKeyValuePairs(str: string): Record<string, string> {
    const result: Record<string, string> = {};
    const linePattern = /^([^:\uFF1A]+)[:\uFF1A]\s*(.+)$/;

    // Split on CRLF as well: a trailing "\r" would stop `.+$` from matching.
    for (const line of str.split(/\r?\n/)) {
      const match = line.match(linePattern);
      if (match) {
        result[match[1].trim()] = match[2].trim();
      }
    }

    return result;
  }

  /**
   * Heuristically decide whether a response looks complete (not truncated).
   *
   * A response is considered incomplete when it ends with an ellipsis of three
   * or more dots, has an odd number of ``` fences, or contains more `{` than
   * `}` or more `[` than `]`.
   *
   * @param str Response text.
   * @returns `true` when none of the truncation signs are present.
   */
  isResponseComplete(str: string): boolean {
    const text = str.trim();

    if (/\.{3,}$/.test(text)) return false; // trailing ellipsis

    const fenceCount = (text.match(/```/g) ?? []).length;
    if (fenceCount % 2 !== 0) return false; // unclosed code block

    const unclosedObject = countChar(text, '{') > countChar(text, '}');
    const unclosedArray = countChar(text, '[') > countChar(text, ']');
    return !(unclosedObject || unclosedArray);
  }
}

/** Escape every RegExp metacharacter in `value` so it matches literally. */
function escapeRegExp(value: string): string {
  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/** Count non-overlapping occurrences of the single character `ch` in `value`. */
function countChar(value: string, ch: string): number {
  return value.split(ch).length - 1;
}

/** Strip trailing sentence punctuation and unbalanced closing brackets from a URL candidate. */
function trimUrlTail(url: string): string {
  const openerFor = new Map<string, string>([
    [')', '('],
    [']', '['],
    ['}', '{'],
    ['>', '<'],
  ]);
  const punctuation = '.,;:!?\'"';

  let end = url.length;
  while (end > 0) {
    const last = url[end - 1];
    if (punctuation.includes(last)) {
      end -= 1;
      continue;
    }
    const opener = openerFor.get(last);
    if (opener !== undefined) {
      const head = url.slice(0, end);
      // A closer with no matching opener inside the URL belongs to the surrounding text.
      if (countChar(head, last) > countChar(head, opener)) {
        end -= 1;
        continue;
      }
    }
    break;
  }
  return url.slice(0, end);
}

/** Cut `text` into pieces of at most `limit` UTF-16 units without splitting a surrogate pair. */
function splitOversized(text: string, limit: number): string[] {
  // Non-positive or NaN limits cannot be honoured by cutting; leave the text whole.
  if (!(limit >= 1) || text.length <= limit) return [text];

  const step = Math.floor(limit);
  const pieces: string[] = [];
  let start = 0;
  while (start < text.length) {
    let end = Math.min(start + step, text.length);
    if (end < text.length) {
      const lastUnit = text.charCodeAt(end - 1);
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        // Keep the pair together: back off, or take both halves when the piece is a single unit.
        end = end - start > 1 ? end - 1 : end + 1;
      }
    }
    pieces.push(text.slice(start, end));
    start = end;
  }
  return pieces;
}
|
||||
@@ -1,86 +0,0 @@
|
||||
import { numTokensFromString } from './token.ts';
|
||||
|
||||
// Constant definitions
const CHUNK_SIZE = 512; // maximum number of tokens per chunk
// Marker inserted after each delimiter so text can later be split into sentences.
const MAGIC_SEPARATOR = '🦛';
// Sentence delimiters: ASCII punctuation and newline, followed by the CJK forms
// (full-width comma U+FF0C, ideographic full stop, full-width ! U+FF01 and ? U+FF1F).
// The list previously repeated ',', '!' and '?' where the full-width forms belong.
const DELIMITER = [',', '.', '!', '?', '\n', '\uFF0C', '。', '\uFF01', '\uFF1F'];
const PARAGRAPH_DELIMITER = '\n\n';
|
||||
|
||||
/** One piece of chunked text together with its position and token count. */
export interface Chunk {
  /** Sequential index of the chunk; `getChunks` numbers chunks from 0. */
  chunkId: number;
  /** Text content of the chunk. */
  text: string;
  /** Token count of `text`, as computed by the code that builds the chunk. */
  tokens: number;
}
|
||||
|
||||
/**
 * Ensure that a block of text is broken down when it exceeds the maximum
 * token count.
 *
 * Text whose token count is within `CHUNK_SIZE` is returned unchanged as a
 * single entry. Otherwise a separator is inserted after every delimiter, the
 * marked text is walked in windows of up to `CHUNK_SIZE` UTF-16 units, and the
 * complete sentences of each window are emitted together; the unfinished
 * sentence at the end of a window is carried into the next one.
 *
 * @param chunk Input text block.
 * @returns Array of `[text, tokenCount]` pairs.
 */
function ensureChunkSize(chunk: string): Array<[string, number]> {
  const tokens = numTokensFromString(chunk);
  if (tokens <= CHUNK_SIZE) {
    return [[chunk, tokens]];
  }

  // Append the magic separator after each delimiter. Delimiters are
  // de-duplicated so a value listed twice does not insert two separators.
  let processedChunk = chunk;
  for (const delimiter of Array.from(new Set(DELIMITER))) {
    // Escape RegExp metacharacters so the delimiter is matched literally.
    const escapedDelimiter = delimiter.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    processedChunk = processedChunk.replace(new RegExp(escapedDelimiter, 'g'), delimiter + MAGIC_SEPARATOR);
  }

  const chunks: Array<[string, number]> = [];
  let tail = '';
  let start = 0;

  // Walk the marked text window by window.
  while (start < processedChunk.length) {
    let end = Math.min(start + CHUNK_SIZE, processedChunk.length);
    if (end < processedChunk.length && end - start > 1) {
      // Never end a window on a high surrogate: cutting the separator (or any
      // astral character) in half would let it slip past the split below and
      // be re-joined into the output through `tail`.
      const lastUnit = processedChunk.charCodeAt(end - 1);
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1;
      }
    }

    // The trailing space guarantees a final (possibly blank) element that
    // represents the unfinished sentence of this window.
    const sentences = (processedChunk.slice(start, end) + ' ').split(MAGIC_SEPARATOR);
    const currentChunk = tail + sentences.slice(0, -1).join('');
    if (currentChunk.trim()) {
      chunks.push([currentChunk, numTokensFromString(currentChunk)]);
    }
    tail = sentences[sentences.length - 1].trim();
    start = end;
  }

  // Emit whatever unfinished sentence is left after the last window.
  if (tail) {
    chunks.push([tail, numTokensFromString(tail)]);
  }

  return chunks;
}
|
||||
|
||||
/**
 * Split text into chunks.
 *
 * The text is split on `PARAGRAPH_DELIMITER`, each paragraph is trimmed and
 * empty ones are discarded; every remaining paragraph is passed through
 * `ensureChunkSize`, and the resulting pieces are numbered consecutively
 * from 0.
 *
 * @param text Input text.
 * @returns The chunks in document order.
 */
export async function getChunks(text: string): Promise<Chunk[]> {
  const pieces = text
    .split(PARAGRAPH_DELIMITER)
    .map((paragraph) => paragraph.trim())
    .filter((paragraph) => paragraph)
    .flatMap((paragraph) => ensureChunkSize(paragraph));

  return pieces.map(([pieceText, tokens], chunkId) => ({
    chunkId,
    text: pieceText,
    tokens,
  }));
}
|
||||
Reference in New Issue
Block a user