feat: add some other providers

2025-12-04 01:55:07 +08:00
parent 6d473a0902
commit 2c75559101
8 changed files with 288 additions and 169 deletions
--- a/src/provider/chat-adapter/kimi.ts
+++ b/src/provider/chat-adapter/kimi.ts
@@ -0,0 +1,10 @@
+import { BaseChat, BaseChatOptions } from '../core/chat.ts';
+
+export type KimiOptions = Partial<BaseChatOptions>; // every BaseChatOptions field is optional for callers
+export class Kimi extends BaseChat { // BaseChat subclass that only supplies a default baseURL
+  static BASE_URL = 'https://api.moonshot.cn/v1/'; // fallback endpoint used when options.baseURL is falsy
+  constructor(options: KimiOptions) {
+    const baseURL = options.baseURL || Kimi.BASE_URL; // a caller-supplied baseURL takes precedence
+    super({ ...(options as BaseChatOptions), baseURL: baseURL }); // cast needed: options is Partial, BaseChat expects BaseChatOptions
+  }
+}
--- a/src/provider/chat-adapter/zhipu.ts
+++ b/src/provider/chat-adapter/zhipu.ts
@@ -0,0 +1,10 @@
+import { BaseChat, BaseChatOptions } from '../core/chat.ts';
+
+export type ZhipuOptions = Partial<BaseChatOptions>; // every BaseChatOptions field is optional for callers
+export class Zhipu extends BaseChat { // BaseChat subclass that only supplies a default baseURL
+  static BASE_URL = 'https://open.bigmodel.cn/api/paas/v4/'; // fallback endpoint used when options.baseURL is falsy
+  constructor(options: ZhipuOptions) {
+    const baseURL = options.baseURL || Zhipu.BASE_URL; // a caller-supplied baseURL takes precedence
+    super({ ...(options as BaseChatOptions), baseURL: baseURL }); // cast needed: options is Partial, BaseChat expects BaseChatOptions
+  }
+}
--- a/src/provider/chat.ts
+++ b/src/provider/chat.ts
@@ -8,6 +8,8 @@ import { Volces } from './chat-adapter/volces.ts';
 import { DeepSeek } from './chat-adapter/deepseek.ts';
 import { ModelScope } from './chat-adapter/model-scope.ts';
 import { BailianChat } from './chat-adapter/dashscope.ts';
+import { Zhipu } from './chat-adapter/zhipu.ts';
+import { Kimi } from './chat-adapter/kimi.ts';

 import { ChatMessage } from './core/type.ts';

@@ -18,6 +20,8 @@ export const VolcesProvider = Volces;
 export const DeepSeekProvider = DeepSeek;
 export const ModelScopeProvider = ModelScope;
 export const BailianProvider = BailianChat;
+export const ZhipuProvider = Zhipu;
+export const KimiProvider = Kimi;

 export const ChatProviderMap = {
  Ollama: OllamaProvider,
@@ -28,6 +32,8 @@ export const ChatProviderMap = {
  ModelScope: ModelScopeProvider,
  BaseChat: BaseChat,
  Bailian: BailianProvider,
+  Zhipu: ZhipuProvider,
+  Kimi: KimiProvider,
 };

 type ProviderManagerConfig = {
--- a/src/provider/core/chat.ts
+++ b/src/provider/core/chat.ts
@@ -9,6 +9,7 @@ import type {
  EmbeddingMessage,
  EmbeddingMessageComplete,
 } from './type.ts';
+import { AIUtils } from './utils/index.ts';

 export type BaseChatOptions<T = Record<string, any>> = {
  /**
@@ -32,14 +33,7 @@ export type BaseChatOptions<T = Record<string, any>> = {
   */
  stream?: boolean;
 } & T;
-export const getIsBrowser = () => {
-  try {
-    // 检查是否存在window对象
-    return typeof window !== 'undefined' && typeof window.document !== 'undefined';
-  } catch (e) {
-    return false;
-  }
-};
+
 export class BaseChat implements BaseChatInterface, BaseChatUsageInterface {
  /**
   * 默认baseURL
@@ -53,32 +47,16 @@ export class BaseChat implements BaseChatInterface, BaseChatUsageInterface {
   * 默认apiKey
   */
  apiKey: string;
-  /**
-   * 是否在浏览器中使用
-   */
-  isBrowser: boolean;
-  /**
-   * openai实例
-   */
-  openai: OpenAI;
-
  prompt_tokens: number;
  total_tokens: number;
  completion_tokens: number;
  responseText: string;
-
+  utils: AIUtils;
  constructor(options: BaseChatOptions) {
    this.baseURL = options.baseURL;
    this.model = options.model;
    this.apiKey = options.apiKey;
-    // @ts-ignore
-    const DEFAULT_IS_BROWSER = getIsBrowser();
-    this.isBrowser = options.isBrowser ?? DEFAULT_IS_BROWSER;
-    // this.openai = new OpenAI({
-    //   apiKey: this.apiKey,
-    //   baseURL: this.baseURL,
-    //   dangerouslyAllowBrowser: options?.dangerouslyAllowBrowser ?? this.isBrowser,
-    // });
+    this.utils = new AIUtils();
  }
  post(url = '', opts: { headers?: Record<string, string>, data?: any } = {}) {
    let _url = url.startsWith('http') ? url : this.baseURL + url;
--- a/src/provider/core/utils/index.ts
+++ b/src/provider/core/utils/index.ts
@@ -0,0 +1,192 @@
+export class AIUtils {
+  /**
+   * Extracts JSON from a Markdown code block
+   * @param str string containing JSON; parsed as a whole when no json fence is found
+   * @returns the parsed value, or null when parsing fails
+   */
+  extractJsonFromMarkdown(str: string): any | null {
+    // Try to extract JSON from ```json ... ```
+    const jsonRegex = /```json\s*([\s\S]*?)\s*```/;
+    const match = str.match(jsonRegex);
+    let jsonStr = match && match[1] ? match[1] : str;
+
+    try {
+      return JSON.parse(jsonStr);
+    } catch {
+      return null;
+    }
+  }
+
+  /**
+   * Extracts code from Markdown code blocks
+   * @param str Markdown string
+   * @param language fence language; when omitted all code blocks are returned. NOTE(review): interpolated into a RegExp unescaped, so metacharacters (e.g. c++) are not matched literally
+   * @returns array of trimmed code strings (both branches return an array), or null when no block matches
+   */
+  extractCodeFromMarkdown(str: string, language?: string): string | string[] | null {
+    if (language) {
+      const regex = new RegExp(`\`\`\`${language}\\s*([\\s\\S]*?)\\s*\`\`\``, 'g');
+      const matches = str.match(regex);
+      if (!matches) return null;
+      return matches.map(m => m.replace(new RegExp(`\`\`\`${language}\\s*|\\s*\`\`\``, 'g'), '').trim());
+    }
+    
+    const regex = /```[\w]*\s*([\s\S]*?)\s*```/g;
+    const matches = [...str.matchAll(regex)];
+    if (matches.length === 0) return null;
+    return matches.map(m => m[1].trim());
+  }
+
+  /**
+   * Cleans up extra whitespace and formatting in an AI response
+   * @param str raw string
+   * @returns cleaned string
+   */
+  cleanResponse(str: string): string {
+    return str
+      .trim()
+      .replace(/\n{3,}/g, '\n\n') // collapse three or more consecutive newlines into two
+      .replace(/[ \t]+$/gm, ''); // strip trailing spaces and tabs on every line
+  }
+
+  /**
+   * Extracts tags from an AI response
+   * @param str response string
+   * @returns array of tags, deduplicated through a Set
+   */
+  extractTags(str: string): string[] {
+    const tagPatterns = [
+      /#(\w+)/g, // #tag form
+      /\[(\w+)\]/g, // [tag] form
+      /tags?:\s*\[([^\]]+)\]/gi, // tags: [...] form
+    ];
+
+    const tags = new Set<string>();
+    
+    for (const pattern of tagPatterns) {
+      const matches = str.matchAll(pattern);
+      for (const match of matches) {
+        if (match[1]) {
+          const extracted = match[1].split(/[,;]/).map(t => t.trim()).filter(Boolean);
+          extracted.forEach(tag => tags.add(tag));
+        }
+      }
+    }
+
+    return Array.from(tags);
+  }
+
+  /**
+   * Extracts URLs from text
+   * @param str text string
+   * @returns array of http/https URLs (empty when none are found)
+   */
+  extractUrls(str: string): string[] {
+    const urlRegex = /(https?:\/\/[^\s]+)/g;
+    const matches = str.match(urlRegex);
+    return matches || [];
+  }
+
+  /**
+   * Splits long text into chunks of roughly the given token count
+   * @param text original text
+   * @param maxTokens max tokens per chunk (rough estimate: 1 token ≈ 4 characters). NOTE(review): a single paragraph longer than the limit is kept whole, not split
+   * @returns array of text chunks
+   */
+  chunkText(text: string, maxTokens: number = 1000): string[] {
+    const chunkSize = maxTokens * 4; // rough estimate
+    const chunks: string[] = [];
+    
+    // Split on blank lines (paragraph boundaries)
+    const paragraphs = text.split(/\n\n+/);
+    let currentChunk = '';
+    
+    for (const paragraph of paragraphs) {
+      if ((currentChunk + paragraph).length > chunkSize && currentChunk) {
+        chunks.push(currentChunk.trim());
+        currentChunk = paragraph;
+      } else {
+        currentChunk += (currentChunk ? '\n\n' : '') + paragraph;
+      }
+    }
+    
+    if (currentChunk) {
+      chunks.push(currentChunk.trim());
+    }
+    
+    return chunks;
+  }
+
+  /**
+   * Removes the thinking process (thinking tags) from an AI response
+   * @param str response string
+   * @returns cleaned string
+   */
+  removeThinkingTags(str: string): string {
+    return str
+      .replace(/<thinking>[\s\S]*?<\/thinking>/gi, '')
+      .replace(/\[thinking\][\s\S]*?\[\/thinking\]/gi, '')
+      .trim();
+  }
+
+  /**
+   * Escapes special characters (backslash, backtick, dollar sign) for use in an AI prompt
+   * @param str raw string
+   * @returns escaped string
+   */
+  escapeForPrompt(str: string): string {
+    return str
+      .replace(/\\/g, '\\\\')
+      .replace(/`/g, '\\`')
+      .replace(/\$/g, '\\$');
+  }
+
+  /**
+   * Estimates the approximate token count of a text
+   * @param text the text
+   * @returns estimated token count
+   */
+  estimateTokens(text: string): number {
+    // Simple estimate: Chinese is about 1.5 chars/token, everything else about 4 chars/token
+    const chineseChars = (text.match(/[\u4e00-\u9fa5]/g) || []).length;
+    const otherChars = text.length - chineseChars;
+    return Math.ceil(chineseChars / 1.5 + otherChars / 4);
+  }
+
+  /**
+   * Extracts structured data (key: value lines) from a response
+   * @param str response string
+   * @returns object of key/value pairs; a repeated key keeps the last value
+   */
+  extractKeyValuePairs(str: string): Record<string, string> {
+    const result: Record<string, string> = {};
+    const lines = str.split('\n');
+    
+    for (const line of lines) {
+      const match = line.match(/^([^:：]+)[：:]\s*(.+)$/);
+      if (match) {
+        const key = match[1].trim();
+        const value = match[2].trim();
+        result[key] = value;
+      }
+    }
+    
+    return result;
+  }
+
+  /**
+   * Checks whether an AI response is complete (truncation check)
+   * @param str response string
+   * @returns whether it is complete. NOTE(review): the patterns only test how the trimmed text ends, so text after a closed fence/brace/bracket may be reported as incomplete - verify
+   */
+  isResponseComplete(str: string): boolean {
+    const incompleteSigns = [
+      /```[\w]*\s*[\s\S]*?(?<!```)$/, // unclosed code block
+      /\{[\s\S]*(?<!\})$/, // unclosed JSON object
+      /\[[\s\S]*(?<!\])$/, // unclosed array
+      /\.{3,}$/, // trailing ellipsis
+    ];
+    
+    return !incompleteSigns.some(pattern => pattern.test(str.trim()));
+  }
+}
--- a/src/provider/utils/chunk.ts
+++ b/src/provider/utils/chunk.ts
@@ -1,86 +0,0 @@
-import { numTokensFromString } from './token.ts';
-
-// 常量定义
-const CHUNK_SIZE = 512; // 每个chunk的最大token数
-const MAGIC_SEPARATOR = '🦛';
-const DELIMITER = [',', '.', '!', '?', '\n', '，', '。', '！', '？'];
-const PARAGRAPH_DELIMITER = '\n\n';
-
-export interface Chunk {
-  chunkId: number;
-  text: string;
-  tokens: number;
-}
-
-/**
- * 确保每个chunk的大小不超过最大token数
- * @param chunk 输入的文本块
- * @returns 分割后的文本块及其token数的数组
- */
-function ensureChunkSize(chunk: string): Array<[string, number]> {
-  const tokens = numTokensFromString(chunk);
-  if (tokens <= CHUNK_SIZE) {
-    return [[chunk, tokens]];
-  }
-
-  // 在分隔符后添加魔法分隔符
-  let processedChunk = chunk;
-  for (const delimiter of DELIMITER) {
-    // 转义特殊字符
-    const escapedDelimiter = delimiter.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
-    processedChunk = processedChunk.replace(new RegExp(escapedDelimiter, 'g'), delimiter + MAGIC_SEPARATOR);
-  }
-
-  const chunks: Array<[string, number]> = [];
-  let tail = '';
-
-  // 按CHUNK_SIZE分割文本
-  for (let i = 0; i < processedChunk.length; i += CHUNK_SIZE) {
-    const sentences = (processedChunk.slice(i, i + CHUNK_SIZE) + ' ').split(MAGIC_SEPARATOR);
-    const currentChunk = tail + sentences.slice(0, -1).join('');
-    if (currentChunk.trim()) {
-      const tokenCount = numTokensFromString(currentChunk);
-      chunks.push([currentChunk, tokenCount]);
-    }
-    tail = sentences[sentences.length - 1].trim();
-  }
-
-  // 处理最后剩余的tail
-  if (tail) {
-    const tokenCount = numTokensFromString(tail);
-    chunks.push([tail, tokenCount]);
-  }
-
-  return chunks;
-}
-
-/**
- * 将文本分割成chunks
- * @param text 输入文本
- * @returns 分割后的chunks数组
- */
-export async function getChunks(text: string): Promise<Chunk[]> {
-  // 按段落分割文本
-  const paragraphs = text
-    .split(PARAGRAPH_DELIMITER)
-    .map((p) => p.trim())
-    .filter((p) => p);
-
-  const chunks: Chunk[] = [];
-  let currentIndex = 0;
-
-  // 处理每个段落
-  for (const paragraph of paragraphs) {
-    const splittedParagraph = ensureChunkSize(paragraph);
-    for (const [text, tokens] of splittedParagraph) {
-      chunks.push({
-        chunkId: currentIndex,
-        text,
-        tokens,
-      });
-      currentIndex++;
-    }
-  }
-
-  return chunks;
-}