feat: add some other providers

2025-12-04 01:55:07 +08:00
parent 6d473a0902
commit 2c75559101
8 changed files with 288 additions and 169 deletions
--- a/src/provider/core/utils/index.ts
+++ b/src/provider/core/utils/index.ts
@@ -0,0 +1,192 @@
/**
 * Stateless text helpers for post-processing raw AI responses: pulling JSON,
 * code, tags, URLs and key/value pairs out of free-form text, cleaning it up,
 * chunking it, and estimating its size in tokens.
 *
 * All methods are pure functions of their arguments.
 */
export class AIUtils {
  /** Rough average number of characters per token for non-Chinese text. */
  private static readonly CHARS_PER_TOKEN = 4;

  /** Rough average number of characters per token for Chinese text. */
  private static readonly CHINESE_CHARS_PER_TOKEN = 1.5;

  /**
   * Extracts and parses JSON from a response.
   *
   * Candidates are tried in this order and the first one that parses wins:
   * 1. every fenced block tagged `json` (any letter case),
   * 2. the whole input string,
   * 3. every fenced block that has no language tag.
   *
   * The return type stays `any` so existing callers keep compiling; callers
   * should validate the shape before using the value.
   *
   * @param str Text that contains (or is) a JSON document.
   * @returns The parsed value, or `null` when no candidate is valid JSON.
   *          Note that the JSON literal `null` also yields `null`.
   */
  extractJsonFromMarkdown(str: string): any | null {
    const tagged = [...str.matchAll(/```json\s*([\s\S]*?)\s*```/gi)].map(m => m[1] ?? '');
    const untagged = this.fencedBlocks(str)
      .filter(block => block.tag === '')
      .map(block => block.body);

    for (const candidate of [...tagged, str, ...untagged]) {
      const text = candidate.trim();
      if (text === '') continue;
      try {
        return JSON.parse(text);
      } catch {
        // Not valid JSON; fall through to the next candidate.
      }
    }
    return null;
  }

  /**
   * Extracts the contents of fenced Markdown code blocks.
   *
   * @param str Markdown text.
   * @param language Fence tag to select (compared case-insensitively against
   *                 the whole tag, so `c` does not select `cpp`). When omitted
   *                 or empty, every fenced block is returned.
   * @returns The trimmed block bodies in document order, or `null` when no
   *          block matches.
   */
  extractCodeFromMarkdown(str: string, language?: string): string | string[] | null {
    const wanted = language ? language.toLowerCase() : '';
    // Comparing parsed tags (instead of building a RegExp from `language`)
    // keeps tags such as "c++" from being interpreted as regex syntax.
    const bodies = this.fencedBlocks(str)
      .filter(block => wanted === '' || block.tag === wanted)
      .map(block => block.body);
    return bodies.length > 0 ? bodies : null;
  }

  /**
   * Normalises whitespace in a response.
   *
   * Line endings become `\n`, trailing spaces/tabs are removed from every
   * line, runs of three or more newlines collapse to a single blank line, and
   * the result is trimmed.
   *
   * @param str Raw response text.
   * @returns The cleaned text.
   */
  cleanResponse(str: string): string {
    return str
      .replace(/\r\n?/g, '\n')
      // Strip trailing blanks first so whitespace-only lines count as empty
      // lines when the newline runs are collapsed.
      .replace(/[ \t]+$/gm, '')
      .replace(/\n{3,}/g, '\n\n')
      .trim();
  }

  /**
   * Extracts tags written as `#tag`, `[tag]` or `tags: [a, b, c]`.
   *
   * Tag characters may be any Unicode letters, digits or `_`. Items inside a
   * `tags: [...]` list are split on commas/semicolons and have surrounding
   * quotes and a leading `#` removed.
   *
   * @param str Response text.
   * @returns Unique tags in order of first discovery.
   */
  extractTags(str: string): string[] {
    const patterns = [
      /#([\p{L}\p{N}_]+)/gu, // #tag
      /\[([\p{L}\p{N}_]+)\]/gu, // [tag]
      /tags?:\s*\[([^\]]+)\]/gi, // tags: [...]
    ];

    const tags = new Set<string>();
    for (const pattern of patterns) {
      for (const match of str.matchAll(pattern)) {
        for (const raw of (match[1] ?? '').split(/[,;，；]/)) {
          const tag = raw
            .trim()
            .replace(/^(["'`])(.*)\1$/, '$2')
            .trim()
            .replace(/^#/, '');
          if (tag) tags.add(tag);
        }
      }
    }
    return Array.from(tags);
  }

  /**
   * Extracts `http://` and `https://` URLs from text.
   *
   * Sentence punctuation, quotes and unbalanced closing brackets at the end of
   * a match are treated as surrounding prose and dropped.
   *
   * @param str Text to scan.
   * @returns The URLs in document order (empty array when there are none).
   */
  extractUrls(str: string): string[] {
    const urls: string[] = [];
    for (const match of str.matchAll(/https?:\/\/[^\s<>"，。；：！？（）【】《》、]+/gi)) {
      urls.push(this.trimUrlTail(match[0]));
    }
    return urls;
  }

  /**
   * Splits text into chunks that fit a token budget.
   *
   * Paragraphs (separated by blank lines) are packed greedily, joined by a
   * blank line. A paragraph that alone exceeds the budget is cut into
   * fixed-size pieces so that no chunk is longer than the budget.
   *
   * @param text Source text.
   * @param maxTokens Budget per chunk, estimated as 1 token ≈ 4 characters.
   *                  Non-positive or NaN values are clamped to a 1-character
   *                  budget.
   * @returns Trimmed, non-empty chunks in document order.
   */
  chunkText(text: string, maxTokens: number = 1000): string[] {
    const chunkSize =
      maxTokens > 0 ? Math.max(1, Math.floor(maxTokens * AIUtils.CHARS_PER_TOKEN)) : 1;
    const separator = '\n\n';
    const chunks: string[] = [];
    let current = '';

    const emit = (piece: string): void => {
      const trimmed = piece.trim();
      if (trimmed) chunks.push(trimmed);
    };

    for (const paragraph of text.split(/\n\n+/)) {
      // Measure the chunk as it would actually be stored, separator included.
      const joined = current ? current + separator + paragraph : paragraph;
      if (joined.length <= chunkSize) {
        current = joined;
        continue;
      }

      emit(current);
      if (paragraph.length <= chunkSize) {
        current = paragraph;
        continue;
      }

      // Oversized paragraph: hard-split it, keeping the last piece open so
      // following paragraphs can still be packed into it.
      const pieces = this.splitBySize(paragraph, chunkSize);
      current = pieces.pop() ?? '';
      pieces.forEach(emit);
    }

    emit(current);
    return chunks;
  }

  /**
   * Removes model "thinking" sections from a response.
   *
   * Handles `<thinking>…</thinking>`, `<think>…</think>` and the bracket forms
   * `[thinking]…[/thinking]` / `[think]…[/think]`, case-insensitively.
   *
   * @param str Response text.
   * @returns The text without thinking sections, trimmed.
   */
  removeThinkingTags(str: string): string {
    return str
      .replace(/<(think(?:ing)?)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, '')
      .replace(/\[(think(?:ing)?)\][\s\S]*?\[\/\1\]/gi, '')
      .trim();
  }

  /**
   * Backslash-escapes characters that are special inside template literals
   * (`\`, `` ` `` and `$`) so the text can be embedded in a prompt template.
   *
   * @param str Raw text.
   * @returns The escaped text.
   */
  escapeForPrompt(str: string): string {
    // Single pass: each special character is prefixed with one backslash.
    return str.replace(/[\\`$]/g, '\\$&');
  }

  /**
   * Roughly estimates the token count of a text.
   *
   * Characters in U+4E00–U+9FA5 are counted at about 1.5 characters per token,
   * everything else at about 4 characters per token.
   *
   * @param text Text to measure.
   * @returns The estimated token count, rounded up.
   */
  estimateTokens(text: string): number {
    const chineseChars = (text.match(/[\u4e00-\u9fa5]/g) ?? []).length;
    const otherChars = text.length - chineseChars;
    return Math.ceil(
      chineseChars / AIUtils.CHINESE_CHARS_PER_TOKEN + otherChars / AIUtils.CHARS_PER_TOKEN,
    );
  }

  /**
   * Extracts `key: value` lines (ASCII `:` or full-width `：`) from a response.
   *
   * Each line is split at its first colon. Lines without a colon, or with an
   * empty key or value, are ignored. Later duplicates overwrite earlier ones.
   * Both `\n` and `\r\n` line endings are supported.
   *
   * @param str Response text.
   * @returns A map from trimmed key to trimmed value.
   */
  extractKeyValuePairs(str: string): Record<string, string> {
    const result: Record<string, string> = {};

    for (const line of str.split(/\r?\n/)) {
      const match = /^([^:：]+)[：:]\s*(.+)$/.exec(line);
      if (!match) continue;
      const key = (match[1] ?? '').trim();
      const value = (match[2] ?? '').trim();
      if (key && value) result[key] = value;
    }

    return result;
  }

  /**
   * Heuristically checks whether a response looks complete (not truncated).
   *
   * A response is considered truncated when, after trimming, it
   * - ends with three or more dots,
   * - contains a code fence that is never closed, or
   * - leaves a `{` or `[` unclosed outside of closed code fences.
   *
   * @param str Response text.
   * @returns `true` when none of the truncation signs are present.
   */
  isResponseComplete(str: string): boolean {
    const text = str.trim();
    if (/\.{3,}$/.test(text)) return false;

    // Drop every properly closed fence; any fence marker left over has no
    // partner, which means the final code block was cut off.
    const outsideFences = text.replace(/```[\s\S]*?```/g, '');
    if (outsideFences.includes('```')) return false;

    return !this.hasUnclosedBracket(outsideFences);
  }

  /** Parses every fenced code block into its lower-cased tag and trimmed body. */
  private fencedBlocks(str: string): Array<{ tag: string; body: string }> {
    const blocks: Array<{ tag: string; body: string }> = [];
    for (const match of str.matchAll(/```([^\s`]*)\s*([\s\S]*?)\s*```/g)) {
      blocks.push({
        tag: (match[1] ?? '').toLowerCase(),
        body: (match[2] ?? '').trim(),
      });
    }
    return blocks;
  }

  /** Removes trailing punctuation, quotes and unbalanced closing brackets from a URL match. */
  private trimUrlTail(url: string): string {
    const openerFor: Record<string, string> = { ')': '(', ']': '[', '}': '{' };
    const count = (haystack: string, ch: string): number => haystack.split(ch).length - 1;

    let end = url.length;
    while (end > 0) {
      const last = url.charAt(end - 1);
      if (".,;:!?'".includes(last)) {
        end -= 1;
        continue;
      }
      const opener = openerFor[last];
      const head = url.slice(0, end);
      // A closing bracket belongs to the URL only if it has a partner inside it.
      if (opener !== undefined && count(head, last) > count(head, opener)) {
        end -= 1;
        continue;
      }
      break;
    }
    return url.slice(0, end);
  }

  /** Cuts text into pieces of at most `size` UTF-16 units without splitting surrogate pairs. */
  private splitBySize(text: string, size: number): string[] {
    const pieces: string[] = [];
    let start = 0;
    while (start < text.length) {
      let end = Math.min(start + size, text.length);
      const lastUnit = text.charCodeAt(end - 1);
      const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
      // Back off one unit rather than separating a surrogate pair, unless that
      // would make no progress.
      if (end < text.length && end - start > 1 && endsOnHighSurrogate) {
        end -= 1;
      }
      pieces.push(text.slice(start, end));
      start = end;
    }
    return pieces;
  }

  /**
   * Reports whether a `{` or `[` is still open at the end of the text.
   * Double-quoted strings are skipped while inside a bracket so that JSON
   * string contents such as "}" do not affect the balance.
   */
  private hasUnclosedBracket(text: string): boolean {
    const expected: string[] = [];
    let inString = false;

    for (let i = 0; i < text.length; i++) {
      const ch = text.charAt(i);
      if (inString) {
        if (ch === '\\') {
          i += 1; // skip the escaped character
        } else if (ch === '"') {
          inString = false;
        }
        continue;
      }

      if (ch === '"' && expected.length > 0) {
        inString = true;
      } else if (ch === '{') {
        expected.push('}');
      } else if (ch === '[') {
        expected.push(']');
      } else if ((ch === '}' || ch === ']') && expected[expected.length - 1] === ch) {
        expected.pop();
      }
    }

    return expected.length > 0;
  }
}