// ai/src/provider/core/utils/index.ts

export class AIUtils {
  /**
   * Extracts and parses JSON from a Markdown response.
   *
   * The first fenced block tagged `json` (case-insensitive) is parsed when it
   * is present and non-empty; otherwise the whole input is parsed as JSON.
   *
   * @param str Text that contains JSON, optionally wrapped in a ```json fence.
   * @returns The parsed value, or `null` when nothing parses. A JSON literal
   *   `null` is therefore indistinguishable from a parse failure.
   */
  static extractJsonFromMarkdown(str: string): any | null {
    const fenced = /```json\s*([\s\S]*?)\s*```/i.exec(str);
    // An empty fenced block falls back to the whole input.
    const jsonStr = fenced?.[1] ? fenced[1] : str;

    try {
      return JSON.parse(jsonStr);
    } catch {
      return null;
    }
  }

  /**
   * Extracts the contents of fenced code blocks from Markdown.
   *
   * @param str Markdown text.
   * @param language Fence tag to select (matched literally and
   *   case-sensitively, e.g. `"ts"` or `"c++"`). When omitted or empty, every
   *   fenced block is returned regardless of its tag.
   * @returns The trimmed contents of each matching block in document order,
   *   or `null` when no block matches.
   */
  extractCodeFromMarkdown(str: string, language?: string): string | string[] | null {
    // Escape regex metacharacters so tags such as "c++" are matched literally,
    // and refuse to match when the tag merely prefixes a longer one
    // (e.g. "js" must not select a ```json fence).
    const tagPattern = language
      ? language.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + '(?![\\w+#-])'
      : '(?:\\w[\\w+#-]*)?';
    const fence = new RegExp('```' + tagPattern + '\\s*([\\s\\S]*?)\\s*```', 'g');

    const blocks = Array.from(str.matchAll(fence), m => (m[1] ?? '').trim());
    return blocks.length > 0 ? blocks : null;
  }

  /**
   * Normalises whitespace in an AI response.
   *
   * Trailing spaces/tabs are removed from every line first, so that lines
   * containing only whitespace count as blank when runs of three or more
   * newlines are collapsed to exactly two. The result is then trimmed.
   *
   * @param str Raw response text.
   * @returns The cleaned text.
   */
  cleanResponse(str: string): string {
    return str
      .replace(/[ \t]+$/gm, '') // strip trailing spaces and tabs per line
      .replace(/\n{3,}/g, '\n\n') // collapse 3+ newlines into one blank line
      .trim();
  }

  /**
   * Extracts tags from an AI response.
   *
   * Three notations are recognised: `#tag`, `[tag]`, and a `tag:`/`tags:`
   * list such as `tags: [a, b; "c"]`. Tag characters are Unicode letters,
   * digits and `_`, so non-ASCII tags are supported. List items are split on
   * `,` or `;` and stripped of surrounding quotes.
   *
   * @param str Response text.
   * @returns Unique tags, ordered by notation and then by first occurrence.
   */
  extractTags(str: string): string[] {
    const tagPatterns = [
      /#([\p{L}\p{N}_]+)/gu, // #tag
      /\[([\p{L}\p{N}_]+)\]/gu, // [tag]
      /tags?:\s*\[([^\]]+)\]/gi, // tags: [...]
    ];

    const tags = new Set<string>();

    for (const pattern of tagPatterns) {
      for (const match of str.matchAll(pattern)) {
        const captured = match[1] ?? '';
        for (const piece of captured.split(/[,;]/)) {
          // Drop one pair of matching quotes around a list item ("a" -> a).
          const tag = piece.trim().replace(/^(["'])(.*)\1$/, '$2').trim();
          if (tag) {
            tags.add(tag);
          }
        }
      }
    }

    return Array.from(tags);
  }

  /**
   * Extracts `http://` and `https://` URLs from text.
   *
   * Each candidate runs up to the next whitespace character; punctuation that
   * trails it (sentence punctuation, quotes, unbalanced closing brackets) is
   * then removed so that "see https://a.com." yields "https://a.com".
   *
   * @param str Text to scan.
   * @returns URLs in order of appearance (duplicates are kept); an empty array
   *   when none are found.
   */
  extractUrls(str: string): string[] {
    const candidates = str.match(/https?:\/\/[^\s]+/g) ?? [];
    return candidates
      .map(candidate => AIUtils.trimUrlTail(candidate))
      .filter(url => /^https?:\/\/./.test(url)); // something must follow the scheme
  }

  /**
   * Splits long text into chunks bounded by an approximate token budget.
   *
   * Text is split on blank lines and paragraphs are packed greedily, joined by
   * a blank line, without exceeding the budget (the joining separator is
   * counted). A single paragraph larger than the budget is hard-split.
   *
   * @param text Source text.
   * @param maxTokens Maximum tokens per chunk, estimated as 4 characters per
   *   token. Values that yield less than one character are treated as one.
   * @returns Trimmed, non-empty chunks in document order.
   */
  chunkText(text: string, maxTokens: number = 1000): string[] {
    const chunkSize = Math.max(1, Math.floor(maxTokens * 4)); // rough estimate
    const chunks: string[] = [];
    let current = '';

    const flush = (): void => {
      const trimmed = current.trim();
      if (trimmed) {
        chunks.push(trimmed);
      }
      current = '';
    };

    for (const paragraph of text.split(/\n\n+/)) {
      const pieces =
        paragraph.length > chunkSize
          ? AIUtils.splitOversized(paragraph, chunkSize)
          : [paragraph];

      for (const piece of pieces) {
        const candidate = current ? `${current}\n\n${piece}` : piece;
        if (current && candidate.length > chunkSize) {
          flush();
          current = piece;
        } else {
          current = candidate;
        }
      }
    }

    flush();
    return chunks;
  }

  /**
   * Removes reasoning sections from an AI response.
   *
   * Both `<thinking>…</thinking>` and `[thinking]…[/thinking]` pairs are
   * removed (case-insensitive, non-greedy), then the result is trimmed.
   *
   * @param str Response text.
   * @returns The text without the matched sections.
   */
  removeThinkingTags(str: string): string {
    const sections = [
      /<thinking>[\s\S]*?<\/thinking>/gi,
      /\[thinking\][\s\S]*?\[\/thinking\]/gi,
    ];
    return sections.reduce((text, section) => text.replace(section, ''), str).trim();
  }

  /**
   * Escapes characters that are special inside template-style prompts.
   *
   * A backslash is inserted before every backslash, backtick and `$`.
   *
   * @param str Raw text.
   * @returns The escaped text.
   */
  escapeForPrompt(str: string): string {
    // "$&" in the replacement stands for the matched character itself.
    return str.replace(/[\\`$]/g, '\\$&');
  }

  /**
   * Roughly estimates the number of tokens in a text.
   *
   * Characters in U+4E00–U+9FA5 are counted at 1.5 characters per token and
   * every other UTF-16 code unit at 4 per token; the sum is rounded up.
   *
   * @param text Text to measure.
   * @returns The estimated token count.
   */
  estimateTokens(text: string): number {
    const chineseChars = text.match(/[\u4e00-\u9fa5]/g)?.length ?? 0;
    const otherChars = text.length - chineseChars;
    return Math.ceil(chineseChars / 1.5 + otherChars / 4);
  }

  /**
   * Extracts `key: value` pairs, one per line, from a response.
   *
   * A line matches when it has a non-empty key, an ASCII or full-width colon,
   * and a non-empty value; the key ends at the first colon. Both LF and CRLF
   * line endings are accepted. Later duplicates overwrite earlier ones.
   *
   * @param str Response text.
   * @returns An object mapping trimmed keys to trimmed values.
   */
  extractKeyValuePairs(str: string): Record<string, string> {
    const result: Record<string, string> = {};

    // Split on CRLF as well: a stray "\r" would stop "(.+)$" from matching.
    for (const line of str.split(/\r?\n/)) {
      const match = /^([^:：]+)[：:]\s*(.+)$/.exec(line);
      const key = match?.[1];
      const value = match?.[2];
      if (key !== undefined && value !== undefined) {
        result[key.trim()] = value.trim();
      }
    }

    return result;
  }

  /**
   * Heuristically checks whether an AI response looks complete (not cut off).
   *
   * A response is reported as truncated when, after trimming, it
   * - ends with three or more dots,
   * - contains an odd number of ``` fences (a code block was never closed), or
   * - has a `{` or `[` outside closed code blocks that is never closed.
   *
   * @param str Response text.
   * @returns `true` when no sign of truncation is found.
   */
  isResponseComplete(str: string): boolean {
    const text = str.trim();

    if (/\.{3,}$/.test(text)) {
      return false;
    }

    const fenceCount = text.match(/```/g)?.length ?? 0;
    if (fenceCount % 2 !== 0) {
      return false;
    }

    // Closed code blocks are complete by construction; only inspect the rest.
    const outsideCode = text.replace(/```[\s\S]*?```/g, '');
    return !AIUtils.hasUnclosedBracket(outsideCode);
  }

  /** Removes trailing punctuation that belongs to the surrounding text, not to the URL. */
  private static trimUrlTail(url: string): string {
    const trailing = /[.,;:!?'"`>*，。；：！？、）】》」”’]/;
    const openerOf = new Map<string, string>([
      [')', '('],
      [']', '['],
      ['}', '{'],
    ]);
    const count = (text: string, ch: string): number => text.split(ch).length - 1;

    let end = url.length;
    while (end > 0) {
      const last = url.charAt(end - 1);
      if (trailing.test(last)) {
        end -= 1;
        continue;
      }
      const opener = openerOf.get(last);
      if (opener !== undefined) {
        const head = url.slice(0, end);
        // Keep balanced brackets, e.g. ".../Foo_(bar)"; drop a closer without an opener.
        if (count(head, last) > count(head, opener)) {
          end -= 1;
          continue;
        }
      }
      break;
    }
    return url.slice(0, end);
  }

  /** Cuts a paragraph into consecutive pieces of about `size` UTF-16 code units each. */
  private static splitOversized(paragraph: string, size: number): string[] {
    const pieces: string[] = [];
    let start = 0;

    while (paragraph.length - start > size) {
      let end = start + size;
      const code = paragraph.charCodeAt(end - 1);
      // Never cut between the two halves of a surrogate pair.
      if (code >= 0xd800 && code <= 0xdbff) {
        end = end - 1 > start ? end - 1 : end + 1;
      }
      pieces.push(paragraph.slice(start, end));
      start = end;
    }

    if (start < paragraph.length) {
      pieces.push(paragraph.slice(start));
    }
    return pieces;
  }

  /** Reports whether a `{` or `[` outside double-quoted strings is left unclosed. */
  private static hasUnclosedBracket(text: string): boolean {
    const expectedClosers: string[] = [];
    let inString = false;
    let escaped = false;

    for (const ch of text) {
      if (inString) {
        if (escaped) {
          escaped = false;
        } else if (ch === '\\') {
          escaped = true;
        } else if (ch === '"') {
          inString = false;
        }
        continue;
      }

      if (ch === '"') {
        inString = true;
      } else if (ch === '{') {
        expectedClosers.push('}');
      } else if (ch === '[') {
        expectedClosers.push(']');
      } else if (
        expectedClosers.length > 0 &&
        ch === expectedClosers[expectedClosers.length - 1]
      ) {
        expectedClosers.pop();
      }
    }

    return expectedClosers.length > 0;
  }
}