feat: add some other providers
This commit is contained in:
192
src/provider/core/utils/index.ts
Normal file
192
src/provider/core/utils/index.ts
Normal file
@@ -0,0 +1,192 @@
|
||||
/**
 * Helpers for post-processing text returned by AI providers: extracting
 * JSON / code / tags / URLs, cleaning whitespace, chunking long text and
 * performing rough token and completeness estimates.
 *
 * None of the methods read or write instance state.
 */
export class AIUtils {
  /** Trailing characters that are treated as sentence punctuation, not as part of a URL. */
  private static readonly URL_TRAILING_PUNCTUATION =
    '.,;:!?\'">\u3002\uFF0C\u3001\uFF1B\uFF1A\uFF01\uFF1F';

  /** Closing bracket -> opening bracket, used to decide whether a trailing closer belongs to a URL. */
  private static readonly URL_BRACKET_PAIRS: ReadonlyMap<string, string> = new Map([
    [')', '('],
    [']', '['],
    ['}', '{'],
  ]);

  /** Escapes every RegExp metacharacter so `text` can be embedded in a pattern literally. */
  private static escapeRegExp(text: string): string {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /** Counts occurrences of a single character in `text`. */
  private static countChar(text: string, ch: string): number {
    let count = 0;
    for (let i = 0; i < text.length; i++) {
      if (text.charAt(i) === ch) count++;
    }
    return count;
  }

  /** Strips sentence punctuation and unbalanced closing brackets from the end of a matched URL. */
  private static trimUrlTail(url: string): string {
    let result = url;
    while (result.length > 0) {
      const last = result.charAt(result.length - 1);
      if (AIUtils.URL_TRAILING_PUNCTUATION.includes(last)) {
        result = result.slice(0, -1);
        continue;
      }
      const opener = AIUtils.URL_BRACKET_PAIRS.get(last);
      // Keep balanced closers such as the ")" in ".../wiki/Foo_(bar)".
      if (
        opener !== undefined &&
        AIUtils.countChar(result, last) > AIUtils.countChar(result, opener)
      ) {
        result = result.slice(0, -1);
        continue;
      }
      break;
    }
    return result;
  }

  /** Cuts `text` into consecutive slices of at most `size` UTF-16 units without splitting surrogate pairs. */
  private static splitBySize(text: string, size: number): string[] {
    const parts: string[] = [];
    let start = 0;
    while (start < text.length) {
      let end = Math.min(start + size, text.length);
      if (end < text.length) {
        const code = text.charCodeAt(end - 1);
        const endsOnHighSurrogate = code >= 0xd800 && code <= 0xdbff;
        if (endsOnHighSurrogate) {
          // Prefer a shorter slice; only overshoot when the slice would otherwise be empty.
          end += end - start > 1 ? -1 : 1;
        }
      }
      parts.push(text.slice(start, end));
      start = end;
    }
    return parts;
  }

  /** Reports whether `text` ends with a `{` or `[` that was opened but never closed. */
  private static hasUnclosedBracket(text: string): boolean {
    const open: string[] = [];
    let inString = false;
    for (let i = 0; i < text.length; i++) {
      const ch = text.charAt(i);
      if (ch === '\n') {
        // JSON strings cannot span lines; resetting limits the damage of a stray quote in prose.
        inString = false;
        continue;
      }
      if (inString) {
        if (ch === '\\') i++; // skip the escaped character
        else if (ch === '"') inString = false;
        continue;
      }
      if (ch === '"') {
        inString = true;
      } else if (ch === '{' || ch === '[') {
        open.push(ch);
      } else if (ch === '}' || ch === ']') {
        const expected = ch === '}' ? '{' : '[';
        // Stray or mismatched closers are ignored rather than treated as truncation.
        if (open[open.length - 1] === expected) open.pop();
      }
    }
    return open.length > 0;
  }

  /**
   * Extracts and parses JSON from a string that may wrap it in a Markdown code fence.
   *
   * Candidates are tried in this order:
   * 1. the content of the first ```json fence (tag matched case-insensitively);
   * 2. if there is no such fence, the whole string;
   * 3. if that fails, the content of the first fence of any kind.
   *
   * @param str String that contains JSON.
   * @returns The parsed value, or `null` when no candidate is valid JSON.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- return type kept for existing callers
  extractJsonFromMarkdown(str: string): any | null {
    const candidates: string[] = [];
    const jsonFence = /```json\s*([\s\S]*?)\s*```/i.exec(str);
    if (jsonFence?.[1]) {
      candidates.push(jsonFence[1]);
    } else {
      candidates.push(str);
      const anyFence = /```[\w+#.-]*\s*([\s\S]*?)\s*```/.exec(str);
      if (anyFence?.[1]) candidates.push(anyFence[1]);
    }

    for (const candidate of candidates) {
      try {
        return JSON.parse(candidate);
      } catch {
        // Not valid JSON; fall through to the next candidate.
      }
    }
    return null;
  }

  /**
   * Extracts the contents of fenced code blocks from Markdown.
   *
   * @param str Markdown text.
   * @param language Fence tag to match exactly and literally (case-sensitive; `'js'` does
   *   not match a ```json fence, and `'c++'` is not interpreted as a pattern). When omitted
   *   or empty, every fenced block is returned.
   * @returns The trimmed contents of each matching block, or `null` when there is none.
   */
  extractCodeFromMarkdown(str: string, language?: string): string | string[] | null {
    const tag = language
      ? `${AIUtils.escapeRegExp(language)}(?![\\w+#.-])` // reject longer tags sharing the prefix
      : '[\\w+#.-]*';
    const fence = new RegExp('```' + tag + '\\s*([\\s\\S]*?)\\s*```', 'g');

    const blocks = Array.from(str.matchAll(fence), (m) => (m[1] ?? '').trim());
    return blocks.length > 0 ? blocks : null;
  }

  /**
   * Normalises whitespace in a response: removes trailing spaces/tabs on every line,
   * collapses runs of three or more line breaks into two, and trims both ends.
   *
   * @param str Raw response text.
   * @returns The cleaned text.
   */
  cleanResponse(str: string): string {
    return str
      .replace(/[ \t]+$/gm, '') // strip first so whitespace-only lines become empty lines
      .replace(/(\r?\n){3,}/g, '$1$1') // then collapse, keeping the line-ending style
      .trim();
  }

  /**
   * Collects tags written as `#tag`, `[tag]` or `tags: [a, b, c]`.
   *
   * Tag characters may be any Unicode letter, digit or underscore. Items in the list
   * form are split on commas/semicolons and stripped of surrounding quotes and a leading `#`.
   *
   * @param str Response text.
   * @returns The distinct tags, in order of first discovery (pattern by pattern).
   */
  extractTags(str: string): string[] {
    const tagPatterns = [
      /#([\p{L}\p{N}_]+)/gu, // #tag
      /\[([\p{L}\p{N}_]+)\]/gu, // [tag]
      /tags?[:\uFF1A]\s*\[([^\]]+)\]/gi, // tags: [...]
    ];

    const tags = new Set<string>();
    for (const pattern of tagPatterns) {
      for (const match of str.matchAll(pattern)) {
        const captured = match[1];
        if (!captured) continue;
        for (const item of captured.split(/[,;\uFF0C\uFF1B\u3001]/)) {
          const tag = item.trim().replace(/^["'`#\s]+|["'`\s]+$/g, '');
          if (tag) tags.add(tag);
        }
      }
    }
    return Array.from(tags);
  }

  /**
   * Finds `http://` and `https://` URLs in free text.
   *
   * Trailing sentence punctuation and unbalanced closing brackets (for example the `)`
   * of a Markdown link) are not considered part of the URL.
   *
   * @param str Text to scan.
   * @returns The URLs in order of appearance; empty when there are none.
   */
  extractUrls(str: string): string[] {
    const matches = str.match(/https?:\/\/[^\s]+/g) ?? [];
    return matches
      .map((url) => AIUtils.trimUrlTail(url))
      .filter((url) => /^https?:\/\/./.test(url)); // drop matches with nothing after the scheme
  }

  /**
   * Splits text into chunks whose length stays within an approximate token budget.
   *
   * Paragraphs (separated by blank lines) are packed greedily; the `\n\n` separator
   * counts towards the budget. A single paragraph that exceeds the budget is cut
   * into fixed-size slices. Empty chunks are never returned.
   *
   * @param text Source text.
   * @param maxTokens Maximum tokens per chunk, estimated as 1 token ≈ 4 characters.
   *   Values that yield less than one character are clamped to one; `NaN` means no limit.
   * @returns The chunks, each trimmed.
   */
  chunkText(text: string, maxTokens: number = 1000): string[] {
    const rawSize = maxTokens * 4; // rough estimate
    const chunkSize = Number.isNaN(rawSize) ? Infinity : Math.max(1, Math.floor(rawSize));

    const chunks: string[] = [];
    let current = '';
    const flush = (): void => {
      const trimmed = current.trim();
      if (trimmed) chunks.push(trimmed);
      current = '';
    };

    for (const paragraph of text.split(/\n\n+/)) {
      const pieces =
        paragraph.length > chunkSize ? AIUtils.splitBySize(paragraph, chunkSize) : [paragraph];
      for (const piece of pieces) {
        const joined = current ? `${current}\n\n${piece}` : piece;
        if (current && joined.length > chunkSize) {
          flush();
          current = piece;
        } else {
          current = joined;
        }
      }
    }
    flush();

    return chunks;
  }

  /**
   * Removes model "thinking" sections from a response.
   *
   * Handles `<thinking>…</thinking>`, `<think>…</think>` and the square-bracket
   * equivalents, case-insensitively. An opening tag without its closing tag is left as is.
   *
   * @param str Response text.
   * @returns The text without thinking sections, trimmed.
   */
  removeThinkingTags(str: string): string {
    return str
      .replace(/<(think(?:ing)?)>[\s\S]*?<\/\1>/gi, '')
      .replace(/\[(think(?:ing)?)\][\s\S]*?\[\/\1\]/gi, '')
      .trim();
  }

  /**
   * Backslash-escapes the characters that are special inside a template literal:
   * backslash, backtick and dollar sign.
   *
   * @param str Raw text.
   * @returns The escaped text.
   */
  escapeForPrompt(str: string): string {
    // Single pass, so backslashes added for "`" and "$" are never escaped again.
    return str.replace(/[\\`$]/g, '\\$&');
  }

  /**
   * Roughly estimates the token count of a text.
   *
   * Characters in U+4E00–U+9FA5 are counted at about 1.5 characters per token and
   * everything else at about 4 characters per token.
   *
   * @param text Text to measure.
   * @returns The estimate, rounded up.
   */
  estimateTokens(text: string): number {
    const chineseChars = (text.match(/[\u4e00-\u9fa5]/g) ?? []).length;
    const otherChars = text.length - chineseChars;
    return Math.ceil(chineseChars / 1.5 + otherChars / 4);
  }

  /**
   * Reads `key: value` lines into an object.
   *
   * A line is split at its first colon (ASCII `:` or full-width U+FF1A); lines without
   * a colon, or with nothing before or after it, are skipped. LF and CRLF line endings
   * are both accepted. When a key repeats, the last value wins.
   *
   * @param str Response text.
   * @returns The key/value pairs, with keys and values trimmed.
   */
  extractKeyValuePairs(str: string): Record<string, string> {
    const pairPattern = /^([^:\uFF1A]+)[:\uFF1A]\s*(.+)$/;
    const entries: Array<[string, string]> = [];

    for (const line of str.split(/\r?\n/)) {
      const match = pairPattern.exec(line);
      if (!match) continue;
      const key = (match[1] ?? '').trim();
      if (!key) continue;
      entries.push([key, (match[2] ?? '').trim()]);
    }

    // fromEntries defines own properties, so a key such as "__proto__" is stored like any other.
    return Object.fromEntries(entries);
  }

  /**
   * Heuristically checks whether a response looks complete rather than truncated.
   *
   * The response is considered truncated when, after trimming, it
   * - ends with three or more dots,
   * - contains an odd number of ``` fences (an unclosed code block), or
   * - has a `{` or `[` outside of code spans that is never closed.
   *
   * @param str Response text.
   * @returns `true` when no sign of truncation is found.
   */
  isResponseComplete(str: string): boolean {
    const text = str.trim();

    if (/\.{3,}$/.test(text)) return false;

    const fenceCount = (text.match(/```/g) ?? []).length;
    if (fenceCount % 2 !== 0) return false;

    // Closed code blocks and inline code cannot be where truncation happened,
    // and their brackets (regexes, string literals) would only add noise.
    const outsideCode = text.replace(/```[\s\S]*?```/g, '').replace(/`[^`\n]*`/g, '');
    return !AIUtils.hasUnclosedBracket(outsideCode);
  }
}
|
||||
Reference in New Issue
Block a user