From 2fcba3a0966f9dd0d3628b65ff7fa923e0cc89b6 Mon Sep 17 00:00:00 2001 From: xiongxiao Date: Wed, 31 Dec 2025 03:36:27 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=96=87=E6=A1=A3=EF=BC=8C?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=B0=8F=E7=BA=A2=E4=B9=A6=E6=A8=A1=E5=9D=97?= =?UTF-8?q?=E6=8F=8F=E8=BF=B0=EF=BC=9B=E9=87=8D=E6=9E=84=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E5=BA=93=E6=A8=A1=E5=BC=8F=EF=BC=8C=E5=A2=9E=E5=8A=A0=E7=AC=94?= =?UTF-8?q?=E8=AE=B0=E5=92=8C=E7=94=A8=E6=88=B7=E4=BF=A1=E6=81=AF=E5=AD=97?= =?UTF-8?q?=E6=AE=B5=EF=BC=9B=E4=BC=98=E5=8C=96=E6=A0=B8=E5=BF=83=E9=80=BB?= =?UTF-8?q?=E8=BE=91=EF=BC=8C=E5=A2=9E=E5=8A=A0=E8=AE=B0=E5=BD=95=E8=B6=85?= =?UTF-8?q?=E6=97=B6=E5=A4=84=E7=90=86=EF=BC=9B=E6=9B=B4=E6=96=B0=E7=A4=BA?= =?UTF-8?q?=E4=BE=8B=E6=95=B0=E6=8D=AE=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/xhs/feed.json | 84 +++++++++++++++++++++++ readme.md | 13 +++- src/app.ts | 6 ++ src/db/schema.ts | 47 +++++++++++-- src/playwright/core.ts | 14 ++++ src/routes/xhs/index.ts | 2 +- src/routes/xhs/search-notes.ts | 42 +++++++----- typings/note.d.ts | 121 +++++++++++++++++++++++++++++++++ 8 files changed, 306 insertions(+), 23 deletions(-) create mode 100644 examples/xhs/feed.json diff --git a/examples/xhs/feed.json b/examples/xhs/feed.json new file mode 100644 index 0000000..4634b39 --- /dev/null +++ b/examples/xhs/feed.json @@ -0,0 +1,84 @@ +{ + "cursor_score": "", + "items": [ + { + "id": "692d2c3c000000000d035cfd", + "model_type": "note", + "note_card": { + "title": "怎么让自己一台设备控制另一台电脑快捷键?", + "user": { + "user_id": "6726cef4000000001c019303", + "nickname": "小熊猫呜呜呜", + "avatar": "https://sns-avatar-qc.xhscdn.com/avatar/1040g2jo31nptpebimm605pp6prq734o33ikhhig", + "xsec_token": "ABgQHM5P-hKJbm3K-GuN6GGJGXQGIRQX2d4m2TmuGPW0Y=" + }, + "tag_list": [], + "at_user_list": [], + "time": 1764568124000, + "share_info": { + "un_share": false + }, + "note_id": "692d2c3c000000000d035cfd", + "type": "normal", + 
"image_list": [ + { + "file_id": "", + "width": 1440, + "info_list": [ + { + "image_scene": "WB_PRV", + "url": "http://sns-webpic-qc.xhscdn.com/202512310146/01744c61858f30b8e306f79c3d17902d/1040g00831phpsi4p3c6g5pp6prq734o3ghs6t8g!nd_prv_wlteh_webp_3" + }, + { + "url": "http://sns-webpic-qc.xhscdn.com/202512310146/166c6ac26933a554f17aefd009037ee7/1040g00831phpsi4p3c6g5pp6prq734o3ghs6t8g!nd_dft_wlteh_webp_3", + "image_scene": "WB_DFT" + } + ], + "url_pre": "http://sns-webpic-qc.xhscdn.com/202512310146/01744c61858f30b8e306f79c3d17902d/1040g00831phpsi4p3c6g5pp6prq734o3ghs6t8g!nd_prv_wlteh_webp_3", + "height": 2400, + "url": "", + "trace_id": "", + "url_default": "http://sns-webpic-qc.xhscdn.com/202512310146/166c6ac26933a554f17aefd009037ee7/1040g00831phpsi4p3c6g5pp6prq734o3ghs6t8g!nd_dft_wlteh_webp_3", + "stream": {}, + "live_photo": false + }, + { + "url_default": "http://sns-webpic-qc.xhscdn.com/202512310146/a4c16164274aa5833d119d4b9d150da3/1040g00831phpsi4p3c605pp6prq734o3dmgotbo!nd_dft_wlteh_webp_3", + "file_id": "", + "url": "", + "url_pre": "http://sns-webpic-qc.xhscdn.com/202512310146/a26e9c3737fee7be0844121a6b83f897/1040g00831phpsi4p3c605pp6prq734o3dmgotbo!nd_prv_wlteh_webp_3", + "info_list": [ + { + "image_scene": "WB_PRV", + "url": "http://sns-webpic-qc.xhscdn.com/202512310146/a26e9c3737fee7be0844121a6b83f897/1040g00831phpsi4p3c605pp6prq734o3dmgotbo!nd_prv_wlteh_webp_3" + }, + { + "image_scene": "WB_DFT", + "url": "http://sns-webpic-qc.xhscdn.com/202512310146/a4c16164274aa5833d119d4b9d150da3/1040g00831phpsi4p3c605pp6prq734o3dmgotbo!nd_dft_wlteh_webp_3" + } + ], + "stream": {}, + "live_photo": false, + "height": 2400, + "width": 1440, + "trace_id": "" + } + ], + "last_update_time": 1764568125000, + "ip_location": "浙江", + "desc": "", + "interact_info": { + "liked_count": "1", + "collected": false, + "collected_count": "1", + "comment_count": "0", + "share_count": "0", + "followed": false, + "relation": "none", + "liked": false + } + } + } + ], + "current_time": 
1767116763109 +} \ No newline at end of file diff --git a/readme.md b/readme.md index 4d742d5..03d4aac 100644 --- a/readme.md +++ b/readme.md @@ -1,7 +1,10 @@ # 浏览器自动化助手 +> 信息收集工具,过滤一些烂的资料,通过ai去过滤一些无用的信息,帮助用户更高效地收集和整理信息,提升工作和学习效率。 + 实现功能,浏览了页面,自动把想要的数据,存储到数据库中,方便后续分析和使用。 + ## 初始化 ```bash @@ -9,8 +12,14 @@ pnpm install pnpm run init ``` -## 启动studio +## 启动 studio + +studio 是 drizzle 提供的可视化数据库管理工具,可以用来查看和管理本地的 SQLite 数据库。 ```bash pnpm run studio -``` \ No newline at end of file +``` + +## 关于小红书模块 + +过滤自己想要的笔记,保存到本地数据库中,方便后续使用。 diff --git a/src/app.ts b/src/app.ts index e6cb14e..58ee629 100644 --- a/src/app.ts +++ b/src/app.ts @@ -58,6 +58,12 @@ export const core = useConfigKey('core', () => new Core({ console.error('解析搜索笔记响应失败:', error); } } + }, + { + /** + * 小红书笔记详情响应处理 + */ + path: 'https://edith.xiaohongshu.com/api/sns/web/v1/feed', } ] })); diff --git a/src/db/schema.ts b/src/db/schema.ts index ab93301..4649a80 100644 --- a/src/db/schema.ts +++ b/src/db/schema.ts @@ -1,5 +1,5 @@ import { sqliteTable, text, integer } from 'drizzle-orm/sqlite-core'; - +import { randomUUID } from 'node:crypto'; export const cache = sqliteTable('cache', { key: text('key').primaryKey(), value: text('value').notNull(), @@ -9,16 +9,55 @@ export const cache = sqliteTable('cache', { export const xhsNote = sqliteTable('xhs_note', { id: text('id').primaryKey(), - content: text('content').notNull(), + title: text('title'), + summary: text('summary'), description: text('description'), - tags: text('tags').notNull(), - noteUrl: text('note_url'), + link: text('link'), + data: text('data'), + tags: text('tags'), + status: text('status'), authorUrl: text('author_url'), cover: text('cover'), + syncStatus: integer('sync_status').notNull(), syncAt: integer('sync_at').notNull(), + + star: integer('star'), + + pushedAt: integer('pushed_at'), createdAt: integer('created_at').notNull(), updatedAt: integer('updated_at').notNull(), + deletedAt: integer('deleted_at'), +}); + +export const xhsUser = 
sqliteTable('xhs_user', { + id: text('id').primaryKey().$defaultFn(() => randomUUID()), + user_id: text('user_id').notNull(), + + username: text('username'), + nickname: text('nickname'), + avatar: text('avatar'), + + title: text('title'), + summary: text('summary'), + description: text('description'), + link: text('link'), + data: text('data'), + tags: text('tags'), + + followersCount: integer('followers_count'), + followingCount: integer('following_count'), + + status: text('status'), + + syncStatus: integer('sync_status').notNull(), + syncAt: integer('sync_at').notNull(), + + star: integer('star'), + + createdAt: integer('created_at').notNull(), + updatedAt: integer('updated_at').notNull(), + deletedAt: integer('deleted_at'), }); \ No newline at end of file diff --git a/src/playwright/core.ts b/src/playwright/core.ts index 7284b00..672416e 100644 --- a/src/playwright/core.ts +++ b/src/playwright/core.ts @@ -31,6 +31,7 @@ export class Core { emitter = new EventEmitter(); listeners: Listener[] = []; recordReady: boolean = false; + timer: NodeJS.Timeout | null = null; data: T | null = null; constructor(opts?: { debugPort?: number, debugHost?: string, listeners?: Listener[] }) { if (opts?.debugPort) { @@ -126,6 +127,18 @@ export class Core { if (this.recordReady !== ready) { this.recordReady = ready; } + if (ready === true) { + this.timer && clearTimeout(this.timer); + const that = this; + this.timer = setTimeout(() => { + that.recordReady = false; + that.timer = null; + console.log('记录超时,已自动设置为未就绪状态'); + }, 5 * 60 * 1000); // 5分钟后自动设置为未就绪, 防止长时间占用资源 + } else { + this.timer && clearTimeout(this.timer); + this.timer = null; + } } async setData(data?: any) { if (!data) { @@ -162,6 +175,7 @@ export class Core { console.log('记录未就绪,跳过响应处理'); return } + console.log(`捕获到响应: ${url}`); try { const status = response.status(); const contentType = response.headers()['content-type'] || ''; diff --git a/src/routes/xhs/index.ts b/src/routes/xhs/index.ts index c12704d..ec14a9c 
100644 --- a/src/routes/xhs/index.ts +++ b/src/routes/xhs/index.ts @@ -1 +1 @@ -import './search-notes.ts'; \ No newline at end of file +import './search-notes.ts'; diff --git a/src/routes/xhs/search-notes.ts b/src/routes/xhs/search-notes.ts index cced1a6..d078b33 100644 --- a/src/routes/xhs/search-notes.ts +++ b/src/routes/xhs/search-notes.ts @@ -156,7 +156,7 @@ app.route({ app.route({ path: 'xhs', key: 'save-search-notes', - description: '保存搜索笔记结果', + description: '保存搜索笔记结果, 浏览器自动化完成搜索后调用此接口保存结果。', middleware: ['auth'], metadata: { tags: ['小红书', '搜索', '保存'], @@ -164,6 +164,9 @@ app.route({ } }).define(async (ctx) => { const data = ctx.query!.data as XHS.SearchNote[]; + if (!data || !Array.isArray(data)) { + ctx.throw(400, '缺少有效的 data 参数'); + } try { const getNoteUrl = (note: XHS.SearchNote) => { const id = note.id; @@ -184,24 +187,31 @@ app.route({ return cover?.url_default || '' } const keyword = sessionCache.get('xhs-search-keyword'); - const notes = data.filter(note => note.model_type === 'note').map(note => ({ - id: note.id, - content: JSON.stringify(note), - description: keyword || '', - title: note.note_card?.display_title || '', - authorUrl: getUserUrl(note), - tags: '', - syncStatus: 0, - noteUrl: getNoteUrl(note), - cover: getCover(note), - syncAt: 0, - createdAt: Date.now(), - updatedAt: Date.now(), - })); + const notes = data.filter(note => note.model_type === 'note').map(note => { + const cornnerTag = note.note_card?.corner_tag_info; + const pushTime = cornnerTag?.find(tag => tag.type === 'publish_time')?.text || ''; + // 一天前 pushTime 包含 "前" + return { + id: note.id, + title: note.note_card?.display_title || '', + tags: '', + summary: '', + description: keyword || '', + link: getNoteUrl(note), + data: JSON.stringify(note), + cover: getCover(note), + authorUrl: getUserUrl(note), + syncStatus: 0, + // pushedAt: 0, + syncAt: 0, + createdAt: Date.now(), + updatedAt: Date.now(), + } + }); await db.insert(xhsNote).values(notes).onConflictDoUpdate({ target: 
xhsNote.id, set: { - content: sql`excluded.content`, + summary: sql`excluded.summary`, updatedAt: Date.now(), }, }).execute(); diff --git a/typings/note.d.ts b/typings/note.d.ts index ddffe01..54e00e7 100644 --- a/typings/note.d.ts +++ b/typings/note.d.ts @@ -103,4 +103,125 @@ declare namespace XHS { hasMore: boolean; items: T[]; } + +} + +declare namespace XHS { + /** 分享信息 */ + export interface ShareInfo { + /** 是否不可分享 */ + un_share: boolean; + } + + /** 标签 */ + export interface Tag { + /** 标签ID */ + id: string; + /** 标签名称 */ + name: string; + /** 标签类型(如topic话题) */ + type: 'topic' | string; + } + + /** 完整的笔记交互信息 */ + export interface FullInteractInfo { + /** 分享数 */ + share_count: string; + /** 是否已关注 */ + followed: boolean; + /** 关系(如none无) */ + relation: 'none' | 'following' | string; + /** 是否已点赞 */ + liked: boolean; + /** 点赞数 */ + liked_count: string; + /** 是否已收藏 */ + collected: boolean; + /** 收藏数 */ + collected_count: string; + /** 评论数 */ + comment_count: string; + } + + /** 完整的图片信息 */ + export interface FullImageInfo { + /** 图片场景(如WB_DFT默认、WB_PRV预览) */ + image_scene: string; + /** 图片URL */ + url: string; + } + + /** 完整的笔记图片 */ + export interface FullImage { + /** 图片宽度 */ + width: number; + /** 图片高度 */ + height: number; + /** 图片信息列表(不同场景的URL) */ + info_list: FullImageInfo[]; + /** 流信息 */ + stream: Record; + /** 是否Live Photo */ + live_photo: boolean; + /** 文件ID */ + file_id: string; + /** URL */ + url: string; + /** 追踪ID */ + trace_id: string; + /** 预览URL */ + url_pre: string; + /** 默认URL */ + url_default: string; + } + + /** 完整的笔记卡片 */ + export interface NoteCardDetail { + /** 时间 */ + time: number; + /** 分享信息 */ + share_info: ShareInfo; + /** 描述 */ + desc: string; + /** 用户信息 */ + user: NoteUser; + /** 标签列表 */ + tag_list: Tag[]; + /** 交互信息 */ + interact_info: FullInteractInfo; + /** 图片列表 */ + image_list: FullImage[]; + /** @用户列表 */ + at_user_list: unknown[]; + /** 最后更新时间 */ + last_update_time: number; + /** IP位置 */ + ip_location: string; + /** 笔记ID */ + note_id: 
string; + /** 类型(如normal普通) */ + type: 'normal' | 'video' | string; + /** 标题 */ + title: string; + } + + /** Note detail: one complete note entry of the Feed response `items` array */ + export interface NoteDetail { + /** Note ID */ + id: string; + /** Model type (e.g. 'note') */ + model_type: 'note' | string; + /** Full note card; typed as NoteCardDetail because the feed payload (see examples/xhs/feed.json) carries title, desc, time, tag_list, image_list, interact_info, etc. */ + note_card: NoteCardDetail; + } + + /** Feed response */ + export interface FeedResponse { + /** Cursor score */ + cursor_score: string; + /** List of note details */ + items: NoteDetail[]; + /** Current time as reported by the response */ + current_time: number; + } } \ No newline at end of file