更新文档,添加小红书模块描述;重构数据库模式,增加笔记和用户信息字段;优化核心逻辑,增加记录超时处理;更新示例数据。

This commit is contained in:
2025-12-31 03:36:27 +08:00
parent 3bede583bf
commit 2fcba3a096
8 changed files with 306 additions and 23 deletions

84
examples/xhs/feed.json Normal file
View File

@@ -0,0 +1,84 @@
{
"cursor_score": "",
"items": [
{
"id": "692d2c3c000000000d035cfd",
"model_type": "note",
"note_card": {
"title": "怎么让自己一台设备控制另一台电脑快捷键?",
"user": {
"user_id": "6726cef4000000001c019303",
"nickname": "小熊猫呜呜呜",
"avatar": "https://sns-avatar-qc.xhscdn.com/avatar/1040g2jo31nptpebimm605pp6prq734o33ikhhig",
"xsec_token": "ABgQHM5P-hKJbm3K-GuN6GGJGXQGIRQX2d4m2TmuGPW0Y="
},
"tag_list": [],
"at_user_list": [],
"time": 1764568124000,
"share_info": {
"un_share": false
},
"note_id": "692d2c3c000000000d035cfd",
"type": "normal",
"image_list": [
{
"file_id": "",
"width": 1440,
"info_list": [
{
"image_scene": "WB_PRV",
"url": "http://sns-webpic-qc.xhscdn.com/202512310146/01744c61858f30b8e306f79c3d17902d/1040g00831phpsi4p3c6g5pp6prq734o3ghs6t8g!nd_prv_wlteh_webp_3"
},
{
"url": "http://sns-webpic-qc.xhscdn.com/202512310146/166c6ac26933a554f17aefd009037ee7/1040g00831phpsi4p3c6g5pp6prq734o3ghs6t8g!nd_dft_wlteh_webp_3",
"image_scene": "WB_DFT"
}
],
"url_pre": "http://sns-webpic-qc.xhscdn.com/202512310146/01744c61858f30b8e306f79c3d17902d/1040g00831phpsi4p3c6g5pp6prq734o3ghs6t8g!nd_prv_wlteh_webp_3",
"height": 2400,
"url": "",
"trace_id": "",
"url_default": "http://sns-webpic-qc.xhscdn.com/202512310146/166c6ac26933a554f17aefd009037ee7/1040g00831phpsi4p3c6g5pp6prq734o3ghs6t8g!nd_dft_wlteh_webp_3",
"stream": {},
"live_photo": false
},
{
"url_default": "http://sns-webpic-qc.xhscdn.com/202512310146/a4c16164274aa5833d119d4b9d150da3/1040g00831phpsi4p3c605pp6prq734o3dmgotbo!nd_dft_wlteh_webp_3",
"file_id": "",
"url": "",
"url_pre": "http://sns-webpic-qc.xhscdn.com/202512310146/a26e9c3737fee7be0844121a6b83f897/1040g00831phpsi4p3c605pp6prq734o3dmgotbo!nd_prv_wlteh_webp_3",
"info_list": [
{
"image_scene": "WB_PRV",
"url": "http://sns-webpic-qc.xhscdn.com/202512310146/a26e9c3737fee7be0844121a6b83f897/1040g00831phpsi4p3c605pp6prq734o3dmgotbo!nd_prv_wlteh_webp_3"
},
{
"image_scene": "WB_DFT",
"url": "http://sns-webpic-qc.xhscdn.com/202512310146/a4c16164274aa5833d119d4b9d150da3/1040g00831phpsi4p3c605pp6prq734o3dmgotbo!nd_dft_wlteh_webp_3"
}
],
"stream": {},
"live_photo": false,
"height": 2400,
"width": 1440,
"trace_id": ""
}
],
"last_update_time": 1764568125000,
"ip_location": "浙江",
"desc": "",
"interact_info": {
"liked_count": "1",
"collected": false,
"collected_count": "1",
"comment_count": "0",
"share_count": "0",
"followed": false,
"relation": "none",
"liked": false
}
}
}
],
"current_time": 1767116763109
}

View File

@@ -1,7 +1,10 @@
# 浏览器自动化助手
> 信息收集工具:借助 AI 过滤低质量和无用的信息,帮助用户更高效地收集和整理资料,提升工作和学习效率。
功能:在浏览页面时,自动将需要的数据存储到数据库中,方便后续分析和使用。
## 初始化
```bash
@@ -11,6 +14,12 @@ pnpm run init
## 启动 studio
studio 是 drizzle 提供的可视化数据库管理工具,可以用来查看和管理本地的 SQLite 数据库。
```bash
pnpm run studio
```
## 关于小红书模块
过滤自己想要的笔记,保存到本地数据库中,方便后续使用。

View File

@@ -58,6 +58,12 @@ export const core = useConfigKey<Core>('core', () => new Core({
console.error('解析搜索笔记响应失败:', error);
}
}
},
{
/**
* 小红书笔记详情响应处理
*/
path: 'https://edith.xiaohongshu.com/api/sns/web/v1/feed',
}
]
}));

View File

@@ -1,5 +1,5 @@
import { sqliteTable, text, integer } from 'drizzle-orm/sqlite-core';
import { randomUUID } from 'node:crypto';
export const cache = sqliteTable('cache', {
key: text('key').primaryKey(),
value: text('value').notNull(),
@@ -9,16 +9,55 @@ export const cache = sqliteTable('cache', {
/**
 * SQLite table `xhs_note`, keyed by a text `id` supplied by the caller
 * (no default generator is configured on the primary key).
 *
 * NOTE(review): `tags` is declared twice in this object literal (once with
 * `.notNull()`, once without). In an object literal the later property wins,
 * so the nullable definition is the effective one — this looks like a
 * diff/merge artifact; confirm which variant is intended and drop the other.
 */
export const xhsNote = sqliteTable('xhs_note', {
id: text('id').primaryKey(),
content: text('content').notNull(),
title: text('title'),
summary: text('summary'),
description: text('description'),
// First `tags` definition (NOT NULL); shadowed by the second one below.
tags: text('tags').notNull(),
noteUrl: text('note_url'),
link: text('link'),
// Free-form text payload; presumably a JSON-serialized note — confirm against the writer.
data: text('data'),
// Second `tags` definition (nullable); this is the one that takes effect.
tags: text('tags'),
status: text('status'),
authorUrl: text('author_url'),
cover: text('cover'),
// Required integer sync bookkeeping columns; value semantics are not defined here.
syncStatus: integer('sync_status').notNull(),
syncAt: integer('sync_at').notNull(),
star: integer('star'),
pushedAt: integer('pushed_at'),
// Integer timestamps; presumably epoch milliseconds — TODO confirm with the insert code.
createdAt: integer('created_at').notNull(),
updatedAt: integer('updated_at').notNull(),
// Nullable; presumably a soft-delete marker — confirm.
deletedAt: integer('deleted_at'),
});
/**
 * SQLite table `xhs_user`.
 *
 * The primary key `id` is a locally generated UUID (`randomUUID()`), separate
 * from the required `user_id` column.
 */
export const xhsUser = sqliteTable('xhs_user', {
// Generated on insert when the caller does not provide an id.
id: text('id').primaryKey().$defaultFn(() => randomUUID()),
// NOTE(review): the only snake_case property name in this schema (others are
// camelCase); presumably the platform-side user id — confirm. No unique
// constraint is declared on it here.
user_id: text('user_id').notNull(),
username: text('username'),
nickname: text('nickname'),
avatar: text('avatar'),
title: text('title'),
summary: text('summary'),
description: text('description'),
link: text('link'),
// Free-form text payload; presumably JSON-serialized — confirm against the writer.
data: text('data'),
tags: text('tags'),
followersCount: integer('followers_count'),
followingCount: integer('following_count'),
status: text('status'),
// Required integer sync bookkeeping columns; value semantics are not defined here.
syncStatus: integer('sync_status').notNull(),
syncAt: integer('sync_at').notNull(),
star: integer('star'),
// Integer timestamps; presumably epoch milliseconds — TODO confirm with the insert code.
createdAt: integer('created_at').notNull(),
updatedAt: integer('updated_at').notNull(),
// Nullable; presumably a soft-delete marker — confirm.
deletedAt: integer('deleted_at'),
});

View File

@@ -31,6 +31,7 @@ export class Core<T = {}> {
emitter = new EventEmitter();
listeners: Listener[] = [];
recordReady: boolean = false;
timer: NodeJS.Timeout | null = null;
data: T | null = null;
constructor(opts?: { debugPort?: number, debugHost?: string, listeners?: Listener[] }) {
if (opts?.debugPort) {
@@ -126,6 +127,18 @@ export class Core<T = {}> {
if (this.recordReady !== ready) {
this.recordReady = ready;
}
if (ready === true) {
this.timer && clearTimeout(this.timer);
const that = this;
this.timer = setTimeout(() => {
that.recordReady = false;
that.timer = null;
console.log('记录超时,已自动设置为未就绪状态');
}, 5 * 60 * 1000); // 5分钟后自动设置为未就绪, 防止长时间占用资源
} else {
this.timer && clearTimeout(this.timer);
this.timer = null;
}
}
async setData(data?: any) {
if (!data) {
@@ -162,6 +175,7 @@ export class Core<T = {}> {
console.log('记录未就绪,跳过响应处理');
return
}
console.log(`捕获到响应: ${url}`);
try {
const status = response.status();
const contentType = response.headers()['content-type'] || '';

View File

@@ -156,7 +156,7 @@ app.route({
app.route({
path: 'xhs',
key: 'save-search-notes',
description: '保存搜索笔记结果',
description: '保存搜索笔记结果, 浏览器自动化完成搜索后调用此接口保存结果。',
middleware: ['auth'],
metadata: {
tags: ['小红书', '搜索', '保存'],
@@ -164,6 +164,9 @@ app.route({
}
}).define(async (ctx) => {
const data = ctx.query!.data as XHS.SearchNote[];
if (!data || !Array.isArray(data)) {
ctx.throw(400, '缺少有效的 data 参数');
}
try {
const getNoteUrl = (note: XHS.SearchNote) => {
const id = note.id;
@@ -184,24 +187,31 @@ app.route({
return cover?.url_default || ''
}
const keyword = sessionCache.get('xhs-search-keyword');
const notes = data.filter(note => note.model_type === 'note').map(note => ({
const notes = data.filter(note => note.model_type === 'note').map(note => {
const cornnerTag = note.note_card?.corner_tag_info;
const pushTime = cornnerTag?.find(tag => tag.type === 'publish_time')?.text || '';
// 一天前 pushTime 包含 "前"
return {
id: note.id,
content: JSON.stringify(note),
description: keyword || '',
title: note.note_card?.display_title || '',
authorUrl: getUserUrl(note),
tags: '',
syncStatus: 0,
noteUrl: getNoteUrl(note),
summary: '',
description: keyword || '',
link: getNoteUrl(note),
data: JSON.stringify(note),
cover: getCover(note),
authorUrl: getUserUrl(note),
syncStatus: 0,
// pushedAt: 0,
syncAt: 0,
createdAt: Date.now(),
updatedAt: Date.now(),
}));
}
});
await db.insert(xhsNote).values(notes).onConflictDoUpdate({
target: xhsNote.id,
set: {
content: sql`excluded.content`,
summary: sql`excluded.summary`,
updatedAt: Date.now(),
},
}).execute();

121
typings/note.d.ts vendored
View File

@@ -103,4 +103,125 @@ declare namespace XHS {
hasMore: boolean;
items: T[];
}
}
/**
 * Types for the note-detail ("feed") response.
 *
 * This declaration merges with the other `XHS` namespace declarations in this
 * file; `NoteUser` is expected to come from one of them.
 */
declare namespace XHS {
  /** Share settings of a note. */
  export interface ShareInfo {
    /** Whether sharing is disabled. */
    un_share: boolean;
  }

  /** A tag attached to a note. */
  export interface Tag {
    /** Tag ID. */
    id: string;
    /** Tag name. */
    name: string;
    /** Tag type, e.g. `topic`. */
    type: 'topic' | string;
  }

  /** Full interaction info of a note (counts are delivered as strings). */
  export interface FullInteractInfo {
    /** Share count. */
    share_count: string;
    /** Whether the viewer follows the author. */
    followed: boolean;
    /** Relation to the author, e.g. `none`. */
    relation: 'none' | 'following' | string;
    /** Whether the viewer liked the note. */
    liked: boolean;
    /** Like count. */
    liked_count: string;
    /** Whether the viewer collected the note. */
    collected: boolean;
    /** Collect count. */
    collected_count: string;
    /** Comment count. */
    comment_count: string;
  }

  /** One scene-specific rendition of an image. */
  export interface FullImageInfo {
    /** Image scene, e.g. `WB_DFT` (default) or `WB_PRV` (preview). */
    image_scene: string;
    /** Image URL. */
    url: string;
  }

  /** A full image entry of a note. */
  export interface FullImage {
    /** Image width. */
    width: number;
    /** Image height. */
    height: number;
    /** Per-scene URLs of the image. */
    info_list: FullImageInfo[];
    /** Stream info. */
    stream: Record<string, unknown>;
    /** Whether this is a Live Photo. */
    live_photo: boolean;
    /** File ID. */
    file_id: string;
    /** URL. */
    url: string;
    /** Trace ID. */
    trace_id: string;
    /** Preview URL. */
    url_pre: string;
    /** Default URL. */
    url_default: string;
  }

  /** Full note card as returned by the feed (detail) endpoint. */
  export interface NoteCardDetail {
    /** Time. */
    time: number;
    /** Share info. */
    share_info: ShareInfo;
    /** Description. */
    desc: string;
    /** Author info. */
    user: NoteUser;
    /** Tag list. */
    tag_list: Tag[];
    /** Interaction info. */
    interact_info: FullInteractInfo;
    /** Image list. */
    image_list: FullImage[];
    /** List of @-mentioned users. */
    at_user_list: unknown[];
    /** Last update time. */
    last_update_time: number;
    /** IP location. */
    ip_location: string;
    /** Note ID. */
    note_id: string;
    /** Note type, e.g. `normal`. */
    type: 'normal' | 'video' | string;
    /** Title. */
    title: string;
  }

  /** A full note item inside the feed response. */
  export interface NoteDetail {
    /** Note ID. */
    id: string;
    /** Model type, e.g. `note`. */
    model_type: 'note' | string;
    /**
     * Note card. Typed as the detail card: the feed payload carries `time`,
     * `share_info`, `desc`, `image_list`, etc., which `NoteCardDetail` models.
     */
    note_card: NoteCardDetail;
  }

  /** Feed response. */
  export interface FeedResponse {
    /** Cursor score. */
    cursor_score: string;
    /** Note list. */
    items: NoteDetail[];
    /** Current time. */
    current_time: number;
  }
}