更新文档,添加小红书模块描述;重构数据库模式,增加笔记和用户信息字段;优化核心逻辑,增加记录超时处理;更新示例数据。
This commit is contained in:
84
examples/xhs/feed.json
Normal file
84
examples/xhs/feed.json
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
{
|
||||||
|
"cursor_score": "",
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"id": "692d2c3c000000000d035cfd",
|
||||||
|
"model_type": "note",
|
||||||
|
"note_card": {
|
||||||
|
"title": "怎么让自己一台设备控制另一台电脑快捷键?",
|
||||||
|
"user": {
|
||||||
|
"user_id": "6726cef4000000001c019303",
|
||||||
|
"nickname": "小熊猫呜呜呜",
|
||||||
|
"avatar": "https://sns-avatar-qc.xhscdn.com/avatar/1040g2jo31nptpebimm605pp6prq734o33ikhhig",
|
||||||
|
"xsec_token": "ABgQHM5P-hKJbm3K-GuN6GGJGXQGIRQX2d4m2TmuGPW0Y="
|
||||||
|
},
|
||||||
|
"tag_list": [],
|
||||||
|
"at_user_list": [],
|
||||||
|
"time": 1764568124000,
|
||||||
|
"share_info": {
|
||||||
|
"un_share": false
|
||||||
|
},
|
||||||
|
"note_id": "692d2c3c000000000d035cfd",
|
||||||
|
"type": "normal",
|
||||||
|
"image_list": [
|
||||||
|
{
|
||||||
|
"file_id": "",
|
||||||
|
"width": 1440,
|
||||||
|
"info_list": [
|
||||||
|
{
|
||||||
|
"image_scene": "WB_PRV",
|
||||||
|
"url": "http://sns-webpic-qc.xhscdn.com/202512310146/01744c61858f30b8e306f79c3d17902d/1040g00831phpsi4p3c6g5pp6prq734o3ghs6t8g!nd_prv_wlteh_webp_3"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "http://sns-webpic-qc.xhscdn.com/202512310146/166c6ac26933a554f17aefd009037ee7/1040g00831phpsi4p3c6g5pp6prq734o3ghs6t8g!nd_dft_wlteh_webp_3",
|
||||||
|
"image_scene": "WB_DFT"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"url_pre": "http://sns-webpic-qc.xhscdn.com/202512310146/01744c61858f30b8e306f79c3d17902d/1040g00831phpsi4p3c6g5pp6prq734o3ghs6t8g!nd_prv_wlteh_webp_3",
|
||||||
|
"height": 2400,
|
||||||
|
"url": "",
|
||||||
|
"trace_id": "",
|
||||||
|
"url_default": "http://sns-webpic-qc.xhscdn.com/202512310146/166c6ac26933a554f17aefd009037ee7/1040g00831phpsi4p3c6g5pp6prq734o3ghs6t8g!nd_dft_wlteh_webp_3",
|
||||||
|
"stream": {},
|
||||||
|
"live_photo": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url_default": "http://sns-webpic-qc.xhscdn.com/202512310146/a4c16164274aa5833d119d4b9d150da3/1040g00831phpsi4p3c605pp6prq734o3dmgotbo!nd_dft_wlteh_webp_3",
|
||||||
|
"file_id": "",
|
||||||
|
"url": "",
|
||||||
|
"url_pre": "http://sns-webpic-qc.xhscdn.com/202512310146/a26e9c3737fee7be0844121a6b83f897/1040g00831phpsi4p3c605pp6prq734o3dmgotbo!nd_prv_wlteh_webp_3",
|
||||||
|
"info_list": [
|
||||||
|
{
|
||||||
|
"image_scene": "WB_PRV",
|
||||||
|
"url": "http://sns-webpic-qc.xhscdn.com/202512310146/a26e9c3737fee7be0844121a6b83f897/1040g00831phpsi4p3c605pp6prq734o3dmgotbo!nd_prv_wlteh_webp_3"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"image_scene": "WB_DFT",
|
||||||
|
"url": "http://sns-webpic-qc.xhscdn.com/202512310146/a4c16164274aa5833d119d4b9d150da3/1040g00831phpsi4p3c605pp6prq734o3dmgotbo!nd_dft_wlteh_webp_3"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"stream": {},
|
||||||
|
"live_photo": false,
|
||||||
|
"height": 2400,
|
||||||
|
"width": 1440,
|
||||||
|
"trace_id": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"last_update_time": 1764568125000,
|
||||||
|
"ip_location": "浙江",
|
||||||
|
"desc": "",
|
||||||
|
"interact_info": {
|
||||||
|
"liked_count": "1",
|
||||||
|
"collected": false,
|
||||||
|
"collected_count": "1",
|
||||||
|
"comment_count": "0",
|
||||||
|
"share_count": "0",
|
||||||
|
"followed": false,
|
||||||
|
"relation": "none",
|
||||||
|
"liked": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"current_time": 1767116763109
|
||||||
|
}
|
||||||
@@ -1,7 +1,10 @@
|
|||||||
# 浏览器自动化助手
|
# 浏览器自动化助手
|
||||||
|
|
||||||
|
> 信息收集工具:借助 AI 过滤低质量和无用的信息,帮助用户更高效地收集、整理资料,提升工作和学习效率。
|
||||||
|
|
||||||
实现功能,浏览了页面,自动把想要的数据,存储到数据库中,方便后续分析和使用。
|
实现功能,浏览了页面,自动把想要的数据,存储到数据库中,方便后续分析和使用。
|
||||||
|
|
||||||
|
|
||||||
## 初始化
|
## 初始化
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -11,6 +14,12 @@ pnpm run init
|
|||||||
|
|
||||||
## 启动 studio
|
## 启动 studio
|
||||||
|
|
||||||
|
studio 是 drizzle 提供的可视化数据库管理工具,可以用来查看和管理本地的 SQLite 数据库。
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pnpm run studio
|
pnpm run studio
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## 关于小红书模块
|
||||||
|
|
||||||
|
筛选出自己想要的笔记,保存到本地数据库中,方便后续使用。
|
||||||
|
|||||||
@@ -58,6 +58,12 @@ export const core = useConfigKey<Core>('core', () => new Core({
|
|||||||
console.error('解析搜索笔记响应失败:', error);
|
console.error('解析搜索笔记响应失败:', error);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* 小红书笔记详情响应处理
|
||||||
|
*/
|
||||||
|
path: 'https://edith.xiaohongshu.com/api/sns/web/v1/feed',
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}));
|
}));
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import { sqliteTable, text, integer } from 'drizzle-orm/sqlite-core';
|
import { sqliteTable, text, integer } from 'drizzle-orm/sqlite-core';
|
||||||
|
import { randomUUID } from 'node:crypto';
|
||||||
export const cache = sqliteTable('cache', {
|
export const cache = sqliteTable('cache', {
|
||||||
key: text('key').primaryKey(),
|
key: text('key').primaryKey(),
|
||||||
value: text('value').notNull(),
|
value: text('value').notNull(),
|
||||||
@@ -9,16 +9,55 @@ export const cache = sqliteTable('cache', {
|
|||||||
|
|
||||||
export const xhsNote = sqliteTable('xhs_note', {
|
export const xhsNote = sqliteTable('xhs_note', {
|
||||||
id: text('id').primaryKey(),
|
id: text('id').primaryKey(),
|
||||||
content: text('content').notNull(),
|
|
||||||
title: text('title'),
|
title: text('title'),
|
||||||
|
summary: text('summary'),
|
||||||
description: text('description'),
|
description: text('description'),
|
||||||
tags: text('tags').notNull(),
|
link: text('link'),
|
||||||
noteUrl: text('note_url'),
|
data: text('data'),
|
||||||
|
tags: text('tags'),
|
||||||
|
|
||||||
status: text('status'),
|
status: text('status'),
|
||||||
authorUrl: text('author_url'),
|
authorUrl: text('author_url'),
|
||||||
cover: text('cover'),
|
cover: text('cover'),
|
||||||
|
|
||||||
syncStatus: integer('sync_status').notNull(),
|
syncStatus: integer('sync_status').notNull(),
|
||||||
syncAt: integer('sync_at').notNull(),
|
syncAt: integer('sync_at').notNull(),
|
||||||
|
|
||||||
|
star: integer('star'),
|
||||||
|
|
||||||
|
pushedAt: integer('pushed_at'),
|
||||||
createdAt: integer('created_at').notNull(),
|
createdAt: integer('created_at').notNull(),
|
||||||
updatedAt: integer('updated_at').notNull(),
|
updatedAt: integer('updated_at').notNull(),
|
||||||
|
deletedAt: integer('deleted_at'),
|
||||||
|
});
|
||||||
|
|
||||||
|
/**
 * `xhs_user` table: one row per stored Xiaohongshu user.
 *
 * Only `id`, `user_id`, `sync_status`, `sync_at`, `created_at` and
 * `updated_at` are NOT NULL; every other column is nullable.
 *
 * NOTE(review): the property `user_id` is snake_case while its siblings are
 * camelCase (`followersCount`, `syncStatus`, ...). Renaming it would change
 * the inferred row type, so it is left as-is here.
 * NOTE(review): the integer time columns are presumably epoch milliseconds
 * (as produced by `Date.now()`) - confirm against the code that writes them.
 */
export const xhsUser = sqliteTable('xhs_user', {
  // Local primary key; a random UUID is generated when no id is supplied.
  id: text('id').primaryKey().$defaultFn(() => randomUUID()),
  // User identifier string (required).
  user_id: text('user_id').notNull(),

  // Profile fields.
  username: text('username'),
  nickname: text('nickname'),
  avatar: text('avatar'),

  // Generic descriptive fields.
  title: text('title'),
  summary: text('summary'),
  description: text('description'),
  link: text('link'),
  data: text('data'),
  tags: text('tags'),

  // Follower / following counters.
  followersCount: integer('followers_count'),
  followingCount: integer('following_count'),

  status: text('status'),

  // Synchronisation bookkeeping (both required).
  syncStatus: integer('sync_status').notNull(),
  syncAt: integer('sync_at').notNull(),

  star: integer('star'),

  // Row lifecycle; `deleted_at` is nullable.
  createdAt: integer('created_at').notNull(),
  updatedAt: integer('updated_at').notNull(),
  deletedAt: integer('deleted_at'),
});
|
||||||
@@ -31,6 +31,7 @@ export class Core<T = {}> {
|
|||||||
emitter = new EventEmitter();
|
emitter = new EventEmitter();
|
||||||
listeners: Listener[] = [];
|
listeners: Listener[] = [];
|
||||||
recordReady: boolean = false;
|
recordReady: boolean = false;
|
||||||
|
timer: NodeJS.Timeout | null = null;
|
||||||
data: T | null = null;
|
data: T | null = null;
|
||||||
constructor(opts?: { debugPort?: number, debugHost?: string, listeners?: Listener[] }) {
|
constructor(opts?: { debugPort?: number, debugHost?: string, listeners?: Listener[] }) {
|
||||||
if (opts?.debugPort) {
|
if (opts?.debugPort) {
|
||||||
@@ -126,6 +127,18 @@ export class Core<T = {}> {
|
|||||||
if (this.recordReady !== ready) {
|
if (this.recordReady !== ready) {
|
||||||
this.recordReady = ready;
|
this.recordReady = ready;
|
||||||
}
|
}
|
||||||
|
if (ready === true) {
|
||||||
|
this.timer && clearTimeout(this.timer);
|
||||||
|
const that = this;
|
||||||
|
this.timer = setTimeout(() => {
|
||||||
|
that.recordReady = false;
|
||||||
|
that.timer = null;
|
||||||
|
console.log('记录超时,已自动设置为未就绪状态');
|
||||||
|
}, 5 * 60 * 1000); // 5分钟后自动设置为未就绪, 防止长时间占用资源
|
||||||
|
} else {
|
||||||
|
this.timer && clearTimeout(this.timer);
|
||||||
|
this.timer = null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
async setData(data?: any) {
|
async setData(data?: any) {
|
||||||
if (!data) {
|
if (!data) {
|
||||||
@@ -162,6 +175,7 @@ export class Core<T = {}> {
|
|||||||
console.log('记录未就绪,跳过响应处理');
|
console.log('记录未就绪,跳过响应处理');
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
console.log(`捕获到响应: ${url}`);
|
||||||
try {
|
try {
|
||||||
const status = response.status();
|
const status = response.status();
|
||||||
const contentType = response.headers()['content-type'] || '';
|
const contentType = response.headers()['content-type'] || '';
|
||||||
|
|||||||
@@ -156,7 +156,7 @@ app.route({
|
|||||||
app.route({
|
app.route({
|
||||||
path: 'xhs',
|
path: 'xhs',
|
||||||
key: 'save-search-notes',
|
key: 'save-search-notes',
|
||||||
description: '保存搜索笔记结果',
|
description: '保存搜索笔记结果, 浏览器自动化完成搜索后调用此接口保存结果。',
|
||||||
middleware: ['auth'],
|
middleware: ['auth'],
|
||||||
metadata: {
|
metadata: {
|
||||||
tags: ['小红书', '搜索', '保存'],
|
tags: ['小红书', '搜索', '保存'],
|
||||||
@@ -164,6 +164,9 @@ app.route({
|
|||||||
}
|
}
|
||||||
}).define(async (ctx) => {
|
}).define(async (ctx) => {
|
||||||
const data = ctx.query!.data as XHS.SearchNote[];
|
const data = ctx.query!.data as XHS.SearchNote[];
|
||||||
|
if (!data || !Array.isArray(data)) {
|
||||||
|
ctx.throw(400, '缺少有效的 data 参数');
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
const getNoteUrl = (note: XHS.SearchNote) => {
|
const getNoteUrl = (note: XHS.SearchNote) => {
|
||||||
const id = note.id;
|
const id = note.id;
|
||||||
@@ -184,24 +187,31 @@ app.route({
|
|||||||
return cover?.url_default || ''
|
return cover?.url_default || ''
|
||||||
}
|
}
|
||||||
const keyword = sessionCache.get('xhs-search-keyword');
|
const keyword = sessionCache.get('xhs-search-keyword');
|
||||||
const notes = data.filter(note => note.model_type === 'note').map(note => ({
|
const notes = data.filter(note => note.model_type === 'note').map(note => {
|
||||||
|
const cornnerTag = note.note_card?.corner_tag_info;
|
||||||
|
const pushTime = cornnerTag?.find(tag => tag.type === 'publish_time')?.text || '';
|
||||||
|
// 一天前 pushTime 包含 "前"
|
||||||
|
return {
|
||||||
id: note.id,
|
id: note.id,
|
||||||
content: JSON.stringify(note),
|
|
||||||
description: keyword || '',
|
|
||||||
title: note.note_card?.display_title || '',
|
title: note.note_card?.display_title || '',
|
||||||
authorUrl: getUserUrl(note),
|
|
||||||
tags: '',
|
tags: '',
|
||||||
syncStatus: 0,
|
summary: '',
|
||||||
noteUrl: getNoteUrl(note),
|
description: keyword || '',
|
||||||
|
link: getNoteUrl(note),
|
||||||
|
data: JSON.stringify(note),
|
||||||
cover: getCover(note),
|
cover: getCover(note),
|
||||||
|
authorUrl: getUserUrl(note),
|
||||||
|
syncStatus: 0,
|
||||||
|
// pushedAt: 0,
|
||||||
syncAt: 0,
|
syncAt: 0,
|
||||||
createdAt: Date.now(),
|
createdAt: Date.now(),
|
||||||
updatedAt: Date.now(),
|
updatedAt: Date.now(),
|
||||||
}));
|
}
|
||||||
|
});
|
||||||
await db.insert(xhsNote).values(notes).onConflictDoUpdate({
|
await db.insert(xhsNote).values(notes).onConflictDoUpdate({
|
||||||
target: xhsNote.id,
|
target: xhsNote.id,
|
||||||
set: {
|
set: {
|
||||||
content: sql`excluded.content`,
|
summary: sql`excluded.summary`,
|
||||||
updatedAt: Date.now(),
|
updatedAt: Date.now(),
|
||||||
},
|
},
|
||||||
}).execute();
|
}).execute();
|
||||||
|
|||||||
121
typings/note.d.ts
vendored
121
typings/note.d.ts
vendored
@@ -103,4 +103,125 @@ declare namespace XHS {
|
|||||||
hasMore: boolean;
|
hasMore: boolean;
|
||||||
items: T[];
|
items: T[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
declare namespace XHS {
  /** Share settings attached to a note. */
  export interface ShareInfo {
    /** True when the note cannot be shared. */
    un_share: boolean;
  }

  /** A tag attached to a note. */
  export interface Tag {
    /** Tag ID. */
    id: string;
    /** Tag display name. */
    name: string;
    /** Tag type, e.g. `topic`. */
    type: 'topic' | string;
  }

  /** Full interaction info of a note (counters are delivered as strings). */
  export interface FullInteractInfo {
    /** Share count. */
    share_count: string;
    /** Whether the author is followed. */
    followed: boolean;
    /** Relation to the author, e.g. `none`. */
    relation: 'none' | 'following' | string;
    /** Whether the note is liked. */
    liked: boolean;
    /** Like count. */
    liked_count: string;
    /** Whether the note is collected. */
    collected: boolean;
    /** Collect count. */
    collected_count: string;
    /** Comment count. */
    comment_count: string;
  }

  /** One scene-specific rendition of an image. */
  export interface FullImageInfo {
    /** Image scene, e.g. `WB_DFT` (default) or `WB_PRV` (preview). */
    image_scene: string;
    /** Image URL for this scene. */
    url: string;
  }

  /** A full image entry of a note. */
  export interface FullImage {
    /** Image width. */
    width: number;
    /** Image height. */
    height: number;
    /** Per-scene renditions (one URL per scene). */
    info_list: FullImageInfo[];
    /** Stream info. */
    stream: Record<string, unknown>;
    /** Whether the image is a Live Photo. */
    live_photo: boolean;
    /** File ID. */
    file_id: string;
    /** URL. */
    url: string;
    /** Trace ID. */
    trace_id: string;
    /** Preview URL. */
    url_pre: string;
    /** Default URL. */
    url_default: string;
  }

  /** Full note card as delivered by the feed (note detail) endpoint. */
  export interface NoteCardDetail {
    /** Time of the note. */
    time: number;
    /** Share settings. */
    share_info: ShareInfo;
    /** Description text. */
    desc: string;
    /** Author info. */
    user: NoteUser;
    /** Tags attached to the note. */
    tag_list: Tag[];
    /** Interaction info. */
    interact_info: FullInteractInfo;
    /** Images of the note. */
    image_list: FullImage[];
    /** Users mentioned with "@". */
    at_user_list: unknown[];
    /** Last update time. */
    last_update_time: number;
    /** IP location. */
    ip_location: string;
    /** Note ID. */
    note_id: string;
    /** Note type, e.g. `normal`. */
    type: 'normal' | 'video' | string;
    /** Title. */
    title: string;
  }

  /** Note detail: one full note inside a feed response. */
  export interface NoteDetail {
    /** Note ID. */
    id: string;
    /** Model type, e.g. `note`. */
    model_type: 'note' | string;
    /**
     * Full note card. Typed as `NoteCardDetail` because the feed payload
     * carries the detail fields (`time`, `desc`, `image_list`, `note_id`, ...).
     */
    note_card: NoteCardDetail;
  }

  /** Response body of the feed endpoint. */
  export interface FeedResponse {
    /** Cursor score. */
    cursor_score: string;
    /** Notes in the response. */
    items: NoteDetail[];
    /** Current time. */
    current_time: number;
  }
}
|
||||||
Reference in New Issue
Block a user