更新文档,添加小红书模块描述;重构数据库模式,增加笔记和用户信息字段;优化核心逻辑,增加记录超时处理;更新示例数据。
This commit is contained in:
84
examples/xhs/feed.json
Normal file
84
examples/xhs/feed.json
Normal file
@@ -0,0 +1,84 @@
|
||||
{
|
||||
"cursor_score": "",
|
||||
"items": [
|
||||
{
|
||||
"id": "692d2c3c000000000d035cfd",
|
||||
"model_type": "note",
|
||||
"note_card": {
|
||||
"title": "怎么让自己一台设备控制另一台电脑快捷键?",
|
||||
"user": {
|
||||
"user_id": "6726cef4000000001c019303",
|
||||
"nickname": "小熊猫呜呜呜",
|
||||
"avatar": "https://sns-avatar-qc.xhscdn.com/avatar/1040g2jo31nptpebimm605pp6prq734o33ikhhig",
|
||||
"xsec_token": "ABgQHM5P-hKJbm3K-GuN6GGJGXQGIRQX2d4m2TmuGPW0Y="
|
||||
},
|
||||
"tag_list": [],
|
||||
"at_user_list": [],
|
||||
"time": 1764568124000,
|
||||
"share_info": {
|
||||
"un_share": false
|
||||
},
|
||||
"note_id": "692d2c3c000000000d035cfd",
|
||||
"type": "normal",
|
||||
"image_list": [
|
||||
{
|
||||
"file_id": "",
|
||||
"width": 1440,
|
||||
"info_list": [
|
||||
{
|
||||
"image_scene": "WB_PRV",
|
||||
"url": "http://sns-webpic-qc.xhscdn.com/202512310146/01744c61858f30b8e306f79c3d17902d/1040g00831phpsi4p3c6g5pp6prq734o3ghs6t8g!nd_prv_wlteh_webp_3"
|
||||
},
|
||||
{
|
||||
"url": "http://sns-webpic-qc.xhscdn.com/202512310146/166c6ac26933a554f17aefd009037ee7/1040g00831phpsi4p3c6g5pp6prq734o3ghs6t8g!nd_dft_wlteh_webp_3",
|
||||
"image_scene": "WB_DFT"
|
||||
}
|
||||
],
|
||||
"url_pre": "http://sns-webpic-qc.xhscdn.com/202512310146/01744c61858f30b8e306f79c3d17902d/1040g00831phpsi4p3c6g5pp6prq734o3ghs6t8g!nd_prv_wlteh_webp_3",
|
||||
"height": 2400,
|
||||
"url": "",
|
||||
"trace_id": "",
|
||||
"url_default": "http://sns-webpic-qc.xhscdn.com/202512310146/166c6ac26933a554f17aefd009037ee7/1040g00831phpsi4p3c6g5pp6prq734o3ghs6t8g!nd_dft_wlteh_webp_3",
|
||||
"stream": {},
|
||||
"live_photo": false
|
||||
},
|
||||
{
|
||||
"url_default": "http://sns-webpic-qc.xhscdn.com/202512310146/a4c16164274aa5833d119d4b9d150da3/1040g00831phpsi4p3c605pp6prq734o3dmgotbo!nd_dft_wlteh_webp_3",
|
||||
"file_id": "",
|
||||
"url": "",
|
||||
"url_pre": "http://sns-webpic-qc.xhscdn.com/202512310146/a26e9c3737fee7be0844121a6b83f897/1040g00831phpsi4p3c605pp6prq734o3dmgotbo!nd_prv_wlteh_webp_3",
|
||||
"info_list": [
|
||||
{
|
||||
"image_scene": "WB_PRV",
|
||||
"url": "http://sns-webpic-qc.xhscdn.com/202512310146/a26e9c3737fee7be0844121a6b83f897/1040g00831phpsi4p3c605pp6prq734o3dmgotbo!nd_prv_wlteh_webp_3"
|
||||
},
|
||||
{
|
||||
"image_scene": "WB_DFT",
|
||||
"url": "http://sns-webpic-qc.xhscdn.com/202512310146/a4c16164274aa5833d119d4b9d150da3/1040g00831phpsi4p3c605pp6prq734o3dmgotbo!nd_dft_wlteh_webp_3"
|
||||
}
|
||||
],
|
||||
"stream": {},
|
||||
"live_photo": false,
|
||||
"height": 2400,
|
||||
"width": 1440,
|
||||
"trace_id": ""
|
||||
}
|
||||
],
|
||||
"last_update_time": 1764568125000,
|
||||
"ip_location": "浙江",
|
||||
"desc": "",
|
||||
"interact_info": {
|
||||
"liked_count": "1",
|
||||
"collected": false,
|
||||
"collected_count": "1",
|
||||
"comment_count": "0",
|
||||
"share_count": "0",
|
||||
"followed": false,
|
||||
"relation": "none",
|
||||
"liked": false
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"current_time": 1767116763109
|
||||
}
|
||||
13
readme.md
13
readme.md
@@ -1,7 +1,10 @@
|
||||
# 浏览器自动化助手
|
||||
|
||||
> 信息收集工具:借助 AI 过滤低质量和无用的信息,帮助用户更高效地收集和整理资料,提升工作和学习效率。
|
||||
|
||||
已实现的功能:在浏览页面时,自动把想要的数据存储到数据库中,方便后续分析和使用。
|
||||
|
||||
|
||||
## 初始化
|
||||
|
||||
```bash
|
||||
@@ -9,8 +12,14 @@ pnpm install
|
||||
pnpm run init
|
||||
```
|
||||
|
||||
## 启动studio
|
||||
## 启动 studio
|
||||
|
||||
studio 是 drizzle 提供的可视化数据库管理工具,可以用来查看和管理本地的 SQLite 数据库。
|
||||
|
||||
```bash
|
||||
pnpm run studio
|
||||
```
|
||||
```
|
||||
|
||||
## 关于小红书模块
|
||||
|
||||
筛选出自己想要的笔记,并保存到本地数据库中,方便后续使用。
|
||||
|
||||
@@ -58,6 +58,12 @@ export const core = useConfigKey<Core>('core', () => new Core({
|
||||
console.error('解析搜索笔记响应失败:', error);
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
/**
|
||||
* 小红书笔记详情响应处理
|
||||
*/
|
||||
path: 'https://edith.xiaohongshu.com/api/sns/web/v1/feed',
|
||||
}
|
||||
]
|
||||
}));
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { sqliteTable, text, integer } from 'drizzle-orm/sqlite-core';
|
||||
|
||||
import { randomUUID } from 'node:crypto';
|
||||
export const cache = sqliteTable('cache', {
|
||||
key: text('key').primaryKey(),
|
||||
value: text('value').notNull(),
|
||||
@@ -9,16 +9,55 @@ export const cache = sqliteTable('cache', {
|
||||
|
||||
export const xhsNote = sqliteTable('xhs_note', {
|
||||
id: text('id').primaryKey(),
|
||||
content: text('content').notNull(),
|
||||
|
||||
title: text('title'),
|
||||
summary: text('summary'),
|
||||
description: text('description'),
|
||||
tags: text('tags').notNull(),
|
||||
noteUrl: text('note_url'),
|
||||
link: text('link'),
|
||||
data: text('data'),
|
||||
tags: text('tags'),
|
||||
|
||||
status: text('status'),
|
||||
authorUrl: text('author_url'),
|
||||
cover: text('cover'),
|
||||
|
||||
syncStatus: integer('sync_status').notNull(),
|
||||
syncAt: integer('sync_at').notNull(),
|
||||
|
||||
star: integer('star'),
|
||||
|
||||
pushedAt: integer('pushed_at'),
|
||||
createdAt: integer('created_at').notNull(),
|
||||
updatedAt: integer('updated_at').notNull(),
|
||||
deletedAt: integer('deleted_at'),
|
||||
});
|
||||
|
||||
export const xhsUser = sqliteTable('xhs_user', {
|
||||
id: text('id').primaryKey().$defaultFn(() => randomUUID()),
|
||||
user_id: text('user_id').notNull(),
|
||||
|
||||
username: text('username'),
|
||||
nickname: text('nickname'),
|
||||
avatar: text('avatar'),
|
||||
|
||||
title: text('title'),
|
||||
summary: text('summary'),
|
||||
description: text('description'),
|
||||
link: text('link'),
|
||||
data: text('data'),
|
||||
tags: text('tags'),
|
||||
|
||||
followersCount: integer('followers_count'),
|
||||
followingCount: integer('following_count'),
|
||||
|
||||
status: text('status'),
|
||||
|
||||
syncStatus: integer('sync_status').notNull(),
|
||||
syncAt: integer('sync_at').notNull(),
|
||||
|
||||
star: integer('star'),
|
||||
|
||||
createdAt: integer('created_at').notNull(),
|
||||
updatedAt: integer('updated_at').notNull(),
|
||||
deletedAt: integer('deleted_at'),
|
||||
});
|
||||
@@ -31,6 +31,7 @@ export class Core<T = {}> {
|
||||
emitter = new EventEmitter();
|
||||
listeners: Listener[] = [];
|
||||
recordReady: boolean = false;
|
||||
timer: NodeJS.Timeout | null = null;
|
||||
data: T | null = null;
|
||||
constructor(opts?: { debugPort?: number, debugHost?: string, listeners?: Listener[] }) {
|
||||
if (opts?.debugPort) {
|
||||
@@ -126,6 +127,18 @@ export class Core<T = {}> {
|
||||
if (this.recordReady !== ready) {
|
||||
this.recordReady = ready;
|
||||
}
|
||||
if (ready === true) {
|
||||
this.timer && clearTimeout(this.timer);
|
||||
const that = this;
|
||||
this.timer = setTimeout(() => {
|
||||
that.recordReady = false;
|
||||
that.timer = null;
|
||||
console.log('记录超时,已自动设置为未就绪状态');
|
||||
}, 5 * 60 * 1000); // 5分钟后自动设置为未就绪, 防止长时间占用资源
|
||||
} else {
|
||||
this.timer && clearTimeout(this.timer);
|
||||
this.timer = null;
|
||||
}
|
||||
}
|
||||
async setData(data?: any) {
|
||||
if (!data) {
|
||||
@@ -162,6 +175,7 @@ export class Core<T = {}> {
|
||||
console.log('记录未就绪,跳过响应处理');
|
||||
return
|
||||
}
|
||||
console.log(`捕获到响应: ${url}`);
|
||||
try {
|
||||
const status = response.status();
|
||||
const contentType = response.headers()['content-type'] || '';
|
||||
|
||||
@@ -1 +1 @@
|
||||
import './search-notes.ts';
|
||||
import './search-notes.ts';
|
||||
|
||||
@@ -156,7 +156,7 @@ app.route({
|
||||
app.route({
|
||||
path: 'xhs',
|
||||
key: 'save-search-notes',
|
||||
description: '保存搜索笔记结果',
|
||||
description: '保存搜索笔记结果, 浏览器自动化完成搜索后调用此接口保存结果。',
|
||||
middleware: ['auth'],
|
||||
metadata: {
|
||||
tags: ['小红书', '搜索', '保存'],
|
||||
@@ -164,6 +164,9 @@ app.route({
|
||||
}
|
||||
}).define(async (ctx) => {
|
||||
const data = ctx.query!.data as XHS.SearchNote[];
|
||||
if (!data || !Array.isArray(data)) {
|
||||
ctx.throw(400, '缺少有效的 data 参数');
|
||||
}
|
||||
try {
|
||||
const getNoteUrl = (note: XHS.SearchNote) => {
|
||||
const id = note.id;
|
||||
@@ -184,24 +187,31 @@ app.route({
|
||||
return cover?.url_default || ''
|
||||
}
|
||||
const keyword = sessionCache.get('xhs-search-keyword');
|
||||
const notes = data.filter(note => note.model_type === 'note').map(note => ({
|
||||
id: note.id,
|
||||
content: JSON.stringify(note),
|
||||
description: keyword || '',
|
||||
title: note.note_card?.display_title || '',
|
||||
authorUrl: getUserUrl(note),
|
||||
tags: '',
|
||||
syncStatus: 0,
|
||||
noteUrl: getNoteUrl(note),
|
||||
cover: getCover(note),
|
||||
syncAt: 0,
|
||||
createdAt: Date.now(),
|
||||
updatedAt: Date.now(),
|
||||
}));
|
||||
const notes = data.filter(note => note.model_type === 'note').map(note => {
|
||||
const cornnerTag = note.note_card?.corner_tag_info;
|
||||
const pushTime = cornnerTag?.find(tag => tag.type === 'publish_time')?.text || '';
|
||||
// 一天前 pushTime 包含 "前"
|
||||
return {
|
||||
id: note.id,
|
||||
title: note.note_card?.display_title || '',
|
||||
tags: '',
|
||||
summary: '',
|
||||
description: keyword || '',
|
||||
link: getNoteUrl(note),
|
||||
data: JSON.stringify(note),
|
||||
cover: getCover(note),
|
||||
authorUrl: getUserUrl(note),
|
||||
syncStatus: 0,
|
||||
// pushedAt: 0,
|
||||
syncAt: 0,
|
||||
createdAt: Date.now(),
|
||||
updatedAt: Date.now(),
|
||||
}
|
||||
});
|
||||
await db.insert(xhsNote).values(notes).onConflictDoUpdate({
|
||||
target: xhsNote.id,
|
||||
set: {
|
||||
content: sql`excluded.content`,
|
||||
summary: sql`excluded.summary`,
|
||||
updatedAt: Date.now(),
|
||||
},
|
||||
}).execute();
|
||||
|
||||
121
typings/note.d.ts
vendored
121
typings/note.d.ts
vendored
@@ -103,4 +103,125 @@ declare namespace XHS {
|
||||
hasMore: boolean;
|
||||
items: T[];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Typings for the Xiaohongshu note-detail ("feed") response.
 *
 * The shapes mirror the sample payload in `examples/xhs/feed.json`.
 */
declare namespace XHS {
  /** Share settings attached to a note. */
  export interface ShareInfo {
    /** True when the note cannot be shared. */
    un_share: boolean;
  }

  /** A tag attached to a note. */
  export interface Tag {
    /** Tag id. */
    id: string;
    /** Tag display name. */
    name: string;
    /** Tag kind, e.g. `'topic'`; other values may occur. */
    type: 'topic' | string;
  }

  /** Full interaction info for a note. All counters arrive as strings. */
  export interface FullInteractInfo {
    /** Share count. */
    share_count: string;
    /** Whether the viewer follows the author. */
    followed: boolean;
    /** Relation between viewer and author, e.g. `'none'`. */
    relation: 'none' | 'following' | string;
    /** Whether the viewer has liked the note. */
    liked: boolean;
    /** Like count. */
    liked_count: string;
    /** Whether the viewer has collected (bookmarked) the note. */
    collected: boolean;
    /** Collect count. */
    collected_count: string;
    /** Comment count. */
    comment_count: string;
  }

  /** One rendition of an image for a specific scene. */
  export interface FullImageInfo {
    /** Image scene, e.g. `WB_DFT` (default) or `WB_PRV` (preview). */
    image_scene: string;
    /** Image URL for this scene. */
    url: string;
  }

  /** A full image entry of a note. */
  export interface FullImage {
    /** Image width. */
    width: number;
    /** Image height. */
    height: number;
    /** Per-scene renditions of the image. */
    info_list: FullImageInfo[];
    /** Stream info; an empty object in the sample payload. */
    stream: Record<string, unknown>;
    /** Whether the image is a Live Photo. */
    live_photo: boolean;
    /** File id. */
    file_id: string;
    /** URL; an empty string in the sample payload. */
    url: string;
    /** Trace id. */
    trace_id: string;
    /** Preview URL. */
    url_pre: string;
    /** Default URL. */
    url_default: string;
  }

  /** Full note card as returned by the feed (note detail) endpoint. */
  export interface NoteCardDetail {
    /** Note time; presumably epoch milliseconds (sample: 1764568124000) — TODO confirm. */
    time: number;
    /** Share settings. */
    share_info: ShareInfo;
    /** Note description / body text. */
    desc: string;
    /** Author info. */
    user: NoteUser;
    /** Tags attached to the note. */
    tag_list: Tag[];
    /** Interaction info. */
    interact_info: FullInteractInfo;
    /** Images of the note. */
    image_list: FullImage[];
    /** Mentioned (@) users; element shape not known. */
    at_user_list: unknown[];
    /** Last update time; presumably epoch milliseconds — TODO confirm. */
    last_update_time: number;
    /** IP location label. */
    ip_location: string;
    /** Note id. */
    note_id: string;
    /** Note type, e.g. `'normal'` or `'video'`. */
    type: 'normal' | 'video' | string;
    /** Note title. */
    title: string;
  }

  /** A full note item inside a feed response. */
  export interface NoteDetail {
    /** Note id. */
    id: string;
    /** Model type, e.g. `'note'`. */
    model_type: 'note' | string;
    /**
     * Full note card. Typed as NoteCardDetail (not the search-result
     * NoteCard) because the feed payload carries the detail fields
     * (title, desc, time, tag_list, image_list, interact_info, ...).
     */
    note_card: NoteCardDetail;
  }

  /** Response body of the feed (note detail) endpoint. */
  export interface FeedResponse {
    /** Cursor score. */
    cursor_score: string;
    /** Returned notes. */
    items: NoteDetail[];
    /** Current time; presumably epoch milliseconds — TODO confirm. */
    current_time: number;
  }
}
|
||||
Reference in New Issue
Block a user