From 66e6370013cff70565015e9d9b4e69664857cd5a Mon Sep 17 00:00:00 2001 From: xiongxiao Date: Thu, 25 Dec 2025 15:29:55 +0800 Subject: [PATCH] update --- .gitignore | 6 +- bun.lock | 19 ---- package.json | 15 +++- pnpm-lock.yaml | 59 +++++++++++++ src/playwright/index.ts | 186 ++++++++++++++++++++++++++++++++++++++-- start-browser.ts | 35 ++++++++ typings/note.ts | 106 +++++++++++++++++++++++ 7 files changed, 397 insertions(+), 29 deletions(-) delete mode 100644 bun.lock create mode 100644 pnpm-lock.yaml create mode 100644 start-browser.ts create mode 100644 typings/note.ts diff --git a/.gitignore b/.gitignore index b512c09..4a2603f 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ -node_modules \ No newline at end of file +node_modules + +browser-context + +cache \ No newline at end of file diff --git a/bun.lock b/bun.lock deleted file mode 100644 index 3f9b619..0000000 --- a/bun.lock +++ /dev/null @@ -1,19 +0,0 @@ -{ - "lockfileVersion": 1, - "configVersion": 1, - "workspaces": { - "": { - "name": "xhs-helper", - "dependencies": { - "playwright": "^1.57.0", - }, - }, - }, - "packages": { - "fsevents": ["fsevents@2.3.2", "", { "os": "darwin" }, "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA=="], - - "playwright": ["playwright@1.57.0", "", { "dependencies": { "playwright-core": "1.57.0" }, "optionalDependencies": { "fsevents": "2.3.2" }, "bin": { "playwright": "cli.js" } }, "sha512-ilYQj1s8sr2ppEJ2YVadYBN0Mb3mdo9J0wQ+UuDhzYqURwSoW4n1Xs5vs7ORwgDGmyEh33tRMeS8KhdkMoLXQw=="], - - "playwright-core": ["playwright-core@1.57.0", "", { "bin": { "playwright-core": "cli.js" } }, "sha512-agTcKlMw/mjBWOnD6kFZttAAGHgi/Nw0CZ2o6JqWSbMlI219lAFLZZCyqByTsvVAJq5XA5H8cA6PrvBRpBWEuQ=="], - } -} diff --git a/package.json b/package.json index a56c488..c31e2bd 100644 --- a/package.json +++ b/package.json @@ -3,15 +3,26 @@ "version": "0.0.1", "description": "", "main": "index.js", + "types": "typings/note.ts", "scripts": { - "test": "echo 
\"Error: no test specified\" && exit 1" + "start": "tsx src/playwright/index.ts", + "init:base": "npx playwright install", + "browser": "pm2 start start-browser.ts --name xhs-helper-browser --interpreter=tsx" }, "keywords": [], + "files": [ + "typings", + "src", + "start-browser.ts" + ], "author": "abearxiong (https://www.xiongxiao.me)", "license": "MIT", "packageManager": "pnpm@10.26.0", "type": "module", "dependencies": { "playwright": "^1.57.0" + }, + "devDependencies": { + "@types/node": "^25.0.3" } -} +} \ No newline at end of file diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml new file mode 100644 index 0000000..a210a90 --- /dev/null +++ b/pnpm-lock.yaml @@ -0,0 +1,59 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + playwright: + specifier: ^1.57.0 + version: 1.57.0 + devDependencies: + '@types/node': + specifier: ^25.0.3 + version: 25.0.3 + +packages: + + '@types/node@25.0.3': + resolution: {integrity: sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA==} + + fsevents@2.3.2: + resolution: {integrity: sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==} + engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} + os: [darwin] + + playwright-core@1.57.0: + resolution: {integrity: sha512-agTcKlMw/mjBWOnD6kFZttAAGHgi/Nw0CZ2o6JqWSbMlI219lAFLZZCyqByTsvVAJq5XA5H8cA6PrvBRpBWEuQ==} + engines: {node: '>=18'} + hasBin: true + + playwright@1.57.0: + resolution: {integrity: sha512-ilYQj1s8sr2ppEJ2YVadYBN0Mb3mdo9J0wQ+UuDhzYqURwSoW4n1Xs5vs7ORwgDGmyEh33tRMeS8KhdkMoLXQw==} + engines: {node: '>=18'} + hasBin: true + + undici-types@7.16.0: + resolution: {integrity: sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==} + +snapshots: + + '@types/node@25.0.3': + dependencies: + undici-types: 7.16.0 + + fsevents@2.3.2: + optional: true + + playwright-core@1.57.0: {} + + 
playwright@1.57.0: + dependencies: + playwright-core: 1.57.0 + optionalDependencies: + fsevents: 2.3.2 + + undici-types@7.16.0: {} diff --git a/src/playwright/index.ts b/src/playwright/index.ts index f885708..aa1e6c0 100644 --- a/src/playwright/index.ts +++ b/src/playwright/index.ts @@ -1,11 +1,183 @@ -import { chromium } from 'playwright'; +import { chromium, Page, BrowserContext } from 'playwright'; +import path from 'node:path'; +import { exec } from 'node:child_process'; +import { promisify } from 'node:util'; +import fs from 'node:fs'; +const execAsync = promisify(exec); +export const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms)); export const main = async () => { - const browser = await chromium.launch({ headless: true }); - const page = await browser.newPage(); - await page.goto('https://www.xiaohongshu.com'); - console.log(await page.title()); - await browser.close(); + const debugPort = 9223; + // 等待并检查端口是否监听 + for (let i = 0; i < 15; i++) { + await new Promise(resolve => setTimeout(resolve, 2000)); + + try { + // 检查端口是否被监听 + const { stdout } = await execAsync(`netstat -ano | findstr :${debugPort}`); + console.log(`端口 ${debugPort} 已在监听:\n${stdout}`); + + // 尝试连接 + const browser = await chromium.connectOverCDP(`http://127.0.0.1:${debugPort}`); + console.log('成功连接到 Chrome CDP!'); + + // 获取已有的 context + const context = browser.contexts()[0]; + + // 获取已有的页面或创建新页面 + const page = context.pages()[0] || await context.newPage(); + + // console.log('Navigating to xiaohongshu.com'); + // await page.goto('https://www.xiaohongshu.com/search_result?keyword=%25E5%25A4%259A%25E7%25BB%25B4%25E8%25A1%25A8%25E6%25A0%25BC&type=51'); + // console.log('当前页面标题:', await page.title()); + // 点击筛选按钮的示例 + // await page.click('text=筛选'); + + // PAIU + await listenFetchRequests(context); + await sleep(2000); + // 关闭浏览器连接,不关闭实际浏览器 + await hoverPickerExample(page, { keyword: '多维表格' }); + // 等待更长时间让请求有机会发生 + console.log('等待 API 请求...'); + await sleep(5000); + // 
清理所有路由,避免 TargetClosedError + await context.unrouteAll({ behavior: 'ignoreErrors' }); + // 获取 + await browser.close(); + return; + } catch (error: any) { + console.log(`尝试 ${i + 1}/15: ${(error as Error).message.slice(0, 100)}`); + } + } + + throw new Error(`无法连接到 Chrome CDP,端口 ${debugPort} 可能未正确启动`); } -main(); \ No newline at end of file +main(); + +type HoverPickerOptions = { + keyword?: string; + pushTime?: '一天内' | '一周内' | '半年内'; + sort?: '综合' | '最新' | '最多点赞' | '最多评论'; + searchRange?: '不限' | '已看过' | '未看过' | '已关注'; + distance?: '不限' | '同城' | '附近'; + scrollTimes?: number; +} +const hoverPickerExample = async (page: Page, opts?: HoverPickerOptions) => { + const { pushTime = '一天内', sort = '最新', keyword = '', scrollTimes = 5 } = opts || {}; + if (keyword) { + // class为 input-box + const inputBox = await page.$('.input-box'); + const input = await inputBox?.$('input'); + if (input) { + //先获取当前内容,如果一样就不输入 + const currentValue = await input.inputValue(); + if (currentValue === keyword) { + console.log('关键词已存在,无需输入'); + } else { + await input.fill(keyword); + await input.press('Enter'); + console.log(`已输入关键词: ${keyword}`); + await sleep(3000); // 等待搜索结果加载 + } + } + } + // 查找筛选按钮并保持 hover 状态 + let filterButton = await page.$('text=筛选'); + if (filterButton) { + await filterButton.hover(); + console.log('鼠标悬停在筛选按钮上'); + // 在保持 hover 的情况下,等待筛选面板出现并点击元素 + const filterClassPanel = await page.$('.filter-panel'); + if (filterClassPanel) { + console.log('筛选面板已打开'); + + // 点击最新选项 + const latestOption = await filterClassPanel.$(`text=${sort}`); + if (latestOption) { + await latestOption.click(); + console.log(`已选择${sort}选项`); + } + + // 点击一周内选项 + const oneWeekOption = await filterClassPanel.$(`text=${pushTime}`); + if (oneWeekOption) { + await oneWeekOption.click(); + console.log(`已选择${pushTime}选项`); + } + if (opts?.distance && opts.distance !== '不限') { + const distanceOption = await filterClassPanel.$(`text=${opts.distance}`); + if (distanceOption) { + await 
distanceOption.click(); + console.log(`已选择${opts.distance}选项`); + } + } + if (opts?.searchRange && opts.searchRange !== '不限') { + const rangeOption = await filterClassPanel.$(`text=${opts.searchRange}`); + if (rangeOption) { + await rangeOption.click(); + console.log(`已选择${opts.searchRange}选项`); + } + } + // 点击收起按钮 + const shouquButton = await filterClassPanel.$('text=收起'); + if (shouquButton) { + await shouquButton.click(); + console.log('已点击收起按钮'); + } + } + } + // 将鼠标移到页面外,移除 hover 状态 + await page.mouse.move(0, 0); + console.log('已移除 hover 状态'); + // 自动滚动页面5次以触发更多请求 + for (let i = 0; i < scrollTimes; i++) { + await page.evaluate(() => { + window.scrollBy({ + top: window.innerHeight, + left: 0, + behavior: 'smooth' + }); + }); + console.log(`已滚动页面 ${i + 1} 次`); + // 判断是否滚动到底部 + const isBottom = await page.evaluate(() => { + return (window.innerHeight + window.scrollY) >= document.body.scrollHeight; + }); + if (isBottom) { + console.log('已到达页面底部,停止滚动'); + break; + } + await sleep(2000); // 等待2秒以加载新内容 + } +} + +const listenFetchRequests = async (context: BrowserContext) => { + // 监听访问 https://edith.xiaohongshu.com/api/sns/web/v1/search/notes + // 返回对应的结果 - 使用 context 级别的路由,刷新后不会丢失 + await context.route('https://edith.xiaohongshu.com/api/sns/web/v1/search/notes*', async (route) => { + const request = route.request(); + console.log('捕获到请求:', request.url()); + await route.continue(); + }); + + // 使用 response 事件来获取响应内容 + context.on('response', async (response) => { + const url = response.url(); + // 打印所有 edith.xiaohongshu.com 的 API 响应 + if (url.includes('edith.xiaohongshu.com/api/')) { + console.log('收到 API 响应:', url); + console.log('状态:', response.status()); + if (url.includes('search/notes')) { + try { + const responseBody = await response.text(); + fs.writeFileSync(path.join(process.cwd(), 'cache', Date.now().toString() + '.json'), responseBody, 'utf-8'); + } catch (e) { + console.log('无法读取响应内容'); + } + } + + } + }); +} \ No newline at end of file diff --git 
a/start-browser.ts b/start-browser.ts new file mode 100644 index 0000000..96ad733 --- /dev/null +++ b/start-browser.ts @@ -0,0 +1,35 @@ +import { chromium } from 'playwright'; +import { spawn } from 'node:child_process'; +import path from 'node:path'; + +export const main = async () => { + // Chrome 路径和配置 + const executablePath = 'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe'; + // 使用独立的用户数据目录,避免与 Chrome 冲突 + const userDataDir = path.join(process.cwd(), 'browser-context'); + const debugPort = 9223; + + console.log('启动 Chrome...'); + console.log(`端口: ${debugPort}`); + console.log(`用户数据目录: ${userDataDir}`); + // console.log('注意:需要手动登录账号和安装插件'); + + // 启动 Chrome(带远程调试端口) + const chromeProcess = spawn(executablePath, [ + `--remote-debugging-port=${debugPort}`, + `--user-data-dir=${userDataDir}`, + ], { + detached: false, + stdio: 'inherit', + }); + + chromeProcess.on('error', (err) => { + console.error('Chrome 启动失败:', err); + }); + + chromeProcess.on('exit', (code, signal) => { + console.log(`Chrome 进程退出,代码: ${code}, 信号: ${signal}`); + }); +} + +main(); \ No newline at end of file diff --git a/typings/note.ts b/typings/note.ts new file mode 100644 index 0000000..ba295ef --- /dev/null +++ b/typings/note.ts @@ -0,0 +1,106 @@ +export namespace XHS { + /** 笔记用户信息 */ + export interface NoteUser { + /** 昵称 */ + nick_name: string; + /** 头像URL */ + avatar: string; + /** 用户ID */ + user_id: string; + /** 昵称(与nick_name相同) */ + nickname: string; + /** 安全令牌 */ + xsec_token: string; + } + + /** 笔记交互信息 */ + export interface InteractInfo { + /** 评论数 */ + comment_count: string; + /** 分享数 */ + shared_count: string; + /** 是否已点赞 */ + liked: boolean; + /** 点赞数 */ + liked_count: string; + /** 是否已收藏 */ + collected: boolean; + /** 收藏数 */ + collected_count: string; + } + + /** 图片信息 */ + export interface ImageInfo { + /** 图片场景(如WB_DFT默认、WB_PRV预览) */ + image_scene: string; + /** 图片URL */ + url: string; + } + + /** 笔记图片 */ + export interface Image { + /** 图片宽度 */ + width: number; + 
/** 图片高度 */ + height: number; + /** 图片信息列表(不同场景的URL) */ + info_list: ImageInfo[]; + } + + /** 笔记封面 */ + export interface Cover { + /** 封面高度 */ + height: number; + /** 封面宽度 */ + width: number; + /** 默认URL */ + url_default: string; + /** 预览URL */ + url_pre: string; + } + + /** 角标信息 */ + export interface CornerTagInfo { + /** TODO: 角标类型(如publish_time发布时间) */ + type: 'publish_time' | string; + /** 角标文本 */ + text: string; + } + + /** 笔记卡片 */ + export interface NoteCard { + /** 显示标题 */ + display_title: string; + /** 用户信息 */ + user: NoteUser; + /** 交互信息 */ + interact_info: InteractInfo; + /** 封面 */ + cover: Cover; + /** 图片列表 */ + image_list: Image[]; + /** 角标信息列表 */ + corner_tag_info: CornerTagInfo[]; + /** TODO: 笔记类型(如normal普通) */ + type: 'normal' | 'video'; + } + + /** 笔记 */ + export interface Note { + /** 笔记ID */ + id: string; + /** 模型类型(如note笔记) */ + model_type: string; + /** 笔记卡片 */ + note_card: NoteCard; + /** 安全令牌 */ + xsec_token: string; + } +} + +export namespace XHS { + export interface ResultList<T> { + hasMore: boolean; + items: T[]; + } +} \ No newline at end of file