From a06e12df5a10c507e1a27db7c80a367a4a2a4507 Mon Sep 17 00:00:00 2001
From: abearxiong
Date: Thu, 25 Dec 2025 11:10:33 +0800
Subject: [PATCH] update

---
 package.json                              |   1 +
 packages/xhs/src/libs/client-base.js      | 520 ++++++++++++++++++++++
 packages/xhs/src/libs/utils/exception.js  |  46 ++
 packages/xhs/src/libs/utils/helper.js     |  32 ++
 packages/xhs/src/libs/xhs.ts              |  24 +-
 packages/xhs/src/test/common.ts           |   4 +
 packages/xhs/src/test/query/mention.ts    |   1 -
 packages/xhs/src/test/query/query-keys.ts |   6 +-
 packages/xhs/tsconfig.json                |   2 +-
 pnpm-lock.yaml                            |   6 +-
 10 files changed, 622 insertions(+), 20 deletions(-)
 create mode 100644 packages/xhs/src/libs/client-base.js
 create mode 100644 packages/xhs/src/libs/utils/exception.js
 create mode 100644 packages/xhs/src/libs/utils/helper.js

diff --git a/package.json b/package.json
index 0913e52..2c79853 100644
--- a/package.json
+++ b/package.json
@@ -41,6 +41,7 @@
     "@kevisual/context": "^0.0.4",
     "@kevisual/router": "0.0.49",
     "@kevisual/use-config": "^1.0.21",
+    "axios": "^1.9.0",
     "cookie": "^1.1.1",
     "dayjs": "^1.11.19",
     "formidable": "^3.5.4",
diff --git a/packages/xhs/src/libs/client-base.js b/packages/xhs/src/libs/client-base.js
new file mode 100644
index 0000000..dc108e5
--- /dev/null
+++ b/packages/xhs/src/libs/client-base.js
@@ -0,0 +1,520 @@
+import qs from 'querystring';
+
+import { getSearchId, SearchSortType, SearchNoteType } from './utils/helper.js';
+import { ErrorEnum, DataFetchError, IPBlockError, NeedVerifyError } from './utils/exception.js';
+// Converts a camelCase key to snake_case (e.g. "noteId" -> "note_id").
+const camelToUnderscore = (key) => {
+  return key.replace(/([A-Z])/g, '_$1').toLowerCase();
+};
+// Recursively converts every key of a JSON object (or JSON string) to snake_case.
+const transformJsonKeys = (jsonData) => {
+  const dataDict = typeof jsonData === 'string' ? JSON.parse(jsonData) : jsonData;
+  const dictNew = {};
+  for (const [key, value] of Object.entries(dataDict)) {
+    const newKey = camelToUnderscore(key);
+    if (!value) {
+      dictNew[newKey] = value;
+    } else if (typeof value === 'object' && !Array.isArray(value)) {
+      dictNew[newKey] = transformJsonKeys(value);
+    } else if (Array.isArray(value)) {
+      dictNew[newKey] = value.map((item) => (item && typeof item === 'object' ? transformJsonKeys(item) : item));
+    } else {
+      dictNew[newKey] = value;
+    }
+  }
+  return dictNew;
+};
+
+class XhsClient {
+  /**
+   * Creates a client for the Xiaohongshu web API.
+   * @param {Object} options - Configuration options
+   * @param {string} options.cookie - Cookie string for authentication
+   * @param {string} options.userAgent - User agent string for requests
+   * @param {number} options.timeout - Request timeout in milliseconds
+   * @param {string} options.proxies - Proxy settings (stored on the instance; not read by this class)
+   */
+  constructor({ cookie = null, userAgent = null, timeout = 10000, proxies = null } = {}) {
+    this.proxies = proxies;
+    this.timeout = timeout;
+    this._host = 'https://edith.xiaohongshu.com';
+    this._creatorHost = 'https://creator.xiaohongshu.com';
+    this._customerHost = 'https://customer.xiaohongshu.com';
+    this.home = 'https://www.xiaohongshu.com';
+    this.userAgent = userAgent || 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36';
+    this._cookie = cookie || '';
+    this._headers = {
+      'user-agent': this.userAgent,
+      'Content-Type': 'application/json',
+    };
+    if (cookie) {
+      this._headers.Cookie = cookie;
+    }
+  }
+  /**
+   * @param {...*} args - Debug hook payload (label first, then details); this implementation is a no-op
+   */
+  printResult(...args) {
+    //
+  }
+  // Getter for cookie
+  get cookie() {
+    return this._cookie;
+  }
+
+  // Setter for cookie: keeps the stored string and the Cookie request header in sync
+  set cookie(cookie) {
+    this._cookie = cookie;
+    this._headers.Cookie = cookie;
+  }
+
+  // Getter for cookieDict: the cookie string parsed with querystring ("; " treated as "&")
+  get cookieDict() {
+    const cookieStr = this._cookie;
+    return cookieStr ? qs.parse(cookieStr.replace(/; /g, '&')) : {};
+  }
+  getAgentHeader(config = {}) { // Fills in default user-agent/Content-Type under config.headers; mutates and returns config
+    const headers = config?.headers || {};
+    const newHeaders = {
+      'user-agent': this.userAgent,
+      'Content-Type': 'application/json',
+      ...headers,
+    };
+    config.headers = newHeaders;
+    return config;
+  }
+  getCookieMap() { // Parses the cookie string into a plain { name: value } object
+    const cookie = this.cookie;
+    let cookieDict = {};
+    if (cookie) {
+      const cookieArray = cookie.split(';');
+      cookieArray.forEach((item) => {
+        const [key, ...rest] = item.split('=');
+        const trimKey = key.trim();
+        if (trimKey) {
+          const _value = rest.join('=').trim();
+          cookieDict[trimKey] = _value;
+        }
+      });
+      return cookieDict;
+    }
+    return {};
+  }
+  /**
+   * Merges the given entries into the current cookie and stores the rebuilt cookie string.
+   * @param {Object} data - Cookie names mapped to their new values (falsy values become '')
+   */
+  setCookieMap(data = {}) {
+    const cookieDict = this.getCookieMap();
+    const newCookieDict = { ...cookieDict, ...data };
+    const cookieStr = Object.entries(newCookieDict)
+      .map(([key, value]) => {
+        const trimmedKey = key.trim();
+        const trimmedValue = value ? String(value).trim() : '';
+        return `${trimmedKey}=${trimmedValue}`;
+      })
+      .join('; ');
+    this._cookie = cookieStr;
+    this._headers.Cookie = cookieStr;
+    this.cookie = cookieStr;
+  }
+  // Sends one HTTP request. Resolves with the body (JSON or text) or a { code, msg, data: null } error object; throws NeedVerifyError on 461/471.
+  async request(method, url, config = {}) {
+    try {
+      delete config.sign;
+      const headers = { ...this._headers, ...(config.headers || {}) };
+
+      const fetchOptions = {
+        method: method,
+        headers: headers,
+      };
+
+      if (config.data) {
+        fetchOptions.body = typeof config.data === 'string' ? config.data : JSON.stringify(config.data);
+      }
+
+      const controller = new AbortController();
+      const timeoutId = setTimeout(() => controller.abort(), this.timeout);
+      fetchOptions.signal = controller.signal;
+
+      this.printResult('request', { method, url, config });
+
+      // Clear the timer even when fetch rejects, so it never outlives the request.
+      const response = await fetch(url, fetchOptions).finally(() => clearTimeout(timeoutId));
+
+      if (response.status === 471 || response.status === 461) {
+        const verifyType = response.headers.get('verifytype');
+        const verifyUuid = response.headers.get('verifyuuid');
+        throw new NeedVerifyError(`出现验证码,请求失败,Verifytype: ${verifyType},Verifyuuid: ${verifyUuid}`, verifyType, verifyUuid);
+      }
+
+      let data;
+      const contentType = response.headers.get('content-type');
+      if (contentType && contentType.includes('application/json')) {
+        data = await response.json();
+      } else {
+        data = await response.text();
+      }
+
+      this.printResult('response', {
+        url: url,
+        response,
+      });
+
+      if (!response.ok) {
+        return {
+          code: response.status,
+          status: response.status,
+          msg: '请求失败',
+          data: null,
+        };
+      }
+
+      return data;
+    } catch (error) {
+      if (error.name === 'AbortError') {
+        return {
+          code: 408,
+          msg: '请求超时',
+          data: null,
+        };
+      }
+      if (error instanceof NeedVerifyError) {
+        throw error;
+      }
+      console.error('Error in request:', error);
+      return {
+        code: 500,
+        msg: '请求失败',
+        data: null,
+      };
+    }
+  }
+  /**
+   * Runs config.sign (unless config.needSign is false), then removes sign/needSign from config. For GET requests data must be null.
+   * @param {*} uri
+   * @param {*} data
+   * @param {*} config
+   * @returns {Promise<Object>} the same config object (the return value of config.sign is ignored)
+   */
+  async requestSign(uri, data = null, config = {}, method = 'GET') {
+    const needSign = config.needSign ?? true;
+    if (needSign && config.sign) {
+      await config.sign(uri, data, config, method);
+    }
+    delete config.sign;
+    delete config.needSign;
+
+    return config;
+  }
+  getEndpoint(config = {}) { // Picks the API host from the isCustomer/isCreator flags and removes both flags from config
+    let endpoint = this._host;
+    let isCreator = config?.isCreator ?? false;
+    let isCustomer = config?.isCustomer ?? false;
+    if (isCustomer) {
+      endpoint = this._customerHost;
+    } else if (isCreator) {
+      endpoint = this._creatorHost;
+    }
+    delete config.isCreator;
+    delete config.isCustomer;
+
+    return { endpoint, isCreator, isCustomer };
+  }
+  /**
+   * Sends a GET request, signing it first when config.sign is provided; params are appended to uri as a query string.
+   * @param {*} uri
+   * @param {*} params
+   * @param {Object} config
+   * @param {*} [config.sign] - Async signing callback, invoked as sign(uri, data, config, method)
+   * @param {boolean} [config.isCreator] - Whether the request is for a creator
+   * @param {boolean} [config.isCustomer] - Whether the request is for a customer
+   * @param {boolean} [config.needSign] - Whether to call config.sign (defaults to true)
+   * @param {*} [config.headers] - Extra request headers, merged over the client defaults
+   * @returns {Promise<*>} whatever request() resolves with
+   */
+  async get(uri, params = null, config = {}) {
+    if (params) {
+      uri = `${uri}?${qs.stringify(params)}`;
+    }
+    this.printResult('get', { uri, params, config });
+    const endpoint = this.getEndpoint(config).endpoint;
+    config = await this.requestSign(uri, null, config, 'GET');
+
+    return this.request('GET', `${endpoint}${uri}`, config);
+  }
+  /**
+   * Sends a POST request with data as the body, signing it first when config.sign is provided.
+   * @param {*} uri
+   * @param {*} data
+   * @param {Object} config
+   * @param {*} [config.sign] - Async signing callback, invoked as sign(uri, data, config, method)
+   * @param {boolean} [config.isCreator] - Whether the request is for a creator
+   * @param {boolean} [config.isCustomer] - Whether the request is for a customer
+   * @param {boolean} [config.needSign] - Whether to call config.sign (defaults to true)
+   * @param {*} [config.headers] - Extra request headers, merged over the client defaults
+   * @returns {Promise<*>} whatever request() resolves with
+   */
+  async post(uri, data = null, config = {}) {
+    const endpoint = this.getEndpoint(config).endpoint;
+    config = await this.requestSign(uri, data, config, 'POST');
+    this.printResult('post', { uri, data, config });
+    return this.request('POST', `${endpoint}${uri}`, {
+      ...config,
+      data: data,
+    });
+  }
+
+  /**
+   * Gets note details.
+   * Note: requires xsec_token; throws when xsecToken is missing.
+   * @uri /api/sns/web/v1/feed
+   * @param {string} noteId
+   * @returns {Promise<*>} items[0].note_card of the response
+   */
+  async getNoteById(noteId, xsecToken, xsecSource = 'pc_feed', config = {}) {
+    if (!xsecToken) {
+      throw new Error('xsecToken is required');
+    }
+    const data = {
+      source_note_id: noteId,
+      image_scenes: ['CRD_WM_WEBP'],
+      xsec_token: xsecToken,
+      xsec_source: xsecSource,
+    };
+    const uri = '/api/sns/web/v1/feed';
+
+    try {
+      const res = await this.post(uri, data, config);
+      return res.items[0].note_card;
+    } catch (error) {
+      console.error('Error fetching note:', error);
+      throw error;
+    }
+  }
+  /**
+   * Gets note details by reading window.__INITIAL_STATE__ out of the note's HTML page.
+   * @uri /explore/{noteId}
+   * @param {string} noteId
+   * @param {string} xsecToken
+   * @param {string} [xsecSource=pc_feed]
+   * @returns {Promise<Object>} { code: 0, data } on success, { code: 500, msg, error, data: null } on failure
+   */
+  async getNoteByIdFromHtml(noteId, xsecToken, xsecSource = 'pc_feed') {
+    const url = `https://www.xiaohongshu.com/explore/${noteId}?xsec_token=${xsecToken}&xsec_source=${xsecSource}`;
+    this.printResult('html', { url, noteId, xsecToken, xsecSource });
+    let html = '';
+    try {
+      const controller = new AbortController();
+      const timeoutId = setTimeout(() => controller.abort(), this.timeout);
+      // Clear the timer even when fetch rejects, so it never outlives the request.
+      const response = await fetch(url, {
+        headers: {
+          'user-agent': this.userAgent,
+          referer: 'https://www.xiaohongshu.com/',
+          ...this._headers,
+        },
+        signal: controller.signal,
+      }).finally(() => {
+        clearTimeout(timeoutId);
+      });
+      html = await response.text();
+      const stateMatch = html.match(/window.__INITIAL_STATE__=({.*})<\/script>/);
+
+      if (stateMatch) {
+        const state = stateMatch[1].replace(/"undefined"/g, '"_"').replace(/\bundefined\b/g, '""');
+        if (state !== '{}') {
+          const noteDict = transformJsonKeys(JSON.parse(state));
+          return { code: 0, data: noteDict.note.note_detail_map[noteId].note };
+        }
+      }
+
+      if (html.includes(ErrorEnum.IP_BLOCK.msg)) {
+        throw new IPBlockError(ErrorEnum.IP_BLOCK.msg);
+      }
+
+      throw new DataFetchError(html);
+    } catch (error) {
+      console.error('Error fetching note:', error);
+      return {
+        code: 500,
+        msg: '请求失败',
+        error: error.message,
+        data: null,
+      };
+    }
+  }
+  /**
+   * Gets user info from the selfinfo endpoint.
+   * @uri /api/sns/web/v1/user/selfinfo
+   * @returns {Promise<*>} whatever get() resolves with
+   */
+  async getSelfInfo() {
+    const uri = '/api/sns/web/v1/user/selfinfo';
+    return this.get(uri);
+  }
+  /**
+   * @uri /api/sns/web/v2/user/me
+   * @returns {Promise<*>} whatever get() resolves with
+   */
+  async getSelfInfoV2() {
+    const uri = '/api/sns/web/v2/user/me';
+    return this.get(uri);
+  }
+  /**
+   * Gets the info of the user with the given id.
+   * @uri /api/sns/web/v1/user/otherinfo
+   * @param {string} userId
+   * @returns {Promise<*>} whatever get() resolves with
+   */
+  async getUserInfo(userId) {
+    const uri = '/api/sns/web/v1/user/otherinfo';
+    const params = {
+      target_user_id: userId,
+    };
+    return this.get(uri, params);
+  }
+
+  /**
+   * Searches notes by keyword.
+   * @uri /api/sns/web/v1/search/notes
+   * @param {string} keyword - Search keyword
+   * @param {number} page - Page number
+   * @param {number} pageSize - Number of results per page
+   * @param {{value: string}} sort - Sort order, a SearchSortType member: general, popularity_descending, time_descending
+   * @param {{value: number}} noteType - Note type, a SearchNoteType member
+   * @returns {Promise<*>} whatever post() resolves with
+   */
+  async getNoteByKeyword(keyword, page = 1, pageSize = 20, sort = SearchSortType.GENERAL, noteType = SearchNoteType.ALL) {
+    const uri = '/api/sns/web/v1/search/notes';
+    const data = {
+      keyword: keyword,
+      page: page,
+      page_size: pageSize,
+      search_id: getSearchId(),
+      sort: sort.value,
+      note_type: noteType.value,
+      image_formats: ['jpg', 'webp', 'avif'],
+      ext_flags: [],
+    };
+
+    return this.post(uri, data);
+  }
+
+  /**
+   * Gets the comments of a note.
+   * @uri /api/sns/web/v2/comment/page
+   * @param {string} noteId - Note id
+   * @param {string} cursor - Pagination cursor, defaults to ""
+   * @param {Object} otherParams - Extra query parameters, merged over the defaults
+   * @returns {Promise<*>} whatever get() resolves with
+   */
+  async getNoteComments(noteId, cursor = '', otherParams = {}) {
+    const uri = '/api/sns/web/v2/comment/page';
+    const params = {
+      note_id: noteId,
+      cursor: cursor,
+      image_formats: 'jpg,webp,avif',
+      ...otherParams,
+    };
+    return this.get(uri, params);
+  }
+
+  /**
+   * Gets the notes posted by a user (30 per request).
+   * @uri /api/sns/web/v1/user_posted
+   * @param {*} userId
+   * @param {*} cursor
+   * @returns {Promise<*>} whatever get() resolves with
+   */
+  async getUserNotes(userId, cursor = '') {
+    const uri = '/api/sns/web/v1/user_posted';
+    const params = {
+      cursor: cursor,
+      num: 30,
+      user_id: userId,
+      image_scenes: 'FD_WM_WEBP',
+    };
+    return this.get(uri, params);
+  }
+
+  /**
+   * Gets the account's @-mention notifications.
+   * @uri /api/sns/web/v1/you/mentions
+   * @param {*} num
+   * @param {*} cursor
+   * @returns {Promise<*>} whatever get() resolves with
+   */
+  async getMentionNotifications(num = 20, cursor = '') {
+    const uri = '/api/sns/web/v1/you/mentions';
+    const params = { num: num, cursor: cursor };
+    return this.get(uri, params);
+  }
+
+  /**
+   * Gets like notifications.
+   * @uri /api/sns/web/v1/you/likes
+   * @param {*} num
+   * @param {*} cursor
+   * @returns {Promise<*>} whatever get() resolves with
+   */
+  async getLikeNotifications(num = 20, cursor = '') {
+    const uri = '/api/sns/web/v1/you/likes';
+    const params = { num: num, cursor: cursor };
+    return this.get(uri, params);
+  }
+
+  /**
+   * Gets follow notifications.
+   * @uri /api/sns/web/v1/you/connections
+   * @param {*} num
+   * @param {*} cursor
+   * @returns {Promise<*>} whatever get() resolves with
+   */
+  async getFollowNotifications(num = 20, cursor = '', config = {}) {
+    const uri = '/api/sns/web/v1/you/connections';
+    const params = { num: num, cursor: cursor };
+    return this.get(uri, params, config);
+  }
+  /**
+   * Gets user info from the profile page.
+   * @uri /user/profile/{userId}
+   * @description Fetches the profile HTML by user id and reads basic_info out of window.__INITIAL_STATE__
+   * @param {string} userId
+   * @returns {Promise<*>} the basic_info object, or the raw HTML string when no usable state is found
+   */
+  async getUserInfoFromHtml(userId) {
+    const url = `https://www.xiaohongshu.com/user/profile/${userId}`;
+    try {
+      const controller = new AbortController();
+      const timeoutId = setTimeout(() => controller.abort(), this.timeout);
+      // Clear the timer even when fetch rejects, so it never outlives the request.
+      const response = await fetch(url, {
+        headers: {
+          'user-agent': this.userAgent,
+          referer: 'https://www.xiaohongshu.com/',
+          ...this._headers,
+        },
+        signal: controller.signal,
+      }).finally(() => {
+        clearTimeout(timeoutId);
+      });
+      const html = await response.text();
+      const stateMatch = html.match(/window.__INITIAL_STATE__=({.*})<\/script>/);
+      if (stateMatch) {
+        const state = stateMatch[1].replace(/"undefined"/g, '"_"').replace(/\bundefined\b/g, '""');
+        if (state !== '{}') {
+          const parsedState = JSON.parse(state);
+          const userBasicInfo = transformJsonKeys(parsedState).user.user_page_data.basic_info;
+          return userBasicInfo;
+        }
+      }
+      return html;
+    } catch (error) {
+      console.error('Error fetching user info:', error);
+      throw error;
+    }
+  }
+}
+
+export { XhsClient };
diff --git a/packages/xhs/src/libs/utils/exception.js b/packages/xhs/src/libs/utils/exception.js
new file mode 100644
index 0000000..92a3260
--- /dev/null
+++ b/packages/xhs/src/libs/utils/exception.js
@@ -0,0 +1,46 @@
+// Known error codes and their messages
+const ErrorEnum = {
+  IP_BLOCK: { code: 300012, msg: '网络连接异常,请检查网络设置或重启试试' },
+  NOTE_CANT_GET: { code: 300031, msg: '当前笔记暂时无法浏览' },
+  NOTE_ABNORMAL: { code: -510001, msg: '笔记状态异常,请稍后查看' },
+  NOTE_SECRETE_FAULT: { code: -510001, msg: '当前内容无法展示' },
+  SIGN_FAULT: { code: 300015, msg: '浏览器异常,请尝试关闭/卸载风险插件或重启试试!' },
+  SESSION_EXPIRED: { code: -100, msg: '登录已过期' },
+};
+
+// Custom error classes
+class DataFetchError extends Error {
+  constructor(message) {
+    super(message);
+    this.message = message;
+    this.name = 'DataFetchError';
+  }
+}
+
+class IPBlockError extends Error {
+  constructor(message) {
+    super(message);
+    this.message = message;
+    this.name = 'IPBlockError';
+  }
+}
+
+class SignError extends Error {
+  constructor(message) {
+    super(message);
+    this.message = message;
+    this.name = 'SignError';
+  }
+}
+
+class NeedVerifyError extends Error {
+  constructor(message, verifyType = null, verifyUuid = null) {
+    super(message);
+    this.message = message;
+    this.name = 'NeedVerifyError';
+    this.verifyType = verifyType;
+    this.verifyUuid = verifyUuid;
+  }
+}
+
+export { ErrorEnum, DataFetchError, IPBlockError, SignError, NeedVerifyError };
diff --git a/packages/xhs/src/libs/utils/helper.js b/packages/xhs/src/libs/utils/helper.js
new file mode 100644
index 0000000..7f4971b
--- /dev/null
+++ b/packages/xhs/src/libs/utils/helper.js
@@ -0,0 +1,32 @@
+function getSearchId() {
+  const e = BigInt(Date.now()) << 64n;
+  const t = Math.floor(Math.random() * 2147483647);
+  return base36encode(e + BigInt(t));
+}
+
+function base36encode(num) {
+  return num.toString(36).toUpperCase();
+}
+const SearchSortType = Object.freeze({
+  // default
+  GENERAL: { value: 'general' },
+  // most popular
+  MOST_POPULAR: { value: 'popularity_descending' },
+  // Latest
+  LATEST: { value: 'time_descending' },
+});
+
+const SearchNoteType = Object.freeze({
+  // default
+  ALL: { value: 0 },
+  // only video
+  VIDEO: { value: 1 },
+  // only image
+  IMAGE: { value: 2 },
+});
+
+export {
+  getSearchId,
+  SearchSortType,
+  SearchNoteType,
+};
diff --git a/packages/xhs/src/libs/xhs.ts b/packages/xhs/src/libs/xhs.ts
index f170a29..798c7d1 100644
--- a/packages/xhs/src/libs/xhs.ts
+++ b/packages/xhs/src/libs/xhs.ts
@@ -1,7 +1,7 @@
 import { getApiInfo } from './xhs-api/api.ts';
-import { XhsClient as XhsClientBase } from '@kevisual/xhs-core';
+import { XhsClient as XhsClientBase } from './client-base.js';
 import { Mention, CommonentInfo, ResponseMession } from './xhs-type/mention.ts';
-import { method, pick } from 'lodash-es';
+import { pick } from 'lodash-es';
 import { getNote } from './modules/get-note.ts';
 export type Result = {
   code: number; // 0: success
@@ -32,14 +32,12 @@ type SignOptions = {
 };
 export const getSign = async (signInfo: SignInfo, options?: SignOptions): Promise => {
   const { uri, data, a1, method } = signInfo;
-  // let signUri = new URL(uri, 'http://light.xiongxiao.me:5006').pathname;
-  // signUri = '/api/sns/web/v2/user/me';
   try {
     let signUrl = options?.signUrl || 'http://localhost:5005/sign';
     // signUrl = 'http://localhost:5005/sign';
     // const urlA1 = ''http://light.xiongxiao.me:5006/a1';
     // const urlA1 = 'http://localhost:5005/a1';
-    // console.log('sign', signUrl);
+    console.log('sign', signUrl, signInfo);
     const signs = await fetch(signUrl, {
       method: 'POST',
       headers: {
@@ -49,7 +47,7 @@ export const getSign = async (signInfo: SignInfo, options?: SignOptions): Promis
         uri: uri,
         data,
         a1,
-        method: signInfo.method || 'POST',
+        method: method ?? 'POST',
       }),
     }).then((res) => res.json());
     return signs as SignResponse;
@@ -88,13 +86,16 @@ export class XhsClient extends XhsClientBase {
     }
     switch (msg) {
       case 'get':
-        // console.log('get', data);
+        console.log('get==>', data);
         break;
       case 'sign':
-        // console.log('sign', data);
+        console.log('sign==>', data);
         break;
       case 'post':
-        console.log('post', data);
+        console.log('post==>', data);
+        break;
+      case 'error':
+        console.log('error==>', data);
         break;
       default:
         break;
@@ -182,7 +183,7 @@ export class XhsClient extends XhsClientBase {
     );
     return response;
   }
-  async sign(uri: string, data: any, config: any) {
+  async sign(uri: string, data: any, config: any, method?: 'GET' | 'POST') {
     let headers = config?.headers || {};
     const cookieDist = this.getCookieMap();
     const apiInfo = this.getApiInfo(uri);
@@ -190,8 +191,7 @@ export class XhsClient extends XhsClientBase {
       return config || {};
     }
     const a1 = cookieDist['a1'];
-    const res = await getSign({ uri, data, a1, method: 'POST' }, this.signConfig);
-    console.log('sign response', res);
+    const res = await getSign({ uri, data, a1, method }, this.signConfig);
     const _sign = res.sign
     this.printResult('sign', { uri, apiInfo, res });
     const xs = _sign?.['x-s'];
diff --git a/packages/xhs/src/test/common.ts b/packages/xhs/src/test/common.ts
index 15c0f8a..01ff612 100644
--- a/packages/xhs/src/test/common.ts
+++ b/packages/xhs/src/test/common.ts
@@ -4,4 +4,8 @@ export const config = useConfig();
 import { program } from 'commander';
 xhsRootClient.setCookie(config.XHS_ROOT_COOKIE || '');
 
+xhsServices.setSignConfig({
+  signUrl: config.XHS_API_SIGN_URL,
+});
+
 export { program, xhsServices, app };
diff --git a/packages/xhs/src/test/query/mention.ts b/packages/xhs/src/test/query/mention.ts
index 2803b97..4238fac 100644
--- a/packages/xhs/src/test/query/mention.ts
+++ b/packages/xhs/src/test/query/mention.ts
@@ -1,6 +1,5 @@
 import { xhsServices, program, app } from '../common.ts';
 import util from 'node:util';
-import { omit } from 'lodash-es';
 
 const getMentions = async () => {
   try {
diff --git a/packages/xhs/src/test/query/query-keys.ts b/packages/xhs/src/test/query/query-keys.ts
index 3da8da6..967858b 100644
--- a/packages/xhs/src/test/query/query-keys.ts
+++ b/packages/xhs/src/test/query/query-keys.ts
@@ -5,9 +5,9 @@ import util from 'node:util';
 
 const getNoteByKeyword = async (keyword: string) => {
   const client = xhsServices.getClient();
-  xhsServices.setSignConfig({
-    signUrl: config.XHS_API_SIGN_URL,
-  });
+  // xhsServices.setSignConfig({
+  //   signUrl: config.XHS_API_SIGN_URL,
+  // });
   const res = await client.getNoteByKeyword(keyword).then((res) => {
     console.log(util.inspect(res, { depth: null }));
     return res;
diff --git a/packages/xhs/tsconfig.json b/packages/xhs/tsconfig.json
index 70f615a..463d887 100644
--- a/packages/xhs/tsconfig.json
+++ b/packages/xhs/tsconfig.json
@@ -12,7 +12,7 @@
     }
   },
   "include": [
-    "src/**/*.ts",
+    "src/**/*.ts", "src/libs/client-base.js",
   ],
   "exclude": [],
 }
\ No newline at end of file
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 80e8673..102c562 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -23,6 +23,9 @@ importers:
       '@kevisual/use-config':
         specifier: ^1.0.21
         version: 1.0.21(dotenv@17.2.3)
+      axios:
+        specifier: ^1.9.0
+        version: 1.9.0
       cookie:
         specifier: ^1.1.1
         version: 1.1.1
@@ -39,9 +42,6 @@ importers:
         specifier: ^5.1.6
         version: 5.1.6
     devDependencies:
-      '@kevisual/app-assistant':
-        specifier: workspace:*
-        version: link:packages/app-assistant
       '@kevisual/logger':
         specifier: ^0.0.4
         version: 0.0.4