# -*- coding: utf-8 -*-
# Copyright (c) 2025 relakkes@gmail.com
#
# This file is part of MediaCrawler project.
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/media_platform/xhs/playwright_sign.py
# GitHub: https://github.com/NanmiCoder
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
#

# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。

# Generate Xiaohongshu (XHS) request signatures by calling window.mnsv2
# through Playwright injection.

import hashlib
import json
import logging
import time
from typing import Any, Dict, Optional, Union
from urllib.parse import urlparse, quote

from playwright.async_api import Page

from xhs_sign import b64_encode, encode_utf8, get_trace_id, mrc

_logger = logging.getLogger(__name__)


def _stringify_query_value(value: Any) -> str:
    """Render one GET parameter value as the raw (not yet URL-encoded) string.

    Lists are joined with commas, ``None`` becomes an empty string, and any
    other value is passed through ``str()``.
    """
    if isinstance(value, list):
        return ",".join(str(item) for item in value)
    if value is None:
        return ""
    return str(value)


def _build_sign_string(uri: str, data: Optional[Union[Dict, str]] = None, method: str = "POST") -> str:
    """Build the string that is handed to the signing function.

    Args:
        uri: API path.
        data: Request data (POST payload or GET params).
        method: HTTP method; ``"POST"`` (case-insensitive) selects the JSON
            form, anything else selects the query-string form.

    Returns:
        For POST: ``uri`` followed by the compact JSON dump of a dict payload,
        or by the payload itself when it is already a string. For other
        methods: ``uri?key=value&...`` built from a dict, or ``uri?data`` for
        a string. ``uri`` alone when there is no usable data.
    """
    if method.upper() == "POST":
        # POST requests are signed over the compact JSON body.
        if isinstance(data, dict):
            return uri + json.dumps(data, separators=(",", ":"), ensure_ascii=False)
        if isinstance(data, str):
            return uri + data
        return uri

    # GET requests are signed over the query string.
    if not data:
        return uri
    if isinstance(data, dict):
        # Every character of the value is percent-encoded (safe=''); the
        # original author notes that httpx also encodes commas, equals signs
        # and similar characters, so the signed string must do the same.
        params = [
            f"{key}={quote(_stringify_query_value(value), safe='')}"
            for key, value in data.items()
        ]
        return f"{uri}?{'&'.join(params)}"
    if isinstance(data, str):
        return f"{uri}?{data}"
    return uri


def _md5_hex(s: str) -> str:
    """Return the hexadecimal MD5 digest of the UTF-8 encoding of ``s``."""
    return hashlib.md5(s.encode("utf-8")).hexdigest()


def _build_xs_payload(x3_value: str, data_type: str = "object") -> str:
    """Build the ``x-s`` header value.

    Args:
        x3_value: Signature string placed in the ``x3`` field.
        data_type: Value placed in the ``x4`` field.

    Returns:
        ``"XYS_"`` followed by the encoded compact-JSON payload.
    """
    s = {
        "x0": "4.2.1",
        "x1": "xhs-pc-web",
        "x2": "Mac OS",
        "x3": x3_value,
        "x4": data_type,
    }
    return "XYS_" + b64_encode(encode_utf8(json.dumps(s, separators=(",", ":"))))


def _build_xs_common(a1: str, b1: str, x_s: str, x_t: str) -> str:
    """Build the ``x-s-common`` header value.

    Args:
        a1: Stored in the ``x5`` field.
        b1: Stored in the ``x8`` field.
        x_s: The ``x-s`` value, stored in ``x7``.
        x_t: The ``x-t`` value, stored in ``x6``.

    Returns:
        The encoded compact-JSON payload. The ``x9`` field is
        ``mrc(x_t + x_s + b1)``.
    """
    payload = {
        "s0": 3,
        "s1": "",
        "x0": "1",
        "x1": "4.2.2",
        "x2": "Mac OS",
        "x3": "xhs-pc-web",
        "x4": "4.74.0",
        "x5": a1,
        "x6": x_t,
        "x7": x_s,
        "x8": b1,
        "x9": mrc(x_t + x_s + b1),
        "x10": 154,
        "x11": "normal",
    }
    return b64_encode(encode_utf8(json.dumps(payload, separators=(",", ":"))))


async def get_b1_from_localstorage(page: Page) -> str:
    """Read the ``b1`` entry from the page's localStorage.

    Args:
        page: Playwright page to evaluate in.

    Returns:
        The stored value, or ``""`` when the key is absent or the page
        evaluation fails (the failure is logged, not raised).
    """
    try:
        # Fetch only the key we need instead of serializing all of localStorage.
        b1 = await page.evaluate("() => window.localStorage.getItem('b1')")
    except Exception:
        _logger.warning("Failed to read b1 from localStorage", exc_info=True)
        return ""
    return b1 or ""


async def call_mnsv2(page: Page, sign_str: str, md5_str: str) -> str:
    """Call ``window.mnsv2`` inside the page via Playwright.

    Args:
        page: Playwright page to evaluate in.
        sign_str: String to sign (uri + JSON.stringify(data)).
        md5_str: MD5 hex digest of ``sign_str``.

    Returns:
        The value returned by ``window.mnsv2``, or ``""`` when it returns a
        falsy value or the evaluation fails (the failure is logged, not
        raised).
    """
    try:
        # Pass the values as evaluate() arguments rather than splicing them
        # into JavaScript source: no hand-rolled escaping is needed, and
        # quotes, backslashes or line terminators in the payload can neither
        # break the call nor inject script into the page.
        result = await page.evaluate(
            "([signStr, md5Str]) => window.mnsv2(signStr, md5Str)",
            [sign_str, md5_str],
        )
    except Exception:
        _logger.warning("window.mnsv2 evaluation failed", exc_info=True)
        return ""
    return result if result else ""


async def sign_xs_with_playwright(
    page: Page,
    uri: str,
    data: Optional[Union[Dict, str]] = None,
    method: str = "POST",
) -> str:
    """Generate the ``x-s`` signature through Playwright injection.

    Args:
        page: Playwright page (the original author requires that an XHS page
            is already open in it).
        uri: API path, e.g. "/api/sns/web/v1/search/notes".
        data: Request data (GET params or POST payload).
        method: HTTP method (GET or POST).

    Returns:
        The ``x-s`` signature string.
    """
    sign_str = _build_sign_string(uri, data, method)
    md5_str = _md5_hex(sign_str)
    x3_value = await call_mnsv2(page, sign_str, md5_str)
    data_type = "object" if isinstance(data, (dict, list)) else "string"
    return _build_xs_payload(x3_value, data_type)


async def sign_with_playwright(
    page: Page,
    uri: str,
    data: Optional[Union[Dict, str]] = None,
    a1: str = "",
    method: str = "POST",
) -> Dict[str, Any]:
    """Generate the full set of signature headers through Playwright.

    Args:
        page: Playwright page (the original author requires that an XHS page
            is already open in it).
        uri: API path.
        data: Request data.
        a1: The ``a1`` cookie value.
        method: HTTP method (GET or POST).

    Returns:
        A dict with the keys ``x-s``, ``x-t``, ``x-s-common`` and
        ``x-b3-traceid``.
    """
    b1 = await get_b1_from_localstorage(page)
    x_s = await sign_xs_with_playwright(page, uri, data, method)
    # Millisecond timestamp, taken after the signature has been produced.
    x_t = str(int(time.time() * 1000))

    return {
        "x-s": x_s,
        "x-t": x_t,
        "x-s-common": _build_xs_common(a1, b1, x_s, x_t),
        "x-b3-traceid": get_trace_id(),
    }


async def pre_headers_with_playwright(
    page: Page,
    url: str,
    cookie_dict: Dict[str, str],
    params: Optional[Dict] = None,
    payload: Optional[Dict] = None,
) -> Dict[str, str]:
    """Generate signed request headers using Playwright injection.

    Per the original author, this can directly replace the ``_pre_headers``
    method in client.py.

    Args:
        page: Playwright page.
        url: Request URL; only its path component is signed.
        cookie_dict: Cookie dict; the ``a1`` entry is read (default ``""``).
        params: GET request params. Takes precedence over ``payload`` when
            both are given.
        payload: POST request payload.

    Returns:
        The signed header dict.

    Raises:
        ValueError: If both ``params`` and ``payload`` are ``None``.
    """
    a1_value = cookie_dict.get("a1", "")
    uri = urlparse(url).path

    # Decide the request data and method.
    if params is not None:
        data = params
        method = "GET"
    elif payload is not None:
        data = payload
        method = "POST"
    else:
        raise ValueError("params or payload is required")

    signs = await sign_with_playwright(page, uri, data, a1_value, method)

    return {
        "X-S": signs["x-s"],
        "X-T": signs["x-t"],
        "x-S-Common": signs["x-s-common"],
        "X-B3-Traceid": signs["x-b3-traceid"],
    }