diff --git a/package.json b/package.json index 48f48a3..7e928f9 100644 --- a/package.json +++ b/package.json @@ -42,6 +42,8 @@ "dependencies": { "better-sqlite3": "^12.5.0", "playwright": "^1.57.0", + "playwright-extra": "^4.3.6", + "playwright-extra-plugin-stealth": "^0.0.1", "user-agents": "^1.1.669", "zod": "^4.2.1", "zod-to-json-schema": "^3.25.1" @@ -62,6 +64,7 @@ "drizzle-orm": "^0.45.1", "es-toolkit": "^1.43.0", "eventemitter3": "^5.0.1", - "lru-cache": "^11.2.4" + "lru-cache": "^11.2.4", + "puppeteer-extra-plugin-stealth": "^2.11.2" } } \ No newline at end of file diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index aa79619..fd7ac48 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -14,6 +14,12 @@ importers: playwright: specifier: ^1.57.0 version: 1.57.0 + playwright-extra: + specifier: ^4.3.6 + version: 4.3.6(playwright-core@1.57.0)(playwright@1.57.0) + playwright-extra-plugin-stealth: + specifier: ^0.0.1 + version: 0.0.1 user-agents: specifier: ^1.1.669 version: 1.1.669 @@ -72,6 +78,9 @@ importers: lru-cache: specifier: ^11.2.4 version: 11.2.4 + puppeteer-extra-plugin-stealth: + specifier: ^2.11.2 + version: 2.11.2(playwright-extra@4.3.6(playwright-core@1.57.0)(playwright@1.57.0)) packages: @@ -439,16 +448,29 @@ packages: '@types/bun@1.3.5': resolution: {integrity: sha512-RnygCqNrd3srIPEWBd5LFeUYG7plCoH2Yw9WaZGyNmdTEei+gWaHqydbaIRkIkcbXwhBT94q78QljxN0Sk838w==} + '@types/debug@4.1.12': + resolution: {integrity: sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==} + + '@types/ms@2.1.0': + resolution: {integrity: sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==} + '@types/node@25.0.3': resolution: {integrity: sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA==} '@types/user-agents@1.0.4': resolution: {integrity: sha512-AjeFc4oX5WPPflgKfRWWJfkEk7Wu82fnj1rROPsiqFt6yElpdGFg8Srtm/4PU4rA9UiDUZlruGPgcwTMQlwq4w==} + arr-union@3.1.0: 
+ resolution: {integrity: sha512-sKpyeERZ02v1FeCZT8lrfJq5u6goHCtpTAzPwJYe7c8SPFOboNjNg1vz2L4VTn9T4PQxEx13TbXLmYUcS6Ug7Q==} + engines: {node: '>=0.10.0'} + asn1js@3.0.7: resolution: {integrity: sha512-uLvq6KJu04qoQM6gvBfKFjlh6Gl0vOKQuR5cJMDHQkmwfMOQeN3F3SHCv9SNYSL+CRoHvOGFfllDlVz03GQjvQ==} engines: {node: '>=12.0.0'} + balanced-match@1.0.2: + resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} + base64-js@1.5.1: resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} @@ -462,6 +484,9 @@ packages: bl@4.1.0: resolution: {integrity: sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==} + brace-expansion@1.1.12: + resolution: {integrity: sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==} + buffer-from@1.1.2: resolution: {integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==} @@ -478,10 +503,17 @@ packages: chownr@1.1.4: resolution: {integrity: sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==} + clone-deep@0.2.4: + resolution: {integrity: sha512-we+NuQo2DHhSl+DP6jlUiAhyAjBQrYnpOk15rN6c6JSPScjiCLh8IbSU+VTcph6YS3o7mASE8a0+gbZ7ChLpgg==} + engines: {node: '>=0.10.0'} + commander@14.0.2: resolution: {integrity: sha512-TywoWNNRbhoD0BXs1P3ZEScW8W5iKrnbithIl0YH+uCmBd0QpPOA8yc82DS3BIE5Ma6FnBVUsJ7wVUDz4dvOWQ==} engines: {node: '>=20'} + concat-map@0.0.1: + resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==} + debug@4.4.3: resolution: {integrity: sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==} engines: {node: '>=6.0'} @@ -499,6 +531,10 @@ packages: resolution: {integrity: 
sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==} engines: {node: '>=4.0.0'} + deepmerge@4.3.1: + resolution: {integrity: sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==} + engines: {node: '>=0.10.0'} + depd@2.0.0: resolution: {integrity: sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==} engines: {node: '>= 0.8'} @@ -652,6 +688,18 @@ packages: file-uri-to-path@1.0.0: resolution: {integrity: sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==} + for-in@0.1.8: + resolution: {integrity: sha512-F0to7vbBSHP8E3l6dCjxNOLuSFAACIxFy3UehTUlG7svlXi37HHsDkyVcHo0Pq8QwrE+pXvWSVX3ZT1T9wAZ9g==} + engines: {node: '>=0.10.0'} + + for-in@1.0.2: + resolution: {integrity: sha512-7EwmXrOjyL+ChxMhmG5lnW9MPt1aIeZEwKhQzoBUdTV0N3zuwWDZYVJatDvZ2OyzPUvdIAZDsCetk3coyMfcnQ==} + engines: {node: '>=0.10.0'} + + for-own@0.1.5: + resolution: {integrity: sha512-SKmowqGTJoPzLO1T0BBJpkfp3EMacCMOuH40hOUbrbzElVktk4DioXVM99QkLCyKoiuOmyjgcWMpVz2xjE7LZw==} + engines: {node: '>=0.10.0'} + fresh@2.0.0: resolution: {integrity: sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==} engines: {node: '>= 0.8'} @@ -659,6 +707,13 @@ packages: fs-constants@1.0.0: resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==} + fs-extra@10.1.0: + resolution: {integrity: sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==} + engines: {node: '>=12'} + + fs.realpath@1.0.0: + resolution: {integrity: sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==} + fsevents@2.3.2: resolution: {integrity: sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==} engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} @@ -670,6 +725,13 
@@ packages: github-from-package@0.0.0: resolution: {integrity: sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==} + glob@7.2.3: + resolution: {integrity: sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==} + deprecated: Glob versions prior to v9 are no longer supported + + graceful-fs@4.2.11: + resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==} + http-errors@2.0.1: resolution: {integrity: sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==} engines: {node: '>= 0.8'} @@ -677,12 +739,50 @@ packages: ieee754@1.2.1: resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==} + inflight@1.0.6: + resolution: {integrity: sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==} + deprecated: This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful. 
+ inherits@2.0.4: resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} ini@1.3.8: resolution: {integrity: sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==} + is-buffer@1.1.6: + resolution: {integrity: sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==} + + is-extendable@0.1.1: + resolution: {integrity: sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw==} + engines: {node: '>=0.10.0'} + + is-plain-object@2.0.4: + resolution: {integrity: sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==} + engines: {node: '>=0.10.0'} + + isobject@3.0.1: + resolution: {integrity: sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg==} + engines: {node: '>=0.10.0'} + + jsonfile@6.2.0: + resolution: {integrity: sha512-FGuPw30AdOIUTRMC2OMRtQV+jkVj2cfPqSeWXv1NEAJ1qZ5zb1X6z1mFhbfOB/iy3ssJCD+3KuZ8r8C3uVFlAg==} + + kind-of@2.0.1: + resolution: {integrity: sha512-0u8i1NZ/mg0b+W3MGGw5I7+6Eib2nx72S/QvXa0hYjEkjTknYmEYQJwGu3mLC0BrhtJjtQafTkyRUQ75Kx0LVg==} + engines: {node: '>=0.10.0'} + + kind-of@3.2.2: + resolution: {integrity: sha512-NOW9QQXMoZGg/oqnVNoNTTIFEIid1627WCffUBJEdMxYApq7mNE7CpzucIPc+ZQg25Phej7IJSmX3hO+oblOtQ==} + engines: {node: '>=0.10.0'} + + lazy-cache@0.2.7: + resolution: {integrity: sha512-gkX52wvU/R8DVMMt78ATVPFMJqfW8FPz1GZ1sVHBVQHmu/WvhIWE4cE1GBzhJNFicDeYhnwp6Rl35BcAIM3YOQ==} + engines: {node: '>=0.10.0'} + + lazy-cache@1.0.4: + resolution: {integrity: sha512-RE2g0b5VGZsOCFOCgP7omTRYFqydmZkBwl5oNnQ1lDYC57uyO9KqNnNVxT7COSHTxrRCWVcAVOcbjk+tvh/rgQ==} + engines: {node: '>=0.10.0'} + lodash.clonedeep@4.5.0: resolution: {integrity: sha512-H5ZhCF25riFd9uB5UCkVKo61m3S/xZk1x4wA6yp/L3RFP6Z/eHH1ymQcGLo7J3GMPfm0V/7m1tryHuGVxpqEBQ==} @@ -690,6 +790,10 @@ packages: resolution: {integrity: 
sha512-B5Y16Jr9LB9dHVkh6ZevG+vAbOsNOYCX+sXvFWFu7B3Iz5mijW3zdbMyhsh8ANd2mSWBYdJgnqi+mL7/LrOPYg==} engines: {node: 20 || >=22} + merge-deep@3.0.3: + resolution: {integrity: sha512-qtmzAS6t6grwEkNrunqTBdn0qKwFgNWvlxUbAV8es9M7Ot1EbyApytCnvE0jALPa46ZpKDUo527kKiaWplmlFA==} + engines: {node: '>=0.10.0'} + mime-db@1.54.0: resolution: {integrity: sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==} engines: {node: '>= 0.6'} @@ -702,9 +806,16 @@ packages: resolution: {integrity: sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==} engines: {node: '>=10'} + minimatch@3.1.2: + resolution: {integrity: sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==} + minimist@1.2.8: resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==} + mixin-object@2.0.1: + resolution: {integrity: sha512-ALGF1Jt9ouehcaXaHhn6t1yGWRqGaHkPFndtFVHfZXOvkIZ/yoGaSi0AHVTafb3ZBGg4dr/bDwnaEKqCXzchMA==} + engines: {node: '>=0.10.0'} + mkdirp-classic@0.5.3: resolution: {integrity: sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==} @@ -730,6 +841,10 @@ packages: once@1.4.0: resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==} + path-is-absolute@1.0.1: + resolution: {integrity: sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==} + engines: {node: '>=0.10.0'} + path-to-regexp@8.3.0: resolution: {integrity: sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA==} @@ -742,6 +857,21 @@ packages: engines: {node: '>=18'} hasBin: true + playwright-extra-plugin-stealth@0.0.1: + resolution: {integrity: sha512-eI0Ujf4MXbcupzlVEXaaOnb+Exjt1sFi7t/3KxIA5pVww+WRAXRWdhqTz0glX62jJq2YM8fLu+GyvULpjTpZrw==} + + playwright-extra@4.3.6: + 
resolution: {integrity: sha512-q2rVtcE8V8K3vPVF1zny4pvwZveHLH8KBuVU2MoE3Jw4OKVoBWsHI9CH9zPydovHHOCDxjGN2Vg+2m644q3ijA==} + engines: {node: '>=12'} + peerDependencies: + playwright: '*' + playwright-core: '*' + peerDependenciesMeta: + playwright: + optional: true + playwright-core: + optional: true + playwright@1.57.0: resolution: {integrity: sha512-ilYQj1s8sr2ppEJ2YVadYBN0Mb3mdo9J0wQ+UuDhzYqURwSoW4n1Xs5vs7ORwgDGmyEh33tRMeS8KhdkMoLXQw==} engines: {node: '>=18'} @@ -755,6 +885,54 @@ packages: pump@3.0.3: resolution: {integrity: sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA==} + puppeteer-extra-plugin-stealth@2.11.2: + resolution: {integrity: sha512-bUemM5XmTj9i2ZerBzsk2AN5is0wHMNE6K0hXBzBXOzP5m5G3Wl0RHhiqKeHToe/uIH8AoZiGhc1tCkLZQPKTQ==} + engines: {node: '>=8'} + peerDependencies: + playwright-extra: '*' + puppeteer-extra: '*' + peerDependenciesMeta: + playwright-extra: + optional: true + puppeteer-extra: + optional: true + + puppeteer-extra-plugin-user-data-dir@2.4.1: + resolution: {integrity: sha512-kH1GnCcqEDoBXO7epAse4TBPJh9tEpVEK/vkedKfjOVOhZAvLkHGc9swMs5ChrJbRnf8Hdpug6TJlEuimXNQ+g==} + engines: {node: '>=8'} + peerDependencies: + playwright-extra: '*' + puppeteer-extra: '*' + peerDependenciesMeta: + playwright-extra: + optional: true + puppeteer-extra: + optional: true + + puppeteer-extra-plugin-user-preferences@2.4.1: + resolution: {integrity: sha512-i1oAZxRbc1bk8MZufKCruCEC3CCafO9RKMkkodZltI4OqibLFXF3tj6HZ4LZ9C5vCXZjYcDWazgtY69mnmrQ9A==} + engines: {node: '>=8'} + peerDependencies: + playwright-extra: '*' + puppeteer-extra: '*' + peerDependenciesMeta: + playwright-extra: + optional: true + puppeteer-extra: + optional: true + + puppeteer-extra-plugin@3.2.3: + resolution: {integrity: sha512-6RNy0e6pH8vaS3akPIKGg28xcryKscczt4wIl0ePciZENGE2yoaQJNd17UiEbdmh5/6WW6dPcfRWT9lxBwCi2Q==} + engines: {node: '>=9.11.2'} + peerDependencies: + playwright-extra: '*' + puppeteer-extra: '*' + peerDependenciesMeta: + 
playwright-extra: + optional: true + puppeteer-extra: + optional: true + pvtsutils@1.3.6: resolution: {integrity: sha512-PLgQXQ6H2FWCaeRak8vvk1GW462lMxB5s3Jm673N82zI4vqtVUPuZdffdZbPDFRoU8kAhItWFtPCWiPpp4/EDg==} @@ -780,6 +958,11 @@ packages: resolve-pkg-maps@1.0.0: resolution: {integrity: sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==} + rimraf@3.0.2: + resolution: {integrity: sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==} + deprecated: Rimraf versions prior to v4 are no longer supported + hasBin: true + safe-buffer@5.2.1: resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==} @@ -799,6 +982,10 @@ packages: setprototypeof@1.2.0: resolution: {integrity: sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==} + shallow-clone@0.1.2: + resolution: {integrity: sha512-J1zdXCky5GmNnuauESROVu31MQSnLoYvlyEn6j2Ztk6Q5EHFIhxkMhYcv6vuDzl2XEzoRr856QwzMgWM/TmZgw==} + engines: {node: '>=0.10.0'} + simple-concat@1.0.1: resolution: {integrity: sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==} @@ -850,6 +1037,10 @@ packages: undici-types@7.16.0: resolution: {integrity: sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==} + universalify@2.0.1: + resolution: {integrity: sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==} + engines: {node: '>= 10.0.0'} + user-agents@1.1.669: resolution: {integrity: sha512-pbIzG+AOqCaIpySKJ4IAm1l0VyE4jMnK4y1thV8lm8PYxI+7X5uWcppOK7zY79TCKKTAnJH3/4gaVIZHsjrmJA==} @@ -1153,18 +1344,28 @@ snapshots: dependencies: bun-types: 1.3.5 + '@types/debug@4.1.12': + dependencies: + '@types/ms': 2.1.0 + + '@types/ms@2.1.0': {} + '@types/node@25.0.3': dependencies: undici-types: 7.16.0 '@types/user-agents@1.0.4': {} + arr-union@3.1.0: 
{} + asn1js@3.0.7: dependencies: pvtsutils: 1.3.6 pvutils: 1.1.5 tslib: 2.8.1 + balanced-match@1.0.2: {} + base64-js@1.5.1: {} better-sqlite3@12.5.0: @@ -1182,6 +1383,11 @@ snapshots: inherits: 2.0.4 readable-stream: 3.6.2 + brace-expansion@1.1.12: + dependencies: + balanced-match: 1.0.2 + concat-map: 0.0.1 + buffer-from@1.1.2: {} buffer@5.7.1: @@ -1197,8 +1403,18 @@ snapshots: chownr@1.1.4: {} + clone-deep@0.2.4: + dependencies: + for-own: 0.1.5 + is-plain-object: 2.0.4 + kind-of: 3.2.2 + lazy-cache: 1.0.4 + shallow-clone: 0.1.2 + commander@14.0.2: {} + concat-map@0.0.1: {} + debug@4.4.3: dependencies: ms: 2.1.3 @@ -1209,6 +1425,8 @@ snapshots: deep-extend@0.6.0: {} + deepmerge@4.3.1: {} + depd@2.0.0: {} detect-libc@2.1.2: {} @@ -1311,10 +1529,26 @@ snapshots: file-uri-to-path@1.0.0: {} + for-in@0.1.8: {} + + for-in@1.0.2: {} + + for-own@0.1.5: + dependencies: + for-in: 1.0.2 + fresh@2.0.0: {} fs-constants@1.0.0: {} + fs-extra@10.1.0: + dependencies: + graceful-fs: 4.2.11 + jsonfile: 6.2.0 + universalify: 2.0.1 + + fs.realpath@1.0.0: {} + fsevents@2.3.2: optional: true @@ -1324,6 +1558,17 @@ snapshots: github-from-package@0.0.0: {} + glob@7.2.3: + dependencies: + fs.realpath: 1.0.0 + inflight: 1.0.6 + inherits: 2.0.4 + minimatch: 3.1.2 + once: 1.4.0 + path-is-absolute: 1.0.1 + + graceful-fs@4.2.11: {} + http-errors@2.0.1: dependencies: depd: 2.0.0 @@ -1334,14 +1579,53 @@ snapshots: ieee754@1.2.1: {} + inflight@1.0.6: + dependencies: + once: 1.4.0 + wrappy: 1.0.2 + inherits@2.0.4: {} ini@1.3.8: {} + is-buffer@1.1.6: {} + + is-extendable@0.1.1: {} + + is-plain-object@2.0.4: + dependencies: + isobject: 3.0.1 + + isobject@3.0.1: {} + + jsonfile@6.2.0: + dependencies: + universalify: 2.0.1 + optionalDependencies: + graceful-fs: 4.2.11 + + kind-of@2.0.1: + dependencies: + is-buffer: 1.1.6 + + kind-of@3.2.2: + dependencies: + is-buffer: 1.1.6 + + lazy-cache@0.2.7: {} + + lazy-cache@1.0.4: {} + lodash.clonedeep@4.5.0: {} lru-cache@11.2.4: {} + merge-deep@3.0.3: + 
dependencies: + arr-union: 3.1.0 + clone-deep: 0.2.4 + kind-of: 3.2.2 + mime-db@1.54.0: {} mime-types@3.0.2: @@ -1350,8 +1634,17 @@ snapshots: mimic-response@3.1.0: {} + minimatch@3.1.2: + dependencies: + brace-expansion: 1.1.12 + minimist@1.2.8: {} + mixin-object@2.0.1: + dependencies: + for-in: 0.1.8 + is-extendable: 0.1.1 + mkdirp-classic@0.5.3: {} ms@2.1.3: {} @@ -1372,6 +1665,8 @@ snapshots: dependencies: wrappy: 1.0.2 + path-is-absolute@1.0.1: {} + path-to-regexp@8.3.0: {} pkijs@3.3.3: @@ -1385,6 +1680,17 @@ snapshots: playwright-core@1.57.0: {} + playwright-extra-plugin-stealth@0.0.1: {} + + playwright-extra@4.3.6(playwright-core@1.57.0)(playwright@1.57.0): + dependencies: + debug: 4.4.3 + optionalDependencies: + playwright: 1.57.0 + playwright-core: 1.57.0 + transitivePeerDependencies: + - supports-color + playwright@1.57.0: dependencies: playwright-core: 1.57.0 @@ -1411,6 +1717,48 @@ snapshots: end-of-stream: 1.4.5 once: 1.4.0 + puppeteer-extra-plugin-stealth@2.11.2(playwright-extra@4.3.6(playwright-core@1.57.0)(playwright@1.57.0)): + dependencies: + debug: 4.4.3 + puppeteer-extra-plugin: 3.2.3(playwright-extra@4.3.6(playwright-core@1.57.0)(playwright@1.57.0)) + puppeteer-extra-plugin-user-preferences: 2.4.1(playwright-extra@4.3.6(playwright-core@1.57.0)(playwright@1.57.0)) + optionalDependencies: + playwright-extra: 4.3.6(playwright-core@1.57.0)(playwright@1.57.0) + transitivePeerDependencies: + - supports-color + + puppeteer-extra-plugin-user-data-dir@2.4.1(playwright-extra@4.3.6(playwright-core@1.57.0)(playwright@1.57.0)): + dependencies: + debug: 4.4.3 + fs-extra: 10.1.0 + puppeteer-extra-plugin: 3.2.3(playwright-extra@4.3.6(playwright-core@1.57.0)(playwright@1.57.0)) + rimraf: 3.0.2 + optionalDependencies: + playwright-extra: 4.3.6(playwright-core@1.57.0)(playwright@1.57.0) + transitivePeerDependencies: + - supports-color + + puppeteer-extra-plugin-user-preferences@2.4.1(playwright-extra@4.3.6(playwright-core@1.57.0)(playwright@1.57.0)): + 
dependencies: + debug: 4.4.3 + deepmerge: 4.3.1 + puppeteer-extra-plugin: 3.2.3(playwright-extra@4.3.6(playwright-core@1.57.0)(playwright@1.57.0)) + puppeteer-extra-plugin-user-data-dir: 2.4.1(playwright-extra@4.3.6(playwright-core@1.57.0)(playwright@1.57.0)) + optionalDependencies: + playwright-extra: 4.3.6(playwright-core@1.57.0)(playwright@1.57.0) + transitivePeerDependencies: + - supports-color + + puppeteer-extra-plugin@3.2.3(playwright-extra@4.3.6(playwright-core@1.57.0)(playwright@1.57.0)): + dependencies: + '@types/debug': 4.1.12 + debug: 4.4.3 + merge-deep: 3.0.3 + optionalDependencies: + playwright-extra: 4.3.6(playwright-core@1.57.0)(playwright@1.57.0) + transitivePeerDependencies: + - supports-color + pvtsutils@1.3.6: dependencies: tslib: 2.8.1 @@ -1436,6 +1784,10 @@ snapshots: resolve-pkg-maps@1.0.0: {} + rimraf@3.0.2: + dependencies: + glob: 7.2.3 + safe-buffer@5.2.1: {} selfsigned@5.4.0: @@ -1463,6 +1815,13 @@ snapshots: setprototypeof@1.2.0: {} + shallow-clone@0.1.2: + dependencies: + is-extendable: 0.1.1 + kind-of: 2.0.1 + lazy-cache: 0.2.7 + mixin-object: 2.0.1 + simple-concat@1.0.1: {} simple-get@4.0.1: @@ -1517,6 +1876,8 @@ snapshots: undici-types@7.16.0: {} + universalify@2.0.1: {} + user-agents@1.1.669: dependencies: lodash.clonedeep: 4.5.0 diff --git a/src/app.ts b/src/app.ts index 58ee629..4a77484 100644 --- a/src/app.ts +++ b/src/app.ts @@ -5,7 +5,7 @@ import Database from 'better-sqlite3'; import { drizzle } from 'drizzle-orm/better-sqlite3'; import { Core } from './playwright/core.ts'; import * as schema from './db/schema.ts'; - +export { schema } export const config = useConfig() export const app = useConfigKey('app', () => new App({ @@ -35,6 +35,8 @@ export const db = useConfigKey('db', () => { }) export const core = useConfigKey('core', () => new Core({ + useDebugPort: true, // enables the debug-port option (Core defaults it to false to reduce detection risk) + useCDPConnect: true, // true = attach to Chrome over CDP; false would take the pure Playwright launch path listeners: [ { path: "search/notes", diff --git a/src/db/schema.ts b/src/db/schema.ts 
index 8b99b59..2e977f1 100644 --- a/src/db/schema.ts +++ b/src/db/schema.ts @@ -17,7 +17,7 @@ export const xhsNote = sqliteTable('xhs_note', { data: text('data'), tags: text('tags'), - status: text('status'), // 正常笔记,归档,禁止用户,已删除 + status: text('status'), // 正常笔记,归档,禁止用户,删除,不相关 authorUrl: text('author_url'), cover: text('cover'), @@ -71,3 +71,14 @@ export const xhsUser = sqliteTable('xhs_user', { index('idx_xhs_user_tags').on(table.tags), index('idx_xhs_user_bun_tags').on(table.bunTags), ])); + +/** Tag table (`xhs_tags`), indexed by title. The id and both timestamps use $defaultFn so they are computed per insert; .default(expr) would evaluate expr once at module load and reuse that single value (duplicate primary keys, frozen timestamps). */ +export const xhsTags = sqliteTable('xhs_tags', { + id: text('id').primaryKey().$defaultFn(() => randomUUID()), + title: text('title').notNull(), + description: text('description'), + createdAt: integer('created_at').$defaultFn(() => Date.now()).notNull(), + updatedAt: integer('updated_at').$defaultFn(() => Date.now()).notNull(), +}, (table) => ([ + index('idx_xhs_tags_title').on(table.title), +])); \ No newline at end of file diff --git a/src/playwright/browser.ts b/src/playwright/browser.ts index 0a7bb0b..3640670 100644 --- a/src/playwright/browser.ts +++ b/src/playwright/browser.ts @@ -27,8 +27,6 @@ export const getExecutablePath = () => { * * 启动 Chrome 浏览器,带远程调试端口 * 注意:需要手动登录账号和安装插件 - * - * @returns {Promise} */ export const main = async (opts?: { executablePath?: string; @@ -60,26 +58,46 @@ export const main = async (opts?: { '--disable-session-crashed-bubble', '--disable-infobars', '--disable-default-apps', + '--disable-blink-features=AutomationControlled', + '--exclude-switches=enable-automation', + '--disable-features=IsolateOrigins,site-per-process', + '--disable-web-security', + '--disable-features=VizDisplayCompositor', `--user-agent=${userAgent}`, + '--disable-sync', + '--no-default-browser-check', + '--no-experiments', + '--disable-popup-blocking', + '--disable-prompt-on-repost', + '--disable-background-networking', + '--disable-component-update', + '--disable-extensions', + '--disable-bundled-ppapi-flash', + // 隐藏automation bar相关特征 + '--disable-renderer-backgrounding', 
'--disable-backgrounding-occluded-windows', + '--disable-breakpad', + '--disable-client-side-phishing-detection', + '--disable-component-extensions-with-background-pages', + '--disable-datasaver-prompt', + '--disable-device-discovery-notifications', + '--disable-hang-monitor', + '--disable-ipc-flooding-protection', + '--no-service-autorun', + // 禁用自动化识别 + '--disable-automation', ]; + // 如果需要无头模式,添加额外参数 if (headless) { params.push( '--headless', - '--disable-blink-features=AutomationControlled', - '--disable-infobars', - '--disable-features=IsolateOrigins,site-per-process', - '--disable-features=VizDisplayCompositor', '--window-size=1920,1080', - '--disable-background-networking', - '--disable-background-timer-throttling', - '--disable-backgrounding-occluded-windows', - '--disable-renderer-backgrounding', - '--disable-component-extensions-with-background-pages', - '--disable-features=TranslateUI', - '--disable-ipc-flooding-protection', ); } + + params.push('about:blank'); + console.log('启动参数:', params); if (opts?.kiosk) { params.push('--kiosk'); // 全屏模式,无修改边框 diff --git a/src/playwright/core.ts b/src/playwright/core.ts index cdd6b70..ae485a2 100644 --- a/src/playwright/core.ts +++ b/src/playwright/core.ts @@ -1,9 +1,10 @@ import { chromium, Page, BrowserContext, Browser, CDPSession, Request } from 'playwright'; import { execSync } from 'node:child_process'; +import path from 'node:path'; import { EventEmitter } from 'eventemitter3' const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms)); import { main } from "./browser.ts"; - +import { stealthMode } from './stealth/index.ts'; type RequestObject = { url: string; path: string; @@ -28,13 +29,15 @@ export class Core { debugPort = 9223; debugHost = '127.0.0.1'; headless = false; + useDebugPort = false; // 默认不使用debugPort以避免检测 + useCDPConnect = false; // 是否使用CDP连接而不是纯Playwright status: 'disconnected' | 'connecting' | 'connected' | 'failed' = 'disconnected'; emitter = new EventEmitter(); listeners: 
Listener[] = []; recordReady: boolean = false; timer: NodeJS.Timeout | null = null; data: T | null = null; - constructor(opts?: { debugPort?: number, debugHost?: string, listeners?: Listener[], headless?: boolean }) { + constructor(opts?: { debugPort?: number, debugHost?: string, listeners?: Listener[], headless?: boolean, useDebugPort?: boolean, useCDPConnect?: boolean }) { if (opts?.debugPort) { this.debugPort = opts.debugPort; } @@ -47,13 +50,44 @@ export class Core { if (opts?.headless !== undefined) { this.headless = opts.headless; } + if (opts?.useDebugPort !== undefined) { + this.useDebugPort = opts.useDebugPort; + } + if (opts?.useCDPConnect !== undefined) { + this.useCDPConnect = opts.useCDPConnect; + } } async createBrowser() { - await main({ debugPort: this.debugPort, headless: this.headless }); + const chrome = await main({ debugPort: this.debugPort, headless: this.headless }); } async init() { const debugPort = this.debugPort; try { + // 如果不使用CDP连接,直接用Playwright启动 + if (!this.useCDPConnect) { + console.log('使用纯Playwright模式启动(无CDP),避免被检测...'); + this.browser = await chromium.launch({ + headless: this.headless, + args: [ + `--user-data-dir=${path.join(process.cwd(), 'browser-context')}`, + '--no-sandbox', + '--disable-blink-features=AutomationControlled', + '--disable-infobars', + '--exclude-switches=enable-automation', + ] + }); + this.browserContext = await this.browser.newContext(); + this.handleRequest(this.browserContext); + this.page = await this.browserContext.newPage(); + + // 应用隐身脚本 + await this.stealthMode(this.page); + + this.emitter.emit('connected'); + return; + } + + // === 以下为CDP连接模式(可选) === const stdout = execSync(`netstat -ano | findstr :${debugPort}`); console.log(`端口 ${debugPort} 已在监听:\n${stdout}`); const debugHost = this.debugHost; @@ -61,15 +95,55 @@ export class Core { console.log('成功连接到 Chrome CDP!'); this.browser = browser; this.browserContext = browser.contexts()[0]; - this.handleRequest(this.browserContext); - this.page = 
this.browserContext.pages()[0] || await this.browserContext.newPage(); - if (this.headless) { - await this.stealthMode(this.page); + + // 关闭所有现存的页面,防止复用百度等默认页面 + const existingPages = this.browserContext.pages(); + for (const page of existingPages) { + await page.close(); } + + this.handleRequest(this.browserContext); + // 创建全新的空白页面 + this.page = await this.browserContext.newPage(); + + // 在页面创建后立即设置CDP脚本注入(在导航前) + try { + const cdpSession = await this.browserContext.newCDPSession(this.page); + // 禁用webdriver特征 - 在页面加载前注入 + await cdpSession.send('Page.addScriptToEvaluateOnNewDocument', { + source: `Object.defineProperty(navigator, 'webdriver', { get: () => false })` + }); + // 隐藏automation bar相关特征 + await cdpSession.send('Page.addScriptToEvaluateOnNewDocument', { + source: ` + const style = document.createElement('style'); + style.textContent = \` + [class*="automation"], + [id*="automation"], + .infobar, + #infobar-container, + .top-chrome-background, + .automation-bar { + display: none !important; + } + \`; + document.documentElement.appendChild(style); + ` + }); + } catch (e) { + console.log('CDP session设置失败(非致命错误):', (e as Error).message.slice(0, 80)); + } + + // 导航到空白页面,清除任何缓存的导航 + await this.page.goto('about:blank', { waitUntil: 'domcontentloaded' }); + + // 始终启用隐身模式以隐藏debugPort和automation特征 + await this.stealthMode(this.page); + this.emitter.emit('connected'); return; } catch (error: any) { - throw new Error(`无法连接到 Chrome CDP,端口 ${debugPort} 可能未正确启动: ${(error as Error).message.slice(0, 100)}`); + throw new Error(`无法连接到浏览器,错误: ${(error as Error).message.slice(0, 100)}`); } } async connect() { @@ -155,72 +229,7 @@ export class Core { this.data = data; } async stealthMode(page: Page) { - const stealthScript = ` - () => { - Object.defineProperty(navigator, 'webdriver', { - get: () => undefined, - }); - - window.chrome = { - runtime: {}, - }; - - Object.defineProperty(navigator, 'plugins', { - get: () => [1, 2, 3, 4, 5], - }); - - Object.defineProperty(navigator, 
'languages', { - get: () => ['en-US', 'en'], - }); - - const originalQuery = window.navigator.permissions.query; - window.navigator.permissions.query = (parameters) => ( - parameters.name === 'notifications' ? - Promise.resolve({ state: Notification.permission }) : - originalQuery(parameters) - ); - - Object.defineProperty(navigator, 'hardwareConcurrency', { - get: () => 4, - }); - - Object.defineProperty(navigator, 'deviceMemory', { - get: () => 8, - }); - - const originalGetContext = HTMLCanvasElement.prototype.getContext; - HTMLCanvasElement.prototype.getContext = function(type) { - const context = originalGetContext.apply(this, arguments); - if (type === '2d' && context) { - const originalGetImageData = context.getImageData; - context.getImageData = function() { - const imageData = originalGetImageData.apply(this, arguments); - for (let i = 0; i < imageData.data.length; i += 4) { - imageData.data[i] = imageData.data[i] + Math.random() * 0.1 - 0.05; - } - return imageData; - }; - } - return context; - }; - - Object.defineProperty(navigator, 'connection', { - get: () => ({ - effectiveType: '4g', - rtt: 100, - downlink: 10, - }), - }); - - window.navigator.getBattery = () => Promise.resolve({ - charging: true, - chargingTime: 0, - dischargingTime: Infinity, - level: 1, - }); - } - `; - await page.addInitScript(stealthScript); + await stealthMode(page); } async handleRequest(context: BrowserContext) { context.on('request', request => { @@ -241,7 +250,7 @@ export class Core { context.on('response', async response => { const url = response.url(); const recordReady = this.recordReady; - + console.log('Response URL:', url); for (let listener of this.listeners) { const type = listener.type || 'both'; if (type === 'request') continue; diff --git a/src/playwright/stealth/index.ts b/src/playwright/stealth/index.ts new file mode 100644 index 0000000..fcc6b7d --- /dev/null +++ b/src/playwright/stealth/index.ts @@ -0,0 +1,227 @@ +import { Page } from 'playwright'; + +export 
const stealthMode = async (page: Page) => { + const stealthScript = ` + () => { + // 1. 隐藏webdriver属性(最重要的检测点) + Object.defineProperty(navigator, 'webdriver', { + get: () => false, + configurable: true, + }); + + // 2. 隐藏Chrome automation特征 + // 某些网站通过检查特定的Chrome API来判断是否被自动化 + if (!window.chrome) { + window.chrome = {}; + } + window.chrome.runtime = window.chrome.runtime || {}; + + // 移除可能暴露automation的chrome属性 + delete window.chrome.i18n; + delete window.__selenium_evaluate; + delete window.__webdriver_evaluate; + delete window._Selenium_IDE_Recorder; + delete window._selenium; + delete window.callPhantom; + delete window._phantom; + + // 3. 隐藏自动化工具标志 + Object.defineProperty(navigator, 'userAgentData', { + get: () => ({ + brands: [ + { brand: 'Not A(Brand', version: '99' }, + { brand: 'Google Chrome', version: '120' }, + { brand: 'Chromium', version: '120' } + ], + mobile: false, + platform: 'Windows', + platformVersion: '10.0' + }), + configurable: true, + }); + + // 4. 隐藏permissions API中的automation特征 + const originalQuery = window.navigator.permissions.query; + window.navigator.permissions.query = (parameters) => ( + parameters.name === 'notifications' ? 
+ Promise.resolve({ state: Notification.permission }) : + originalQuery(parameters) + ); + + // ===== 以下为其他反检测特征 ===== + + // 伪造chrome对象 + window.chrome = window.chrome || { + runtime: {}, + loadTimes: function() {}, + csi: function() {}, + app: {} + }; + + // 隐藏plugins + Object.defineProperty(navigator, 'plugins', { + get: () => [1, 2, 3, 4, 5], + }); + + // 设置正常的languages + Object.defineProperty(navigator, 'languages', { + get: () => ['zh-CN', 'zh', 'en'], + }); + + // 伪造硬件信息 + Object.defineProperty(navigator, 'hardwareConcurrency', { + get: () => 8, + }); + + Object.defineProperty(navigator, 'deviceMemory', { + get: () => 8, + }); + + // 修改Canvas指纹 + const originalGetContext = HTMLCanvasElement.prototype.getContext; + HTMLCanvasElement.prototype.getContext = function(type) { + const context = originalGetContext.apply(this, arguments); + if (type === '2d' && context) { + const originalGetImageData = context.getImageData; + context.getImageData = function() { + const imageData = originalGetImageData.apply(this, arguments); + for (let i = 0; i < imageData.data.length; i += 4) { + imageData.data[i] = imageData.data[i] + Math.random() * 0.1 - 0.05; + } + return imageData; + }; + } + return context; + }; + + // 伪造网络状况 + Object.defineProperty(navigator, 'connection', { + get: () => ({ + effectiveType: '4g', + rtt: 100, + downlink: 10, + }), + }); + + // 伪造电池信息 + window.navigator.getBattery = () => Promise.resolve({ + charging: true, + chargingTime: 0, + dischargingTime: Infinity, + level: 1, + }); + + // 隐藏触摸点 + Object.defineProperty(navigator, 'maxTouchPoints', { + get: () => 0, + }); + + // 修改toDataURL + const originalToDataURL = HTMLCanvasElement.prototype.toDataURL; + HTMLCanvasElement.prototype.toDataURL = function(type) { + if (type === 'image/webp' || type === 'image/jpeg') { + return originalToDataURL.apply(this, arguments); + } + const context = this.getContext('2d'); + if (context) { + const imageData = context.getImageData(0, 0, this.width, this.height); + 
for (let i = 0; i < imageData.data.length; i += 4) { + imageData.data[i] += Math.floor(Math.random() * 3) - 1; + } + context.putImageData(imageData, 0, 0); + } + return originalToDataURL.apply(this, arguments); + }; + + // 隐藏CDP/DevTools检测 + window.addEventListener('beforeunload', function(e) { + // 防止某些网站通过beforeunload检测到automation + }, true); + + // 隐藏chrome.debugger API + if (window.chrome && window.chrome.runtime) { + window.chrome.runtime.sendMessage = undefined; + } + + // 重写toString方法,隐藏native code标记 + const nativeToString = Function.prototype.toString; + Function.prototype.toString = function() { + const str = nativeToString.call(this); + if (str.includes('[native code]')) { + return 'function() { [native code] }'; + } + return str; + }; + + // 隐藏devtools打开检测 + let devtools = { open: false, orientation: null }; + const threshold = 160; + setInterval(() => { + if (window.outerHeight - window.innerHeight > threshold || + window.outerWidth - window.innerWidth > threshold) { + if (!devtools.open) { + devtools.open = true; + } + } else { + if (devtools.open) { + devtools.open = false; + } + } + }, 500); + + // 防止网站通过port检测 + Object.defineProperty(window, '__REMOTE_DEBUGGER_PORT__', { + get: () => undefined, + set: () => {}, + configurable: true + }); + + // 隐藏Playwright特征 + Object.defineProperty(navigator, 'vendor', { + get: () => 'Google Inc.', + }); + + Object.defineProperty(navigator, 'platform', { + get: () => 'Win32', + }); + + Object.defineProperty(navigator, 'userAgent', { + get: () => { + const ua = navigator.userAgent || ''; + return ua.replace(/HeadlessChrome/, 'Chrome').replace(/Playwright/, ''); + }, + }); + + // 禁用performance.measure在CDP中的表现 + if (window.performance && window.performance.measure) { + const originalMeasure = window.performance.measure; + window.performance.measure = function() { + return originalMeasure.apply(this, arguments); + }; + } + + // 隐藏其他自动化工具标志 + Object.defineProperty(window, '__nightmare', { + get: () => undefined, + set: 
() => {}, + configurable: true, + }); + + Object.defineProperty(window, '__puppeteer__', { + get: () => undefined, + set: () => {}, + configurable: true, + }); + + // 防止通过postMessage检测 + const originalPostMessage = window.postMessage; + window.postMessage = function(message, origin) { + if (typeof message === 'object' && message.type === 'WEB_DRIVER') { + return; + } + return originalPostMessage.apply(this, arguments); + }; + } + `; + await page.addInitScript(stealthScript); +} \ No newline at end of file diff --git a/src/routes/xhs/index.ts b/src/routes/xhs/index.ts index ec14a9c..21b0898 100644 --- a/src/routes/xhs/index.ts +++ b/src/routes/xhs/index.ts @@ -1 +1,4 @@ import './search-notes.ts'; +import './xhs-list.ts'; +import './xhs-user-list.ts'; +import './xhs-tags-list.ts'; diff --git a/src/routes/xhs/search-notes.ts b/src/routes/xhs/search-notes.ts index e5e3e93..364f019 100644 --- a/src/routes/xhs/search-notes.ts +++ b/src/routes/xhs/search-notes.ts @@ -1,6 +1,6 @@ -import { xhsNote, xhsUser } from '@/db/schema.ts'; +import { xhsNote, xhsUser, xhsTags } from '@/db/schema.ts'; import { app, core, db } from '../../app.ts'; -import { sql } from 'drizzle-orm'; +import { sql, eq } from 'drizzle-orm'; const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms)); import { Page } from 'playwright'; import { Core } from '@/playwright/core.ts'; @@ -138,11 +138,16 @@ app.route({ console.log(`导航到搜索页面: ${url.toString()}`); await sleep(3000); // 等待页面加载 } - const keyword = query.keyword as string; + let keyword = query.keyword as string || ''; + keyword = keyword.trim(); + if (!keyword) { + ctx.throw(400, '缺少 keyword 参数'); + } + // 存储关键词到 core 的 data 中,供响应处理使用 sessionCache.set('xhs-search-keyword', keyword); await hoverPickerExample(page, { - keyword: query.keyword as string, + keyword: keyword as string, pushTime: (query.pushTime as '一天内' | '一周内' | '半年内') || '一天内', sort: (query.sort as '综合' | '最新' | '最多点赞' | '最多评论') || '最新', distance: (query.distance as 
'不限' | '同城' | '附近') || '不限', @@ -204,7 +209,7 @@ app.route({ status: '正常笔记', description: keyword || '', link: getNoteUrl(note), - data: JSON.stringify({ note }), + data: JSON.stringify({ note, keyword }), cover: getCover(note), authorUrl: user.link, user_id: user.user?.user_id || '', @@ -224,6 +229,7 @@ app.route({ nickname: user?.nickname || '', avatar: user?.avatar || '', status: '笔记用户', + link: userData.link, xsec_token: user?.xsec_token || '', data: JSON.stringify({ user }), } @@ -259,6 +265,20 @@ app.route({ }, }).execute(); console.log(`已保存 ${uniqueUsers.length} 条用户信息`); + + // 检查 keyword 是否存在于 xhsTags 的 title 中,如果不存在则添加 + if (keyword) { + const existingTag = await db.select().from(xhsTags).where(eq(xhsTags.title, keyword)).limit(1); + if (existingTag.length === 0) { + await db.insert(xhsTags).values({ + title: keyword, + description: `来自搜索页面的关键词: ${keyword}`, + }).execute(); + console.log(`已添加新的标签: ${keyword}`); + } else { + console.log(`标签已存在: ${keyword}`); + } + } } catch (error) { console.error('保存搜索笔记结果时出错:', error); } diff --git a/src/routes/xhs/xhs-list.ts b/src/routes/xhs/xhs-list.ts new file mode 100644 index 0000000..09d2ad2 --- /dev/null +++ b/src/routes/xhs/xhs-list.ts @@ -0,0 +1,142 @@ +import { desc, eq, count, or, like, and } from 'drizzle-orm'; +import { schema, app, db } from '@/app.ts' +const xhsNote = schema.xhsNote; + +app.route({ + path: 'xhs', + key: 'list', + middleware: ['auth'], + description: '获取小红书笔记列表', + metadata: { + tags: ['小红书', '笔记'], + } +}).define(async (ctx) => { + const { page = 1, pageSize = 20, search, sort = 'DESC' } = ctx.query || {}; + + const offset = (page - 1) * pageSize; + const orderByField = sort === 'ASC' ? 
xhsNote.updatedAt : desc(xhsNote.updatedAt); + + let whereCondition = undefined; + if (search) { + whereCondition = or( + like(xhsNote.title, `%${search}%`), + like(xhsNote.summary, `%${search}%`), + like(xhsNote.description, `%${search}%`) + ); + } + + const [list, totalCount] = await Promise.all([ + db.select() + .from(xhsNote) + .where(whereCondition) + .limit(pageSize) + .offset(offset) + .orderBy(orderByField), + db.select({ count: count() }) + .from(xhsNote) + .where(whereCondition) + ]); + + ctx.body = { + list, + pagination: { + page, + current: page, + pageSize, + total: totalCount[0]?.count || 0, + }, + }; + return ctx; +}).addTo(app); + +const noteUpdate = `创建或更新一个小红书笔记, 参数定义: +title: 笔记标题, 必填 +summary: 笔记摘要, 选填 +description: 笔记描述, 选填 +tags: 标签数组, 选填 +data: 笔记数据, 对象, 选填 +`; +app.route({ + path: 'xhs', + key: 'update', + middleware: ['auth'], + description: noteUpdate, + metadata: { + tags: ['小红书', '笔记'], + } +}).define(async (ctx) => { + const { id, createdAt, updatedAt, ...rest } = ctx.query.data || {}; + let note; + if (!id) { + note = await db.insert(xhsNote).values({ + id: rest.id || `note_${Date.now()}`, + title: rest.title || '', + description: rest.description || '', + summary: rest.summary || '', + tags: rest.tags ? JSON.stringify(rest.tags) : null, + link: rest.link || '', + data: rest.data ? JSON.stringify(rest.data) : null, + syncStatus: 1, + syncAt: Date.now(), + createdAt: Date.now(), + updatedAt: Date.now(), + }).returning(); + } else { + const existing = await db.select().from(xhsNote).where(eq(xhsNote.id, id)).limit(1); + if (existing.length === 0) { + ctx.throw(404, '没有找到对应的笔记'); + } + note = await db.update(xhsNote).set({ + title: rest.title, + description: rest.description, + summary: rest.summary, + tags: rest.tags ? JSON.stringify(rest.tags) : undefined, + link: rest.link, + data: rest.data ? 
JSON.stringify(rest.data) : undefined, + updatedAt: Date.now(), + }).where(eq(xhsNote.id, id)).returning(); + } + ctx.body = note; +}).addTo(app); + + +app.route({ + path: 'xhs', + key: 'delete', + middleware: ['auth'], + description: '删除小红书笔记, 参数: data.id 笔记ID', + metadata: { + tags: ['小红书', '笔记'], + } +}).define(async (ctx) => { + const { id } = ctx.query.data || {}; + if (!id) { + ctx.throw(400, 'id 参数缺失'); + } + const existing = await db.select().from(xhsNote).where(eq(xhsNote.id, id)).limit(1); + if (existing.length === 0) { + ctx.throw(404, '没有找到对应的笔记'); + } + await db.delete(xhsNote).where(eq(xhsNote.id, id)); + ctx.body = { success: true }; +}).addTo(app); + +app.route({ + path: 'xhs', + key: 'get', + middleware: ['auth'], + description: '获取单个小红书笔记, 参数: data.id 笔记ID', + metadata: { + tags: ['小红书', '笔记'], + } +}).define(async (ctx) => { + const { id } = ctx.query.data || {}; + if (!id) { + ctx.throw(400, 'id 参数缺失'); + } + const existing = await db.select().from(xhsNote).where(eq(xhsNote.id, id)).limit(1); + if (existing.length === 0) { + ctx.throw(404, '没有找到对应的笔记'); + } + ctx.body = existing[0]; +}).addTo(app); \ No newline at end of file diff --git a/src/routes/xhs/xhs-tags-list.ts b/src/routes/xhs/xhs-tags-list.ts new file mode 100644 index 0000000..4ecede8 --- /dev/null +++ b/src/routes/xhs/xhs-tags-list.ts @@ -0,0 +1,124 @@ +import { desc, eq, count, like } from 'drizzle-orm'; +import { schema, app, db } from '@/app.ts' +const xhsTags = schema.xhsTags; + +app.route({ + path: 'xhs-tags', + key: 'list', + middleware: ['auth'], + description: '获取小红书标签列表', + metadata: { + tags: ['小红书', '标签'], + } +}).define(async (ctx) => { + const { page = 1, pageSize = 20, search, sort = 'DESC' } = ctx.query || {}; + + const offset = (page - 1) * pageSize; + const orderByField = sort === 'ASC' ? 
xhsTags.updatedAt : desc(xhsTags.updatedAt); + + let whereCondition = undefined; + if (search) { + whereCondition = like(xhsTags.title, `%${search}%`); + } + + const [list, totalCount] = await Promise.all([ + db.select() + .from(xhsTags) + .where(whereCondition) + .limit(pageSize) + .offset(offset) + .orderBy(orderByField), + db.select({ count: count() }) + .from(xhsTags) + .where(whereCondition) + ]); + + ctx.body = { + list, + pagination: { + page, + current: page, + pageSize, + total: totalCount[0]?.count || 0, + }, + }; + return ctx; +}).addTo(app); + +const tagUpdate = `创建或更新一个小红书标签, 参数定义: +title: 标签标题, 必填 +description: 标签描述, 选填 +`; +app.route({ + path: 'xhs-tags', + key: 'update', + middleware: ['auth'], + description: tagUpdate, + metadata: { + tags: ['小红书', '标签'], + } +}).define(async (ctx) => { + const { id, createdAt, updatedAt, ...rest } = ctx.query.data || {}; + let tag; + if (!id) { + tag = await db.insert(xhsTags).values({ + title: rest.title || '', + description: rest.description || '', + createdAt: Date.now(), + updatedAt: Date.now(), + }).returning(); + } else { + const existing = await db.select().from(xhsTags).where(eq(xhsTags.id, id)).limit(1); + if (existing.length === 0) { + ctx.throw(404, '没有找到对应的标签'); + } + tag = await db.update(xhsTags).set({ + title: rest.title, + description: rest.description, + updatedAt: Date.now(), + }).where(eq(xhsTags.id, id)).returning(); + } + ctx.body = tag; +}).addTo(app); + + +app.route({ + path: 'xhs-tags', + key: 'delete', + middleware: ['auth'], + description: '删除小红书标签, 参数: data.id 标签ID', + metadata: { + tags: ['小红书', '标签'], + } +}).define(async (ctx) => { + const { id } = ctx.query.data || {}; + if (!id) { + ctx.throw(400, 'id 参数缺失'); + } + const existing = await db.select().from(xhsTags).where(eq(xhsTags.id, id)).limit(1); + if (existing.length === 0) { + ctx.throw(404, '没有找到对应的标签'); + } + await db.delete(xhsTags).where(eq(xhsTags.id, id)); + ctx.body = { success: true }; +}).addTo(app); + +app.route({ + 
path: 'xhs-tags', + key: 'get', + middleware: ['auth'], + description: '获取单个小红书标签, 参数: data.id 标签ID', + metadata: { + tags: ['小红书', '标签'], + } +}).define(async (ctx) => { + const { id } = ctx.query.data || {}; + if (!id) { + ctx.throw(400, 'id 参数缺失'); + } + const existing = await db.select().from(xhsTags).where(eq(xhsTags.id, id)).limit(1); + if (existing.length === 0) { + ctx.throw(404, '没有找到对应的标签'); + } + ctx.body = existing[0]; +}).addTo(app); diff --git a/src/routes/xhs/xhs-user-list.ts b/src/routes/xhs/xhs-user-list.ts new file mode 100644 index 0000000..0b7d9c7 --- /dev/null +++ b/src/routes/xhs/xhs-user-list.ts @@ -0,0 +1,147 @@ +import { desc, eq, count, or, like } from 'drizzle-orm'; +import { schema, app, db } from '@/app.ts' +const xhsUser = schema.xhsUser; + +app.route({ + path: 'xhs-users', + key: 'list', + middleware: ['auth'], + description: '获取小红书用户列表', + metadata: { + tags: ['小红书', '用户'], + } +}).define(async (ctx) => { + const { page = 1, pageSize = 20, search, sort = 'DESC' } = ctx.query || {}; + + const offset = (page - 1) * pageSize; + const orderByField = sort === 'ASC' ? 
xhsUser.updatedAt : desc(xhsUser.updatedAt); + + let whereCondition = undefined; + if (search) { + whereCondition = or( + like(xhsUser.nickname, `%${search}%`), + like(xhsUser.username, `%${search}%`), + like(xhsUser.description, `%${search}%`) + ); + } + + const [list, totalCount] = await Promise.all([ + db.select() + .from(xhsUser) + .where(whereCondition) + .limit(pageSize) + .offset(offset) + .orderBy(orderByField), + db.select({ count: count() }) + .from(xhsUser) + .where(whereCondition) + ]); + + ctx.body = { + list, + pagination: { + page, + current: page, + pageSize, + total: totalCount[0]?.count || 0, + }, + }; + return ctx; +}).addTo(app); + +const userUpdate = `创建或更新一个小红书用户, 参数定义: +nickname: 用户昵称, 必填 +username: 用户名, 选填 +avatar: 用户头像, 选填 +description: 用户描述, 选填 +tags: 标签数组, 选填 +data: 用户数据, 对象, 选填 +`; +app.route({ + path: 'xhs-users', + key: 'update', + middleware: ['auth'], + description: userUpdate, + metadata: { + tags: ['小红书', '用户'], + } +}).define(async (ctx) => { + const { user_id, createdAt, updatedAt, ...rest } = ctx.query.data || {}; + let user; + if (!user_id) { + user = await db.insert(xhsUser).values({ + user_id: rest.user_id || `user_${Date.now()}`, + nickname: rest.nickname || '', + username: rest.username || '', + avatar: rest.avatar || '', + description: rest.description || '', + summary: rest.summary || '', + tags: rest.tags ? JSON.stringify(rest.tags) : null, + link: rest.link || '', + data: rest.data ? JSON.stringify(rest.data) : null, + syncStatus: 1, + syncAt: Date.now(), + createdAt: Date.now(), + updatedAt: Date.now(), + }).returning(); + } else { + const existing = await db.select().from(xhsUser).where(eq(xhsUser.user_id, user_id)).limit(1); + if (existing.length === 0) { + ctx.throw(404, '没有找到对应的用户'); + } + user = await db.update(xhsUser).set({ + nickname: rest.nickname, + username: rest.username, + avatar: rest.avatar, + description: rest.description, + summary: rest.summary, + tags: rest.tags ? 
JSON.stringify(rest.tags) : undefined, + link: rest.link, + data: rest.data ? JSON.stringify(rest.data) : undefined, + updatedAt: Date.now(), + }).where(eq(xhsUser.user_id, user_id)).returning(); + } + ctx.body = user; +}).addTo(app); + + +app.route({ + path: 'xhs-users', + key: 'delete', + middleware: ['auth'], + description: '删除小红书用户, 参数: data.user_id 用户ID', + metadata: { + tags: ['小红书', '用户'], + } +}).define(async (ctx) => { + const { user_id } = ctx.query.data || {}; + if (!user_id) { + ctx.throw(400, 'user_id 参数缺失'); + } + const existing = await db.select().from(xhsUser).where(eq(xhsUser.user_id, user_id)).limit(1); + if (existing.length === 0) { + ctx.throw(404, '没有找到对应的用户'); + } + await db.delete(xhsUser).where(eq(xhsUser.user_id, user_id)); + ctx.body = { success: true }; +}).addTo(app); + +app.route({ + path: 'xhs-users', + key: 'get', + middleware: ['auth'], + description: '获取单个小红书用户, 参数: data.user_id 用户ID', + metadata: { + tags: ['小红书', '用户'], + } +}).define(async (ctx) => { + const { user_id } = ctx.query.data || {}; + if (!user_id) { + ctx.throw(400, 'user_id 参数缺失'); + } + const existing = await db.select().from(xhsUser).where(eq(xhsUser.user_id, user_id)).limit(1); + if (existing.length === 0) { + ctx.throw(404, '没有找到对应的用户'); + } + ctx.body = existing[0]; +}).addTo(app); diff --git a/src/test/zwpy/index.ts b/src/test/zwpy/index.ts new file mode 100644 index 0000000..a44b829 --- /dev/null +++ b/src/test/zwpy/index.ts @@ -0,0 +1,29 @@ +import { chromium } from 'playwright'; +import { main } from '../../playwright/browser.ts'; +import path from 'node:path'; +const checkUrl = 'https://pg.zwpyyds.com/pindou' +const userDataDir = path.join(process.cwd(), 'browser-data-zwpy'); +// const chromeProcess = await main({ +// userDataDir: path.join(process.cwd(), 'browser-data-zwpy'), +// debugPort: 9223, +// }); + + +// await new Promise(resolve => setTimeout(resolve, 3000)); + +// const browser = await chromium.connectOverCDP('http://localhost:9223'); +// const 
context = browser.contexts()[0]; +// const page = context.pages()[0] || await context.newPage(); +// await page.goto(checkUrl, { waitUntil: 'networkidle' }); + +// await page.route('**/*', (route) => { +// const request = route.request(); +// console.log(`请求URL: ${request.url()}`); +// route.continue(); +// }); + +const context = await chromium.launchPersistentContext(userDataDir, { + headless: false, +}); +const page = context.pages()[0] || await context.newPage(); +await page.goto(checkUrl, { waitUntil: 'networkidle' }); \ No newline at end of file