Initial commit

This commit is contained in:
2025-02-20 19:49:38 +08:00
parent 48b908a5d1
commit 8f0be3c165
61 changed files with 4226 additions and 8 deletions

View File

@@ -0,0 +1,308 @@
import { fileIsExist } from '@kevisual/use-config';
import { useFileStore } from '@kevisual/use-config/file-store';
import fs from 'fs';
import path from 'path';
// Origin of the Nikkei Chinese-language site; the tab hrefs below are site-relative paths under it.
export const baseUrl = 'https://cn.nikkei.com';
// Task identifier; used further down to build the per-task config and data directory paths.
export const taskName = 'nikkei';
/**
 * Two-level section tree of the site: each top-level tab has a display
 * `name`, a site-relative `href`, and a list of child sections
 * (`href` + display `text`). All hrefs are relative to `baseUrl`.
 */
export const tabs = [
  {
    name: '日经精选',
    href: '/',
    children: [
      { href: '/top/nian-du-pan-dian-zhan-wang.html', text: '年度盘点展望' },
      { href: '/top/2021-04-20-01-47-39.html', text: '日本企业研究' },
      { href: '/top/2020-08-25-06-34-55.html?types[0]=8', text: '半导体/AI' },
      { href: '/top/2019-08-29-06-18-57.html', text: '中日深度观察' },
      { href: '/top/201604-3.html', text: '日本游' },
      { href: '/top/2021-03-03-07-02-53.html?types[0]=8', text: '脱碳经济' },
      { href: '/top/bp.html?types[0]=8', text: '日经BP精选' },
      { href: '/top/ft.html?types[0]=8', text: 'FT中文网精选' },
      { href: '/top/foa2024.html?types[0]=8', text: '亚洲的未来' },
    ],
  },
  {
    name: '中国',
    href: '/china.html',
    children: [
      { href: '/china/ceconomy.html', text: '经济' },
      { href: '/china/ccompany.html', text: '企业' },
      { href: '/china/cfinancial.html', text: '金融市场' },
      { href: '/china/cpolicssociety.html', text: '政治/社会' },
    ],
  },
  {
    name: '政经观察',
    href: '/politicsaeconomy.html',
    children: [
      { href: '/politicsaeconomy/epolitics.html', text: '宏观经济' },
      { href: '/politicsaeconomy/economic-policy.html', text: '经济政策' },
      { href: '/politicsaeconomy/stockforex.html', text: '股市/外汇' },
      { href: '/politicsaeconomy/investtrade.html', text: '投资/贸易' },
      { href: '/politicsaeconomy/efinance.html', text: '金融' },
      { href: '/politicsaeconomy/commodity.html', text: '大宗商品' },
      { href: '/politicsaeconomy/politicsasociety.html', text: '政治/社会' },
    ],
  },
  {
    name: '产业聚焦',
    href: '/industry.html',
    children: [
      { href: '/industry/icar.html', text: '汽车' },
      { href: '/industry/itelectric-appliance.html', text: 'IT/家电' },
      { href: '/industry/ienvironment.html', text: '环境/能源' },
      { href: '/industry/manufacturing.html', text: '工业' },
      { href: '/industry/agriculture.html', text: '农林水产' },
      { href: '/industry/propertiesconstruction.html', text: '地产/建设' },
      { href: '/industry/tradingretail.html', text: '商业/消费' },
      { href: '/industry/scienceatechnology.html', text: '科学/技术' },
      { href: '/industry/management-strategy.html', text: '经营/战略' },
    ],
  },
  {
    name: '新产品',
    href: '/product.html',
    children: [
      { href: '/product/pdigital.html', text: '数码与家电' },
      { href: '/product/automobile.html', text: '汽车' },
      { href: '/product/beautyahealth.html', text: '美容与健康' },
      { href: '/product/prime-goods.html', text: '美品精选' },
      { href: '/product/joke-goods.html', text: '非凡创意' },
    ],
  },
  {
    name: '穿梭日本',
    href: '/trend.html',
    children: [
      { href: '/trend/cool-japan.html', text: '酷日本' },
      { href: '/trend/beautyahealth.html', text: '美容健身' },
      { href: '/trend/traditional-culture.html', text: '文化精粹' },
      { href: '/trend/tourism.html', text: '日本逍遥行' },
    ],
  },
  {
    name: '专栏/观点',
    href: '/columnviewpoint.html',
    children: [
      { href: '/columnviewpoint/tearoom.html', text: '中日茶坊' },
      { href: '/columnviewpoint/columns-b.html', text: '肖敏捷论中日' },
      { href: '/columnviewpoint/column-special1.html', text: '日本人小声说' },
      { href: '/columnviewpoint/liudicolumn.html', text: '刘迪观察' },
      { href: '/columnviewpoint/zhangshicolumn.html', text: '张石的樱雪鸿泥' },
      { href: '/columnviewpoint/kelongcolumn.html', text: '老柯要说话' },
      { href: '/columnviewpoint/criticism.html', text: '社评' },
      { href: '/columnviewpoint/viewpoint.html', text: '观点' },
      { href: '/columnviewpoint/column.html', text: '专栏' },
    ],
  },
  {
    name: '职场/深造',
    href: '/career.html',
    children: [
      { href: '/career/humanresource.html', text: '人才活用' },
      { href: '/career/employment.html', text: '就业' },
      { href: '/career/abroadstudy.html', text: '留学/教育' },
    ],
  },
];
/**
 * Flat list of every child section, each tagged with its parent tab's
 * name and href (`parentName`, `parentHref`) ahead of the child's own
 * `href` and `text`.
 */
export const allTab = tabs.flatMap(({ name, href, children }) =>
  children.map((child) => ({
    parentName: name,
    parentHref: href,
    ...child,
  })),
);
// A single sample entry in the same shape as the items of `allTab`
// (the '日经BP精选' child of the '日经精选' tab).
export const exampleTab = {
parentName: '日经精选',
parentHref: '/',
href: '/top/bp.html?types[0]=8',
text: '日经BP精选',
};
// Per-task directory (relative to the working directory) that holds the crawl config file.
export const crawlConfigPathRawDir = `./config/${taskName}`;
// Per-task directory (relative to the working directory) for crawl output.
export const crawlDataPathDir = `./data/${taskName}`;
// Path of the JSON file read by getCrawlConfig and written by setCrawlConfig.
export const crawlConfigPathRaw = path.join(crawlConfigPathRawDir, 'crawl-config.json');
// NOTE(review): useFileStore comes from @kevisual/use-config; with `needExists: true` it presumably
// makes sure the directory exists and returns a store/path handle — confirm against that package.
export const crawlDataPath = useFileStore(crawlDataPathDir, {
needExists: true,
});
// Same helper applied to the config directory (see the review note above on its assumed behaviour).
export const crawlConfigPath = useFileStore(crawlConfigPathRawDir, {
needExists: true,
});
/**
 * Load the persisted crawl progress from `crawlConfigPathRaw`.
 *
 * The file content is only trusted when it parses to a plain JSON object;
 * its `currentTabIndex` is normalised to a non-negative integer (anything
 * else becomes 0) so callers can index with it safely. When the file is
 * missing, unreadable, not valid JSON, or holds a non-object value
 * (e.g. `null` or an array), the default config is written back through
 * `setCrawlConfig` and returned.
 *
 * @returns The stored config with a validated `currentTabIndex`. Other stored
 *   keys are passed through untouched (typed loosely, as before).
 * @throws Whatever `setCrawlConfig` throws if the default cannot be persisted.
 */
export const getCrawlConfig = (): { currentTabIndex: number; [key: string]: any } => {
  try {
    const parsed: unknown = JSON.parse(fs.readFileSync(crawlConfigPathRaw, 'utf-8'));
    if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
      const stored = parsed as Record<string, any>;
      const index: unknown = stored.currentTabIndex;
      return {
        ...stored,
        currentTabIndex: typeof index === 'number' && Number.isInteger(index) && index >= 0 ? index : 0,
      };
    }
  } catch (error) {
    // Missing or corrupt config file: fall through and recreate the default below.
  }
  setCrawlConfig({
    currentTabIndex: 0,
  });
  return {
    currentTabIndex: 0,
  };
};
/**
 * Persist `config` as pretty-printed (2-space) JSON at `crawlConfigPathRaw`,
 * creating the config directory first when `fileIsExist` reports it missing.
 *
 * @param config Value to serialise with `JSON.stringify`.
 */
export const setCrawlConfig = (config: any) => {
  const hasConfigDir = fileIsExist(crawlConfigPathRawDir);
  if (hasConfigDir === false || !hasConfigDir) {
    fs.mkdirSync(crawlConfigPathRawDir, { recursive: true });
  }
  const serialized = JSON.stringify(config, null, 2);
  fs.writeFileSync(crawlConfigPathRaw, serialized);
};

View File

@@ -0,0 +1,223 @@
import * as cheerio from 'cheerio';
import * as puppeteer from 'puppeteer';
import fs from 'fs';
import { allTab, baseUrl, exampleTab, taskName } from './constants.ts';
import { getCrawlConfig, setCrawlConfig } from './constants.ts';
// Load the persisted crawl progress once, when this module is loaded.
const crawlConfig = getCrawlConfig();
/**
 * Return a promise that resolves (with no value) once a `setTimeout`
 * of `ms` milliseconds fires.
 */
export const sleep = (ms: number) =>
  new Promise((done) => {
    setTimeout(done, ms);
  });
// Shared failure counter for crawlTab: incremented on each TimeoutError it catches; crawlTab gives up
// (returns an empty list) once the counter exceeds 3, and resets it to 0 after a successful crawl.
// NOTE(review): the initial value 5 is already above that limit, so a timeout before any successful
// crawl is never retried — confirm this is intended.
let errorCount = 5;
// Shared Puppeteer browser instance; launched lazily by initializeBrowser.
let browser: puppeteer.Browser;
// Index into allTab at which crawling starts/resumes; 0 when the saved config has no usable index.
let currentTabIndex = crawlConfig.currentTabIndex || 0;
/**
 * Launch the shared headless browser if one is not already set.
 * Does nothing when `browser` is already truthy.
 */
const initializeBrowser = async () => {
  if (browser) {
    return;
  }
  const launchOptions = {
    headless: true, // keep running headless
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-blink-features=AutomationControlled', // hide the automation-controlled flag
    ],
    ignoreDefaultArgs: ['--enable-automation'],
    devtools: false,
    defaultViewport: {
      width: 1280,
      height: 720,
    },
  };
  browser = await puppeteer.launch(launchOptions);
};
/**
 * Close the shared browser (if any) and clear the shared handle.
 *
 * The handle is cleared before awaiting `close()` so that the `!browser`
 * checks in initializeBrowser/getPageContent launch a fresh browser on the
 * next use, even if closing rejects. Previously the closed instance stayed
 * assigned, so a retry after closeBrowser() tried to open pages on a dead browser.
 */
const closeBrowser = async () => {
  if (!browser) {
    return;
  }
  const closing = browser;
  browser = null;
  await closing.close();
};
/**
 * Open `link` in a new page of the shared browser and return the page's HTML.
 *
 * Navigation timeouts are retried: the page and the whole browser are closed,
 * the function waits, and then calls itself with one fewer retry (a new
 * browser is launched by the `!browser` check at the top).
 *
 * The page is always closed before returning or throwing, including when
 * setting headers or reading the content fails.
 *
 * @param link Absolute URL to load.
 * @param retryCount Number of timeout retries still allowed (default 3).
 * @returns The page HTML, or `null` once the timeout retries are exhausted.
 * @throws Re-throws any navigation error that is not a Puppeteer TimeoutError.
 */
const getPageContent = async (link: string, retryCount = 3): Promise<string | null> => {
  if (!browser) {
    await initializeBrowser();
  }
  const page = await browser.newPage();
  try {
    // Set a user agent and language header to mimic a regular browser.
    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36');
    await page.setExtraHTTPHeaders({
      'Accept-Language': 'en-US,en;q=0.9',
    });
    console.log('start crawl tab', currentTabIndex, link);
    try {
      await page.goto(link, { timeout: 120000 }); // navigation timeout raised to 120 seconds
    } catch (error) {
      if (!(error instanceof puppeteer.TimeoutError)) {
        console.log('Non-timeout error:', error instanceof Error ? error.message : error);
        throw error; // let the caller handle non-timeout errors
      }
      console.log('Timeout error when accessing:', link);
      // Close the page explicitly before the browser is torn down below.
      await page.close();
      if (retryCount <= 0) {
        console.log('Skipping after 3 retries:', link);
        return null;
      }
      console.log('Retrying...', retryCount, 'attempts left');
      await sleep(10000); // wait 10 seconds
      await browser.close();
      browser = null; // forces a fresh launch on the retry
      await sleep(10000); // wait another 10 seconds
      return await getPageContent(link, retryCount - 1);
    }
    console.log('end crawl tab', currentTabIndex, link);
    return await page.content();
  } finally {
    // Best-effort cleanup; a failure to close must not mask the result or the original error.
    if (!page.isClosed()) {
      await page.close().catch((closeError: unknown) => {
        console.log('Failed to close page:', link, closeError);
      });
    }
  }
};
/**
 * Crawl one article page and, when `deep` is true, its pagination pages.
 *
 * Reads the title from `.style01.mB10` and the body text from
 * `#contentDiv .newsText`. With `deep`, every distinct link found in
 * `.pagenavbar` (excluding `?start=0` links and the current page) is crawled
 * once with `deep = false`, and those results are returned ahead of the
 * entry for this page.
 *
 * @param link Absolute URL of the article page.
 * @param deep Whether to follow pagination links (default true).
 * @returns The crawled entries. Empty when the page could not be loaded, on any
 *   non-timeout error, or after the shared `errorCount` exceeds 3 on timeouts.
 */
export const crawlTab = async (link: string, deep = true): Promise<LinkTitleTimeCrawl[]> => {
  try {
    const html = await getPageContent(link);
    if (html == null) {
      // getPageContent returns null after exhausting its own timeout retries.
      console.error('crawlTab other error', 'currentTabIndex', currentTabIndex, 'link', link, 'error', 'page content unavailable');
      return [];
    }
    const $ = cheerio.load(html);
    // Title lives in the element with classes "style01 mB10".
    const title = $('.style01.mB10').text();
    // Article text is the .newsText content inside #contentDiv.
    const content = $('#contentDiv').find('.newsText').text();
    const current = { title, content, link: link, deep: deep };
    // The page loaded and parsed: clear the timeout counter on every success path.
    errorCount = 0;
    if (!deep) {
      return [current];
    }
    // .pagenavbar holds the pagination links; anchors without an href are ignored.
    const hrefs = $('.pagenavbar')
      .find('a')
      .toArray()
      .map((anchor) => $(anchor).attr('href'))
      .filter((href): href is string => typeof href === 'string' && href.length > 0);
    // Drop "?start=0" links and the current page, then de-duplicate.
    const aListFullLink = [
      ...new Set(
        hrefs
          .map((href) => getFullLink(href))
          .filter((pageLink) => !pageLink.includes('?start=0') && pageLink !== link),
      ),
    ];
    if (aListFullLink.length === 0) {
      return [current];
    }
    console.log('aListFullLink', aListFullLink);
    const pages: LinkTitleTimeCrawl[] = [];
    for (const pageLink of aListFullLink) {
      const one = await crawlTab(pageLink, false);
      pages.push(...one);
    }
    return [...pages, current];
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    // Timeout: recycle the browser and retry until the shared counter exceeds 3.
    if (e instanceof puppeteer.TimeoutError) {
      console.error('crawlTab error', 'currentTabIndex', currentTabIndex, 'link', link, 'error', message);
      await closeBrowser();
      console.error('close browser', 'errorCount', errorCount);
      errorCount++;
      if (errorCount > 3) {
        console.error('crawlTab error and return empty', 'currentTabIndex', currentTabIndex, link);
        return [];
      }
      await sleep(3000);
      return await crawlTab(link, deep);
    } else {
      console.error('crawlTab other error', 'currentTabIndex', currentTabIndex, 'link', link, 'error', message);
      return [];
    }
  }
};
/**
 * Turn a site-relative link into an absolute URL by prefixing `baseUrl`.
 *
 * Links that already start with `http://` or `https://` are returned
 * unchanged, since prefixing them would produce an invalid URL.
 *
 * @param link Site-relative path (e.g. `/china.html`) or an absolute URL.
 * @returns The absolute URL.
 */
export const getFullLink = (link: string) => {
  if (/^https?:\/\//i.test(link)) {
    return link;
  }
  return `${baseUrl}${link}`;
};
/** One crawled page of an article, as produced by crawlTab. */
type LinkTitleTimeCrawl = {
  title: string;
  content: string;
  /** Absolute URL the entry was crawled from. */
  link: string;
  /** Not populated by crawlTab; optional so the entries it builds match this type. */
  page?: string;
  /** Whether pagination links were followed when crawling this page. */
  deep: boolean;
};
/** One article listed on a tab page, plus its crawl results once fetched. */
type LinkTitleTime = {
  /** Link exactly as found in the list markup (site-relative). */
  link: string;
  /** `link` resolved through getFullLink. */
  fullLink?: string;
  title: string;
  /** Date text from the list item with the surrounding parentheses removed. */
  time: string;
  /** ISO timestamp of when the list entry was collected. */
  crawlTime: string;
  crawl?: LinkTitleTimeCrawl[];
};
/**
 * Crawl one tab (section list) page: collect the recent articles it lists,
 * crawl each article with crawlTab, and write the result as JSON to
 * `./data/<taskName>/<fileName>`.
 *
 * Only `<dt>` entries inside `.newsContent02` that carry a link are kept.
 * Entries whose date is more than 14 days old are skipped; entries whose
 * date text cannot be parsed are kept (the age comparison is false for NaN).
 *
 * @param link Absolute URL of the tab page.
 * @param fileName Output file name; defaults to `a.json` when empty or omitted.
 * @returns The text of the page's `<h1>` elements.
 * @throws If the tab page could not be loaded, or the output cannot be written.
 */
export const crawlTabList = async (link: string, fileName?: string) => {
  await initializeBrowser();
  const html = await getPageContent(link);
  if (html == null) {
    // getPageContent returns null once its timeout retries are exhausted.
    throw new Error(`crawlTabList: failed to load ${link}`);
  }
  const $ = cheerio.load(html);
  const title = $('h1').text();
  const linkTitleTimeList: LinkTitleTime[] = [];
  const nowTime = new Date().getTime();
  const maxAgeMs = 14 * 24 * 60 * 60 * 1000; // two weeks
  $('.newsContent02')
    .find('dt')
    .each((_index, element) => {
      const anchor = $(element).find('a');
      const href = anchor.attr('href');
      const itemTitle = anchor.text();
      const rawTime = $(element).find('span').text();
      console.log('Link:', href);
      console.log('Title:', itemTitle);
      console.log('Time:', rawTime);
      if (!href) {
        // Without a link there is nothing to crawl; skip instead of building a bogus URL.
        console.log('skip entry without link', itemTitle);
        return;
      }
      // The date is rendered inside parentheses; strip them.
      const time = rawTime.replace(/\(/, '').replace(/\)/, '');
      const createTime = new Date(time).getTime();
      // Keep only entries published within the last two weeks.
      if (nowTime - createTime > maxAgeMs) {
        console.log('time is out of date', time);
        return;
      }
      linkTitleTimeList.push({ link: href, title: itemTitle, fullLink: getFullLink(href), time, crawlTime: new Date().toISOString() });
    });
  if (linkTitleTimeList.length) {
    console.log('current linkTitleTimeList', linkTitleTimeList.length);
  }
  for (const item of linkTitleTimeList) {
    item.crawl = await crawlTab(getFullLink(item.link), true);
  }
  const outputDir = `./data/${taskName}`;
  // Make sure the output directory exists; a no-op when it is already there.
  fs.mkdirSync(outputDir, { recursive: true });
  const filePath = `${outputDir}/${fileName || 'a.json'}`;
  fs.writeFileSync(filePath, JSON.stringify(linkTitleTimeList, null, 2));
  return title;
};
// Absolute URL of the sample tab; only referenced by the commented-out manual run at the end of this file.
const tabExampleUrl = `${baseUrl}${exampleTab.href}`;
// Epoch-millisecond timestamp taken when this module is loaded; crawlAllTab reports elapsed time against it.
const startTime = new Date().getTime();
/**
 * Crawl every tab in `allTab`, starting at the saved `currentTabIndex`.
 *
 * Each tab's result is written to `<index>.json`. After each tab (successful
 * or not) the next index is persisted, so an interrupted run can resume.
 * A failure in one tab is logged and does not stop the run. When all tabs
 * are done the persisted index is reset to 0.
 *
 * The browser is closed in `finally`, so it is not left running if
 * persisting progress throws mid-run; in that case the saved index is
 * deliberately not reset.
 */
const crawlAllTab = async () => {
  await initializeBrowser();
  console.time(`${taskName} crawlAllTab`);
  try {
    for (let i = currentTabIndex; i < allTab.length; i++) {
      const tab = allTab[i];
      const link = `${baseUrl}${tab.href}`;
      console.log('crawl tab', tab.parentName, tab.text, link);
      try {
        await crawlTabList(link, `${currentTabIndex}.json`);
      } catch (error) {
        console.log('crawl tab error', link, error);
      }
      const diffTime = (new Date().getTime() - startTime) / 1000;
      console.log('crawl tab end', tab.parentName, tab.text, link, 'run-time:', diffTime, 's\n\n');
      // Persist progress after every tab so the next run resumes from here.
      currentTabIndex++;
      setCrawlConfig({
        currentTabIndex,
      });
    }
  } finally {
    await closeBrowser();
  }
  setCrawlConfig({
    currentTabIndex: 0,
  });
  console.timeEnd(`${taskName} crawlAllTab`);
};
// Entry point: run the full crawl when this module is executed. Failures are reported
// and reflected in the process exit code instead of surfacing as an unhandled rejection.
crawlAllTab().catch((error: unknown) => {
  console.error('crawlAllTab failed', error);
  process.exitCode = 1;
});
// crawlTabList(tabExampleUrl, 'a.json');

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,111 @@
console.log(name);
<div class="banner">
<ul class="bannerList fix" id="bannerList">
<li class="bannerCho on">
<h2><a href="/" target="_blank">日经精选</a></h2>
<div>
<ul class="fix" style="width:980">
<li><a href="/top/nian-du-pan-dian-zhan-wang.html" target="_blank">年度盘点展望</a></li>
<li><a href="/top/2021-04-20-01-47-39.html" target="_blank">日本企业研究</a></li>
<li><a href="/top/2020-08-25-06-34-55.html?types[0]=8" target="_blank">半导体/AI</a></li>
<li><a href="/top/2019-08-29-06-18-57.html" target="_blank">中日深度观察</a></li>
<li><a href="/top/201604-3.html" target="_blank">日本游</a></li>
<li><a href="/top/2021-03-03-07-02-53.html?types[0]=8" target="_blank">脱碳经济</a></li>
<li><a href="/top/bp.html?types[0]=8" target="_blank">日经BP精选</a></li>
<li><a href="/top/ft.html?types[0]=8" target="_blank">FT中文网精选</a></li>
<li class="end"><a href="/top/foa2024.html?types[0]=8" target="_blank">亚洲的未来</a></li>
</ul>
</div>
</li>
<li class="bannerChi">
<h2><a href="/china.html" target="_blank">中国</a></h2>
<div>
<ul class="fix" style="width:980">
<li><a href="/china/ceconomy.html" target="_blank">经济</a></li>
<li><a href="/china/ccompany.html" target="_blank">企业</a></li>
<li><a href="/china/cfinancial.html" target="_blank">金融市场</a></li>
<li class="end"><a href="/china/cpolicssociety.html" target="_blank">政治/社会</a></li>
</ul>
</div>
</li>
<li class="bannerObs">
<h2><a href="/politicsaeconomy.html" target="_blank">政经观察</a></h2>
<div>
<ul class="fix" style="width:980">
<li><a href="/politicsaeconomy/epolitics.html" target="_blank">宏观经济</a></li>
<li><a href="/politicsaeconomy/economic-policy.html" target="_blank">经济政策</a></li>
<li><a href="/politicsaeconomy/stockforex.html" target="_blank">股市/外汇</a></li>
<li><a href="/politicsaeconomy/investtrade.html" target="_blank">投资/贸易</a></li>
<li><a href="/politicsaeconomy/efinance.html" target="_blank">金融</a></li>
<li><a href="/politicsaeconomy/commodity.html" target="_blank">大宗商品</a></li>
<li class="end"><a href="/politicsaeconomy/politicsasociety.html" target="_blank">政治/社会</a></li>
</ul>
</div>
</li>
<li class="bannerInd">
<h2><a href="/industry.html" target="_blank">产业聚焦</a></h2>
<div>
<ul class="fix" style="width:980">
<li><a href="/industry/icar.html" target="_blank">汽车</a></li>
<li><a href="/industry/itelectric-appliance.html" target="_blank">IT/家电</a></li>
<li><a href="/industry/ienvironment.html" target="_blank">环境/能源</a></li>
<li><a href="/industry/manufacturing.html" target="_blank">工业</a></li>
<li><a href="/industry/agriculture.html" target="_blank">农林水产</a></li>
<li><a href="/industry/propertiesconstruction.html" target="_blank">地产/建设</a></li>
<li><a href="/industry/tradingretail.html" target="_blank">商业/消费</a></li>
<li><a href="/industry/scienceatechnology.html" target="_blank">科学/技术</a></li>
<li class="end"><a href="/industry/management-strategy.html" target="_blank">经营/战略</a></li>
</ul>
</div>
</li>
<li class="bannerNpr">
<h2><a href="/product.html" target="_blank">新产品</a></h2>
<div>
<ul class="fix" style="width:980">
<li><a href="/product/pdigital.html" target="_blank">数码与家电</a></li>
<li><a href="/product/automobile.html" target="_blank">汽车</a></li>
<li><a href="/product/beautyahealth.html" target="_blank">美容与健康</a></li>
<li><a href="/product/prime-goods.html" target="_blank">美品精选</a></li>
<li class="end"><a href="/product/joke-goods.html" target="_blank">非凡创意</a></li>
</ul>
</div>
</li>
<li class="bannerCjp">
<h2><a href="/trend.html" target="_blank">穿梭日本</a></h2>
<div>
<ul class="fix" style="width:980">
<li><a href="/trend/cool-japan.html" target="_blank">酷日本</a></li>
<li><a href="/trend/beautyahealth.html" target="_blank">美容健身</a></li>
<li><a href="/trend/traditional-culture.html" target="_blank">文化精粹</a></li>
<li class="end"><a href="/trend/tourism.html" target="_blank">日本逍遥行</a></li>
</ul>
</div>
</li>
<li class="bannerPoi">
<h2><a href="/columnviewpoint.html" target="_blank">专栏/观点</a></h2>
<div>
<ul class="fix" style="width:980">
<li><a href="/columnviewpoint/tearoom.html" target="_blank">中日茶坊</a></li>
<li><a href="/columnviewpoint/columns-b.html" target="_blank">肖敏捷论中日</a></li>
<li><a href="/columnviewpoint/column-special1.html" target="_blank">日本人小声说</a></li>
<li><a href="/columnviewpoint/liudicolumn.html" target="_blank">刘迪观察</a></li>
<li><a href="/columnviewpoint/zhangshicolumn.html" target="_blank">张石的樱雪鸿泥</a></li>
<li><a href="/columnviewpoint/kelongcolumn.html" target="_blank">老柯要说话</a></li>
<li><a href="/columnviewpoint/criticism.html" target="_blank">社评</a></li>
<li><a href="/columnviewpoint/viewpoint.html" target="_blank">观点</a></li>
<li class="end"><a href="/columnviewpoint/column.html" target="_blank">专栏</a></li>
</ul>
</div>
</li>
<li class="bannerJob">
<h2><a href="/career.html" target="_blank">职场/深造</a></h2>
<div>
<ul class="fix" style="width:980">
<li><a href="/career/humanresource.html" target="_blank">人才活用</a></li>
<li><a href="/career/employment.html" target="_blank">就业</a></li>
<li class="end"><a href="/career/abroadstudy.html" target="_blank">留学/教育</a></li>
</ul>
</div>
</li>
</ul>
</div>

View File

@@ -0,0 +1,302 @@
[
{
"parentName": "日经精选",
"parentHref": "/",
"href": "/top/nian-du-pan-dian-zhan-wang.html",
"text": "年度盘点展望"
},
{
"parentName": "日经精选",
"parentHref": "/",
"href": "/top/2021-04-20-01-47-39.html",
"text": "日本企业研究"
},
{
"parentName": "日经精选",
"parentHref": "/",
"href": "/top/2020-08-25-06-34-55.html?types[0]=8",
"text": "半导体/AI"
},
{
"parentName": "日经精选",
"parentHref": "/",
"href": "/top/2019-08-29-06-18-57.html",
"text": "中日深度观察"
},
{
"parentName": "日经精选",
"parentHref": "/",
"href": "/top/201604-3.html",
"text": "日本游"
},
{
"parentName": "日经精选",
"parentHref": "/",
"href": "/top/2021-03-03-07-02-53.html?types[0]=8",
"text": "脱碳经济"
},
{
"parentName": "日经精选",
"parentHref": "/",
"href": "/top/bp.html?types[0]=8",
"text": "日经BP精选"
},
{
"parentName": "日经精选",
"parentHref": "/",
"href": "/top/ft.html?types[0]=8",
"text": "FT中文网精选"
},
{
"parentName": "日经精选",
"parentHref": "/",
"href": "/top/foa2024.html?types[0]=8",
"text": "亚洲的未来"
},
{
"parentName": "中国",
"parentHref": "/china.html",
"href": "/china/ceconomy.html",
"text": "经济"
},
{
"parentName": "中国",
"parentHref": "/china.html",
"href": "/china/ccompany.html",
"text": "企业"
},
{
"parentName": "中国",
"parentHref": "/china.html",
"href": "/china/cfinancial.html",
"text": "金融市场"
},
{
"parentName": "中国",
"parentHref": "/china.html",
"href": "/china/cpolicssociety.html",
"text": "政治/社会"
},
{
"parentName": "政经观察",
"parentHref": "/politicsaeconomy.html",
"href": "/politicsaeconomy/epolitics.html",
"text": "宏观经济"
},
{
"parentName": "政经观察",
"parentHref": "/politicsaeconomy.html",
"href": "/politicsaeconomy/economic-policy.html",
"text": "经济政策"
},
{
"parentName": "政经观察",
"parentHref": "/politicsaeconomy.html",
"href": "/politicsaeconomy/stockforex.html",
"text": "股市/外汇"
},
{
"parentName": "政经观察",
"parentHref": "/politicsaeconomy.html",
"href": "/politicsaeconomy/investtrade.html",
"text": "投资/贸易"
},
{
"parentName": "政经观察",
"parentHref": "/politicsaeconomy.html",
"href": "/politicsaeconomy/efinance.html",
"text": "金融"
},
{
"parentName": "政经观察",
"parentHref": "/politicsaeconomy.html",
"href": "/politicsaeconomy/commodity.html",
"text": "大宗商品"
},
{
"parentName": "政经观察",
"parentHref": "/politicsaeconomy.html",
"href": "/politicsaeconomy/politicsasociety.html",
"text": "政治/社会"
},
{
"parentName": "产业聚焦",
"parentHref": "/industry.html",
"href": "/industry/icar.html",
"text": "汽车"
},
{
"parentName": "产业聚焦",
"parentHref": "/industry.html",
"href": "/industry/itelectric-appliance.html",
"text": "IT/家电"
},
{
"parentName": "产业聚焦",
"parentHref": "/industry.html",
"href": "/industry/ienvironment.html",
"text": "环境/能源"
},
{
"parentName": "产业聚焦",
"parentHref": "/industry.html",
"href": "/industry/manufacturing.html",
"text": "工业"
},
{
"parentName": "产业聚焦",
"parentHref": "/industry.html",
"href": "/industry/agriculture.html",
"text": "农林水产"
},
{
"parentName": "产业聚焦",
"parentHref": "/industry.html",
"href": "/industry/propertiesconstruction.html",
"text": "地产/建设"
},
{
"parentName": "产业聚焦",
"parentHref": "/industry.html",
"href": "/industry/tradingretail.html",
"text": "商业/消费"
},
{
"parentName": "产业聚焦",
"parentHref": "/industry.html",
"href": "/industry/scienceatechnology.html",
"text": "科学/技术"
},
{
"parentName": "产业聚焦",
"parentHref": "/industry.html",
"href": "/industry/management-strategy.html",
"text": "经营/战略"
},
{
"parentName": "新产品",
"parentHref": "/product.html",
"href": "/product/pdigital.html",
"text": "数码与家电"
},
{
"parentName": "新产品",
"parentHref": "/product.html",
"href": "/product/automobile.html",
"text": "汽车"
},
{
"parentName": "新产品",
"parentHref": "/product.html",
"href": "/product/beautyahealth.html",
"text": "美容与健康"
},
{
"parentName": "新产品",
"parentHref": "/product.html",
"href": "/product/prime-goods.html",
"text": "美品精选"
},
{
"parentName": "新产品",
"parentHref": "/product.html",
"href": "/product/joke-goods.html",
"text": "非凡创意"
},
{
"parentName": "穿梭日本",
"parentHref": "/trend.html",
"href": "/trend/cool-japan.html",
"text": "酷日本"
},
{
"parentName": "穿梭日本",
"parentHref": "/trend.html",
"href": "/trend/beautyahealth.html",
"text": "美容健身"
},
{
"parentName": "穿梭日本",
"parentHref": "/trend.html",
"href": "/trend/traditional-culture.html",
"text": "文化精粹"
},
{
"parentName": "穿梭日本",
"parentHref": "/trend.html",
"href": "/trend/tourism.html",
"text": "日本逍遥行"
},
{
"parentName": "专栏/观点",
"parentHref": "/columnviewpoint.html",
"href": "/columnviewpoint/tearoom.html",
"text": "中日茶坊"
},
{
"parentName": "专栏/观点",
"parentHref": "/columnviewpoint.html",
"href": "/columnviewpoint/columns-b.html",
"text": "肖敏捷论中日"
},
{
"parentName": "专栏/观点",
"parentHref": "/columnviewpoint.html",
"href": "/columnviewpoint/column-special1.html",
"text": "日本人小声说"
},
{
"parentName": "专栏/观点",
"parentHref": "/columnviewpoint.html",
"href": "/columnviewpoint/liudicolumn.html",
"text": "刘迪观察"
},
{
"parentName": "专栏/观点",
"parentHref": "/columnviewpoint.html",
"href": "/columnviewpoint/zhangshicolumn.html",
"text": "张石的樱雪鸿泥"
},
{
"parentName": "专栏/观点",
"parentHref": "/columnviewpoint.html",
"href": "/columnviewpoint/kelongcolumn.html",
"text": "老柯要说话"
},
{
"parentName": "专栏/观点",
"parentHref": "/columnviewpoint.html",
"href": "/columnviewpoint/criticism.html",
"text": "社评"
},
{
"parentName": "专栏/观点",
"parentHref": "/columnviewpoint.html",
"href": "/columnviewpoint/viewpoint.html",
"text": "观点"
},
{
"parentName": "专栏/观点",
"parentHref": "/columnviewpoint.html",
"href": "/columnviewpoint/column.html",
"text": "专栏"
},
{
"parentName": "职场/深造",
"parentHref": "/career.html",
"href": "/career/humanresource.html",
"text": "人才活用"
},
{
"parentName": "职场/深造",
"parentHref": "/career.html",
"href": "/career/employment.html",
"text": "就业"
},
{
"parentName": "职场/深造",
"parentHref": "/career.html",
"href": "/career/abroadstudy.html",
"text": "留学/教育"
}
]

6
src/crawl/nikkei/main.ts Normal file
View File

@@ -0,0 +1,6 @@
// 抓取nikkei网站的新闻
// import { crawlUrl } from '../constants'