// Mirror of https://github.com/DIYgod/RSSHub.git (synced 2025-12-01 09:38:04 +08:00)
// 126 lines, 4.0 KiB, JavaScript
const path = require('path');
|
|
const { art } = require('@/utils/render');
|
|
const got = require('@/utils/got');
|
|
const cheerio = require('cheerio');
|
|
|
|
/**
 * Render an article description through the `templates/description.art`
 * template that lives next to this file.
 *
 * @param {object} parts - Pieces scraped from the article page.
 * @param {string} parts.subtitle - Text of the article's subtitle element.
 * @param {string} parts.quotation - Text of the article's lead-in element.
 * @param {Array<object>} parts.pics - Picture records (`src` / `des`) found in the article.
 * @param {string|null} parts.article - Inner HTML of the article body.
 * @returns {*} Whatever `art` produces for the template and data.
 */
const renderDescription = ({ subtitle, quotation, pics, article }) => {
    const templatePath = path.join(__dirname, 'templates/description.art');
    const templateData = { subtitle, quotation, pics, article };

    return art(templatePath, templateData);
};
|
|
|
|
/**
 * Download `url` and count the children of its `.right1 .bmname` element.
 *
 * The loaded cheerio document is returned alongside the count so the caller
 * can keep querying the same page without requesting it a second time.
 *
 * @param {string} url - Address of the page to download.
 * @returns {Promise<{ pageLength: number, $: Function }>} Child count and the loaded document.
 */
const getPageLength = async (url) => {
    const { data: html } = await got({ method: 'get', url });
    const $ = cheerio.load(html);

    return {
        pageLength: $('.right1 .bmname').children().length,
        $,
    };
};
|
|
|
|
/**
 * Collect the article entries listed on one page of the paper.
 *
 * @param {Function} $ - Cheerio document loaded with the page's HTML.
 * @param {string} paperUrl - Base URL each article `href` is appended to.
 * @returns {Array<{ link: string, title: string }>} One entry per list item
 *     that carries a link; each title is prefixed with the page name in
 *     square brackets.
 */
const getArticleList = ($, paperUrl) => {
    const pageName = $('.zi .zi-top .banci strong').text();

    return $('.zi-meat ul>li')
        .toArray()
        .flatMap((element) => {
            const entry = $(element);
            const href = entry.find('a').attr('href');

            // A list item without an anchor/href would otherwise turn into
            // `${paperUrl}/undefined`, a URL that gets fetched later on and
            // can fail the whole feed. Leave such items out instead.
            if (!href) {
                return [];
            }

            return [
                {
                    link: `${paperUrl}/${href}`,
                    title: `[${pageName}] ${entry.find('a div').text()}`,
                },
            ];
        });
};
|
|
/**
 * Build a zero-argument async function that downloads one page of the paper
 * and returns the article list found on it.
 *
 * The file requested is `node_<page + 1>.htm` under `paperUrl`.
 *
 * @param {string} paperUrl - Base URL of the issue; also used as the base for article links.
 * @param {number} [page=1] - Page number; the requested node number is one higher.
 * @returns {() => Promise<Array<{ link: string, title: string }>>} Deferred loader for that page.
 */
const createGetPageArticleList = (paperUrl, page = 1) =>
    async function getPageArticleList() {
        const pageUrl = `${paperUrl}/node_${page + 1}.htm`;
        const { data: html } = await got({ method: 'get', url: pageUrl });

        return getArticleList(cheerio.load(html), paperUrl);
    };
|
|
|
|
/**
 * Fill in `author` and `description` for every entry of `list` by downloading
 * the entry's article page.
 *
 * Each download is handed to `cache.tryGet`, keyed by the entry's link; all
 * entries are processed concurrently. Entries are updated in place and are
 * also what the loader returns.
 *
 * @param {Array<{ link: string, title: string }>} list - Entries to complete.
 * @param {{ tryGet: Function }} cache - Cache object exposing `tryGet(key, loader)`.
 * @returns {Promise<Array<object>>} The values produced by `cache.tryGet`, in list order.
 */
const getListArticles = (list, cache) => {
    // Collapse the rows of one `.picture` element into a single record:
    // a row containing an <img> with a src supplies `src`, any other row
    // supplies `des` (the text of its cells). Later rows overwrite earlier ones.
    const readPicture = ($, pictureElement) => {
        const pic = {};

        for (const row of $(pictureElement).find('tr').toArray()) {
            const src = $(row).find('img').attr('src');
            if (src) {
                pic.src = src;
            } else {
                pic.des = $(row).find('td').text();
            }
        }

        return pic;
    };

    const loadArticle = async (entry) => {
        const { data } = await got(entry.link);
        const $ = cheerio.load(data);

        const quotation = $('.right-meat .yinti').text();
        const subtitle = $('.right-meat .futi').text();
        const article = $('.right-meat .tuwen .article #ozoom').html();
        const pics = $('.right-meat .tuwen .picture')
            .toArray()
            .map((pictureElement) => readPicture($, pictureElement));

        return Object.assign(entry, {
            author: $('.right-meat .author').text(),
            description: renderDescription({ subtitle, quotation, article, pics }),
        });
    };

    return Promise.all(list.map((entry) => cache.tryGet(entry.link, () => loadArticle(entry))));
};
|
|
|
|
module.exports = async (ctx) => {
|
|
const date = new Date();
|
|
const dateStr = date.toLocaleDateString('zh-CN', {
|
|
year: 'numeric',
|
|
month: '2-digit',
|
|
day: '2-digit',
|
|
});
|
|
const formatedDate = dateStr.replace('/', '-'); // 'yyyy-mm/dd'
|
|
|
|
const rootUrl = 'http://digitalpaper.stdaily.com/http_www.kjrb.com/kjrb/html';
|
|
const currentUrl = `${rootUrl}/${formatedDate}`;
|
|
|
|
// get page
|
|
const restPageLists = [];
|
|
const allPageArticleLists = [];
|
|
const { pageLength, $ } = await getPageLength(`${currentUrl}/node_2.htm`);
|
|
allPageArticleLists.push(...getArticleList($, currentUrl));
|
|
for (let i = 2; i <= pageLength; i++) {
|
|
restPageLists.push(createGetPageArticleList(currentUrl, i));
|
|
}
|
|
// get pages' article list
|
|
allPageArticleLists.push(...(await Promise.all(restPageLists.map((getPageArticleList) => getPageArticleList()))).flat());
|
|
// get all articles
|
|
const items = await getListArticles(allPageArticleLists, ctx.cache);
|
|
|
|
ctx.state.data = {
|
|
title: `科技日报`,
|
|
link: 'http://digitalpaper.stdaily.com',
|
|
item: items,
|
|
};
|
|
};
|