Files
RSSHub/lib/v2/stdaily/digitalpaper.js
lyqluis 6c89b3804a feat(route): add 科技日报 (#12235)
* feat(route): add 科技日报

* feat(route): add 科技日报
2023-04-07 20:17:07 +08:00

126 lines
4.0 KiB
JavaScript

const path = require('path');
const { art } = require('@/utils/render');
const got = require('@/utils/got');
const cheerio = require('cheerio');
/**
 * Render the HTML description of one article with the `description.art`
 * template that lives in the `templates` directory next to this file.
 *
 * @param {object} parts - Values forwarded unchanged to the template.
 * @param {*} parts.subtitle - Passed to the template as `subtitle`.
 * @param {*} parts.quotation - Passed to the template as `quotation`.
 * @param {*} parts.pics - Passed to the template as `pics`.
 * @param {*} parts.article - Passed to the template as `article`.
 * @returns {*} Whatever `art` returns (presumably the rendered HTML string — confirm against `@/utils/render`).
 */
const renderDescription = ({ subtitle, quotation, pics, article }) => {
    const templatePath = path.join(__dirname, 'templates/description.art');
    const templateData = { subtitle, quotation, pics, article };
    return art(templatePath, templateData);
};
/**
 * Download a page of the digital paper and count its page-index entries.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<{pageLength: number, $: Function}>} `pageLength` is the
 *   number of children of `.right1 .bmname` (presumably one per newspaper
 *   page — verify against the site markup); `$` is the cheerio handle of the
 *   fetched document so callers can keep querying it without a second request.
 */
const getPageLength = async (url) => {
    const { data: html } = await got({ method: 'get', url });
    const $ = cheerio.load(html);
    const pageIndex = $('.right1 .bmname');
    return { pageLength: pageIndex.children().length, $ };
};
/**
 * Extract the article entries listed on an already-loaded page.
 *
 * @param {Function} $ - cheerio handle of the page document.
 * @param {string} paperUrl - Base URL that each article's `href` is appended to.
 * @returns {Array<{link: string, title: string}>} One entry per `.zi-meat ul>li`
 *   element; the title is prefixed with the page name read from
 *   `.zi .zi-top .banci strong`.
 */
const getArticleList = ($, paperUrl) => {
    const pageName = $('.zi .zi-top .banci strong').text();
    return $('.zi-meat ul>li')
        .toArray()
        .map((entry) => {
            const $entry = $(entry);
            const href = $entry.find('a').attr('href');
            const headline = $entry.find('a div').text();
            return {
                link: `${paperUrl}/${href}`,
                title: `[${pageName}] ${headline}`,
            };
        });
};
/**
 * Build a deferred loader for one page of the paper.
 *
 * Nothing is fetched until the returned function is invoked.
 *
 * @param {string} paperUrl - Base URL of the issue.
 * @param {number} [page=1] - Page number; the file requested is
 *   `node_${page + 1}.htm` under `paperUrl`.
 * @returns {function(): Promise<Array<{link: string, title: string}>>} Async
 *   function that downloads the page and returns its article list.
 */
const createGetPageArticleList = (paperUrl, page = 1) => async () => {
    const { data: html } = await got({
        method: 'get',
        url: `${paperUrl}/node_${page + 1}.htm`,
    });
    return getArticleList(cheerio.load(html), paperUrl);
};
/**
 * Fetch every article in `list` and fill in its author and description.
 *
 * Each article is resolved through `cache.tryGet`, keyed by its link, so the
 * loader only runs when the cache decides it has to.
 *
 * @param {Array<{link: string, title: string}>} list - Article stubs; each
 *   object is mutated in place (gains `author` and `description`).
 * @param {{tryGet: Function}} cache - Cache exposing `tryGet(key, loader)`.
 * @returns {Promise<Array>} The values produced by `cache.tryGet`, in list order.
 */
const getListArticles = async (list, cache) => {
    // Download one article page and enrich its stub.
    const loadArticle = async (entry) => {
        const { data } = await got(entry.link);
        const $ = cheerio.load(data);

        const quotation = $('.right-meat .yinti').text();
        const subtitle = $('.right-meat .futi').text();
        const article = $('.right-meat .tuwen .article #ozoom').html();

        // Every `.picture` element yields one { src, des } object: a row that
        // contains an <img> supplies `src`, any other row supplies `des`.
        const pics = $('.right-meat .tuwen .picture')
            .toArray()
            .map((figure) => {
                const pic = {};
                $(figure)
                    .find('tr')
                    .each((_, row) => {
                        const $row = $(row);
                        const src = $row.find('img').attr('src');
                        if (src) {
                            pic.src = src;
                        } else {
                            pic.des = $row.find('td').text();
                        }
                    });
                return pic;
            });

        entry.author = $('.right-meat .author').text();
        entry.description = renderDescription({ subtitle, quotation, article, pics });
        return entry;
    };

    const pending = list.map((entry) => cache.tryGet(entry.link, () => loadArticle(entry)));
    return Promise.all(pending);
};
module.exports = async (ctx) => {
const date = new Date();
const dateStr = date.toLocaleDateString('zh-CN', {
year: 'numeric',
month: '2-digit',
day: '2-digit',
});
const formatedDate = dateStr.replace('/', '-'); // 'yyyy-mm/dd'
const rootUrl = 'http://digitalpaper.stdaily.com/http_www.kjrb.com/kjrb/html';
const currentUrl = `${rootUrl}/${formatedDate}`;
// get page
const restPageLists = [];
const allPageArticleLists = [];
const { pageLength, $ } = await getPageLength(`${currentUrl}/node_2.htm`);
allPageArticleLists.push(...getArticleList($, currentUrl));
for (let i = 2; i <= pageLength; i++) {
restPageLists.push(createGetPageArticleList(currentUrl, i));
}
// get pages' article list
allPageArticleLists.push(...(await Promise.all(restPageLists.map((getPageArticleList) => getPageArticleList()))).flat());
// get all articles
const items = await getListArticles(allPageArticleLists, ctx.cache);
ctx.state.data = {
title: `科技日报`,
link: 'http://digitalpaper.stdaily.com',
item: items,
};
};