feat(route): add yahoo japan covid19 news collection 日本疫情消息汇总 (#8099)

This commit is contained in:
sgqy
2021-11-27 16:44:17 +09:00
committed by GitHub
parent 0830c06c6a
commit 1e6c0cf282
4 changed files with 90 additions and 0 deletions

View File

@@ -51,6 +51,12 @@ Official Website: [https://www.ssm.gov.mo/apps1/PreventWuhanInfection/en.aspx](h
<RouteEn author="Gnnng" example="/coronavirus/sg-moh" path="/coronavirus/sg-moh"/>
### Yahoo Japan COVID19 news collection
Official Website: <https://news.yahoo.co.jp/pages/article/20200207>
<RouteEn author="sgqy" example="/coronavirus/yahoo-japan" path="/coronavirus/yahoo-japan/:tdfk?" :paramsDesc="['Romaji name of the prefecture (todofuken). It can be found in the URL of the area detail page, e.g. kyoto']"/>
## Darwin Awards
### Articles

View File

@@ -870,6 +870,12 @@ type 为 all 时category 参数不支持 cost 和 free
<Route author="Gnnng" example="/coronavirus/sg-moh" path="/coronavirus/sg-moh"/>
### Yahoo Japan 新型コロナウイルス感染症まとめ
新闻主页:<https://news.yahoo.co.jp/pages/article/20200207>
<Route author="sgqy" example="/coronavirus/yahoo-japan" path="/coronavirus/yahoo-japan/:tdfk?" :paramsDesc="['都道府県的拼音,可从地图详情页的链接中获取。例如:京都府 = kyoto']"/>
## 新趣集
> 官方 Feed 地址为: <https://xinquji.com/rss>

View File

@@ -2566,6 +2566,7 @@ router.get('/coronavirus/nhc', lazyloadRouteHandler('./routes/coronavirus/nhc'))
router.get('/coronavirus/mogov-2019ncov/:lang', lazyloadRouteHandler('./routes/coronavirus/mogov-2019ncov'));
router.get('/coronavirus/qq/fact', lazyloadRouteHandler('./routes/tencent/factcheck'));
router.get('/coronavirus/sg-moh', lazyloadRouteHandler('./routes/coronavirus/sg-moh'));
router.get('/coronavirus/yahoo-japan/:tdfk?', lazyloadRouteHandler('./routes/coronavirus/yahoo-japan'));
// Nanjing Forestry University - Academic Affairs Office
router.get('/njfu/jwc/:category?', lazyloadRouteHandler('./routes/universities/njfu/jwc'));

77
lib/routes/coronavirus/yahoo-japan.js vendored Normal file
View File

@@ -0,0 +1,77 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const dayjs = require('dayjs');
module.exports = async (ctx) => {
const tdfk = ctx.params.tdfk || false;
const uri = tdfk ? `https://news.yahoo.co.jp/pages/article/covid19${tdfk}` : `https://news.yahoo.co.jp/pages/article/20200207`;
const req_header = { 'User-Agent': 'Mozilla/5.0 (Linux; Android 9; SM-G960F Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/74.0.3729.157 Mobile Safari/537.36' };
const resp = await got({
method: 'get',
url: uri,
headers: req_header,
});
const $ = cheerio.load(resp.data);
const this_year = dayjs().year();
const this_month = dayjs().month() + 1;
const art_uri = [];
$('#layoutFooter ul.dlpThumbLink a').each((i, e) => {
const link = $(e).attr('href');
const text = $(e).find('.dlpThumbText span').eq(0).text();
const author = $(e).find('.dlpQuote').text();
const date = $(e).find('.dlpDate').text() + ' +9'; // explicit timezone
let date_obj = dayjs(date).year(this_year);
if (date_obj.month() + 1 > this_month) {
// if the article is from the last year
date_obj = date_obj.year(this_year - 1);
}
art_uri.push({ l: link, t: text, a: author, d: date_obj.toString() });
});
const getNews = async (uri) => {
const ret = {
link: uri.l,
title: uri.t,
author: uri.a,
pubDate: uri.d,
description: null,
};
if (!uri.l.includes('//news.yahoo.co.jp')) {
return ret; // do not process uncertain pages
}
const page_data = await ctx.cache.tryGet(uri.l, async () => {
const resp = await got({
method: 'get',
url: uri.l,
headers: req_header,
});
return resp.data;
});
const $doc = cheerio.load(page_data);
const iso_date = $doc('meta[name="pubdate"]').attr('content');
ret.pubDate = dayjs(iso_date).toString();
ret.description = $doc('div.article_body').html() || $doc('meta[name="description"]').attr('content');
return ret;
};
const items = await Promise.all(art_uri.map(getNews));
ctx.state.data = {
title: $('title').text(),
link: uri,
description: $('meta[name="description"]').attr('content'),
item: items,
};
};