diff --git a/docs/en/other.md b/docs/en/other.md
index fe27986a59..ea9355faff 100644
--- a/docs/en/other.md
+++ b/docs/en/other.md
@@ -51,6 +51,12 @@ Official Website: [https://www.ssm.gov.mo/apps1/PreventWuhanInfection/en.aspx](h
+### Yahoo Japan COVID19 news collection
+
+Official Website: <https://news.yahoo.co.jp/pages/article/20200207>
+
+
+
 ## Darwin Awards
 
 ### Articles
diff --git a/docs/other.md b/docs/other.md
index babd3206e8..dd44c98f9e 100644
--- a/docs/other.md
+++ b/docs/other.md
@@ -870,6 +870,12 @@ type 为 all 时,category 参数不支持 cost 和 free
+### Yahoo Japan 新型コロナウイルス感染症まとめ
+
+新闻主页: <https://news.yahoo.co.jp/pages/article/20200207>
+
+
+
 ## 新趣集
 
 > 官方 Feed 地址为:
diff --git a/lib/router.js b/lib/router.js
index 2383030b4d..c335b8675f 100644
--- a/lib/router.js
+++ b/lib/router.js
@@ -2566,6 +2566,7 @@ router.get('/coronavirus/nhc', lazyloadRouteHandler('./routes/coronavirus/nhc'))
 router.get('/coronavirus/mogov-2019ncov/:lang', lazyloadRouteHandler('./routes/coronavirus/mogov-2019ncov'));
 router.get('/coronavirus/qq/fact', lazyloadRouteHandler('./routes/tencent/factcheck'));
 router.get('/coronavirus/sg-moh', lazyloadRouteHandler('./routes/coronavirus/sg-moh'));
+router.get('/coronavirus/yahoo-japan/:tdfk?', lazyloadRouteHandler('./routes/coronavirus/yahoo-japan'));
 
 // 南京林业大学教务处
 router.get('/njfu/jwc/:category?', lazyloadRouteHandler('./routes/universities/njfu/jwc'));
diff --git a/lib/routes/coronavirus/yahoo-japan.js b/lib/routes/coronavirus/yahoo-japan.js
new file mode 100644
index 0000000000..50aa1fae7f
--- /dev/null
+++ b/lib/routes/coronavirus/yahoo-japan.js
@@ -0,0 +1,106 @@
+const got = require('@/utils/got');
+const cheerio = require('cheerio');
+const dayjs = require('dayjs');
+
+/**
+ * RSSHub route handler for `/coronavirus/yahoo-japan/:tdfk?`.
+ *
+ * Loads a Yahoo! Japan news COVID-19 collection page, collects the article
+ * links listed in its footer and, for links on news.yahoo.co.jp, loads each
+ * article (through ctx.cache) to fill in the description and publication date.
+ *
+ * @param {object} ctx - Request context: reads `ctx.params.tdfk` and `ctx.cache`, writes `ctx.state.data`.
+ */
+module.exports = async (ctx) => {
+    const { tdfk } = ctx.params;
+    // `tdfk` comes straight from the request path, so encode it before it becomes part of the URL.
+    const page_uri = tdfk ? `https://news.yahoo.co.jp/pages/article/covid19${encodeURIComponent(tdfk)}` : 'https://news.yahoo.co.jp/pages/article/20200207';
+    const req_header = {
+        'User-Agent': 'Mozilla/5.0 (Linux; Android 9; SM-G960F Build/PPR1.180610.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/74.0.3729.157 Mobile Safari/537.36',
+    };
+
+    const resp = await got({
+        method: 'get',
+        url: page_uri,
+        headers: req_header,
+    });
+
+    const $ = cheerio.load(resp.data);
+
+    const now = dayjs();
+    const this_year = now.year();
+    const this_month = now.month() + 1;
+
+    const articles = [];
+    $('#layoutFooter ul.dlpThumbLink a').each((i, e) => {
+        const $a = $(e);
+        const link = $a.attr('href');
+        if (!link) {
+            return; // an anchor without a target cannot become a feed item
+        }
+
+        const title = $a.find('.dlpThumbText span').eq(0).text();
+        const author = $a.find('.dlpQuote').text();
+        const date = `${$a.find('.dlpDate').text()} +9`; // explicit timezone
+
+        // The year is forced to the current one; a month later than the
+        // current month is taken to mean the article is from last year.
+        let date_obj = dayjs(date).year(this_year);
+        if (date_obj.month() + 1 > this_month) {
+            date_obj = date_obj.year(this_year - 1);
+        }
+
+        articles.push({
+            link,
+            title,
+            author,
+            // Leave the date unset rather than emitting the string "Invalid Date".
+            pubDate: date_obj.isValid() ? date_obj.toString() : undefined,
+        });
+    });
+
+    const getNews = async (article) => {
+        const item = {
+            link: article.link,
+            title: article.title,
+            author: article.author,
+            pubDate: article.pubDate,
+            description: null,
+        };
+
+        if (!article.link.includes('//news.yahoo.co.jp')) {
+            return item; // do not process uncertain pages
+        }
+
+        const page_data = await ctx.cache.tryGet(article.link, async () => {
+            const page_resp = await got({
+                method: 'get',
+                url: article.link,
+                headers: req_header,
+            });
+            return page_resp.data;
+        });
+
+        const $doc = cheerio.load(page_data);
+
+        // dayjs(undefined) is the current time, so only trust the meta tag when
+        // it is present and parses; otherwise keep the date from the listing.
+        const iso_date = $doc('meta[name="pubdate"]').attr('content');
+        const published = iso_date ? dayjs(iso_date) : null;
+        if (published && published.isValid()) {
+            item.pubDate = published.toString();
+        }
+        item.description = $doc('div.article_body').html() || $doc('meta[name="description"]').attr('content');
+
+        return item;
+    };
+
+    const items = await Promise.all(articles.map(getNews));
+
+    ctx.state.data = {
+        title: $('title').text(),
+        link: page_uri,
+        description: $('meta[name="description"]').attr('content'),
+        item: items,
+    };
+};