diff --git a/docs/en/traditional-media.md b/docs/en/traditional-media.md index 79a406e9de..c7d7e6199a 100644 --- a/docs/en/traditional-media.md +++ b/docs/en/traditional-media.md @@ -259,27 +259,56 @@ Parameters can be obtained from the official website, for instance: ## Reuters -### Channel +::: warning Migration notes - +1. Reuters Chinese site (`cn.reuters.com`) and British site (`uk.reuters.com`) have been terminated, redirecting to the main site (`www.reuters.com`) +2. The old routes are deprecated. Please migrate to the new routes documented below -Supported sub-sites: +::: -- 中国分站 `cn`: - See [路透社中国分站](../traditional-media.html#lu-tou-she) +### Category/Topic/Author -- US site `us`: - | Business | Markets | World | Politics | Tech | Breakingviews | Wealth | Life | - | -------- | ------- | ----- | -------- | ---------- | ------------- | ------ | --------- | - | business | markets | world | politics | technology | breakingviews | wealth | lifestyle | + -- UK site `uk`: - | Business | World | UK | Tech | Money | Breakingviews | Life | - | -------- | ----- | --- | ---------- | --------------- | ------------- | --------- | - | business | world | uk | technology | personalFinance | breakingviews | lifestyle | +- `:category`: + | World | Business | Legal | Markets | Breakingviews | Technology | + | -------- | ------- | ----- | -------- | ------------- | ---------- | + | world | business | legal | markets | breakingviews | technology | + + +- `world/:topic`: + + | All | Africa | Americas | Asia Pacific | China | Europe | India | Middle East | United Kingdom | United States | The Great Reboot | Reuters Next | + | ---- | ------- | -------- | ------------ | ----- | ------ | ----- | ----------- | -------------- | ------------- | ---------------- | ------------ | + | | africa | americas | asia-pacific | china | europe | india | middle-east | uk | us | the-great-reboot | reuters-next | + + +- `business/:topic`: + + | All | Aerospace & Defense | Autos & Transportation | 
Energy | Environment | Finance | Healthcare & Pharmaceuticals | Media & Telecom | Retail & Consumer | Sustainable Business | Charged | Future of Health | Future of Money | Take Five | Reuters Impact | + | ---- | ------------------- | ---------------------- | ------ | ----------- | ------- | ---------------------------- | --------------- | ----------------- | -------------------- | ------- | ---------------- | --------------- | --------- | -------------- | + | | aerospace-defense | autos-transportation | energy | environment | finance | healthcare-pharmaceuticals | media-telecom | retail-consumer | sustainable-business | charged | future-of-health | future-of-money | take-five | reuters-impact | + +- `legal/:topic`: + + | All | Government | Legal Industry | Litigation | Transaction | + | --- | --------- | -------------- | ---------- | ----------- | + | | government | legalindustry | litigation | transaction | + +- `authors/:topic`: + + | Default | Jonathan Landay | any other authors | + | ------- | --------------- | ----------------- | + | reuters | jonathan-landay | their name in URL | + +More can be found in the URL of the category/topic page. +### Investigates + + + ## RTHK ### News diff --git a/docs/traditional-media.md b/docs/traditional-media.md index 53e221634c..d212aa84bc 100644 --- a/docs/traditional-media.md +++ b/docs/traditional-media.md @@ -1153,51 +1153,57 @@ category 对应的关键词有 ## 路透社 -### 实时资讯 +::: warning 迁移说明 - +1. 路透社中文网站 (`cn.reuters.com`) 和英国网站 (`uk.reuters.com`) 已经关闭,并重定向至主站 (`www.reuters.com`) +2. 
旧路由已被废弃,请迁移至下面列出的新路由 -### 频道 +::: - +### 分类 / 话题 / 作者 -支持语言列表 + -- 中国分站 `cn`: +- `:category`: + | World | Business | Legal | Markets | Breakingviews | Technology | + | -------- | ------- | ----- | -------- | ------------- | ---------- | + | world | business | legal | markets | breakingviews | technology | - - 主频道: - | 深度分析 | 时事要闻 | 生活 | 投资 | - | -------- | ----------- | ---- | --------- | - | analyses | generalnews | life | investing | +- `world/:topic`: - - 资讯子频道: + | All | Africa | Americas | Asia Pacific | China | Europe | India | Middle East | United Kingdom | United States | The Great Reboot | Reuters Next | + | ---- | ------- | -------- | ------------ | ----- | ------ | ----- | ----------- | -------------- | ------------- | ---------------- | ------------ | + | | africa | americas | asia-pacific | china | europe | india | middle-east | uk | us | the-great-reboot | reuters-next | - | 中国财经 | 国际财经 | 新闻人物 | 财经视点 | - | ----- | --------------------- | --------- | -------- | - | china | internationalbusiness | newsmaker | opinions | - - 专栏子频道: +- `business/:topic`: - | 中国财经专栏 | 国际财经专栏 | 大宗商品专栏 | - | -------- | --------- | --------- | - | CnColumn | IntColumn | ComColumn | + | All | Aerospace & Defense | Autos & Transportation | Energy | Environment | Finance | Healthcare & Pharmaceuticals | Media & Telecom | Retail & Consumer | Sustainable Business | Charged | Future of Health | Future of Money | Take Five | Reuters Impact | + | ---- | ------------------- | ---------------------- | ------ | ----------- | ------- | ---------------------------- | --------------- | ----------------- | -------------------- | ------- | ---------------- | --------------- | --------- | -------------- | + | | aerospace-defense | autos-transportation | energy | environment | finance | healthcare-pharmaceuticals | media-telecom | retail-consumer | sustainable-business | charged | future-of-health | future-of-money | take-five | reuters-impact | -- 美国分站 `us`: +- `legal/:topic`: - - 主频道: + | 
All | Government | Legal Industry | Litigation | Transaction | + | --- | --------- | -------------- | ---------- | ----------- | + | | government | legalindustry | litigation | transaction | - | Business | Markets | World | Politics | Tech | Breakingviews | Wealth | Life | - | -------- | ------- | ----- | -------- | ---------- | ------------- | ------ | --------- | - | business | markets | world | politics | technology | breakingviews | wealth | lifestyle | -- 英国分站 `uk`: - - 主频道: +- `authors/:topic`: - | Business | Markets | World | UK | Tech | Money | Breakingviews | Sport | Life | - | -------- | ------- | ----- | -- | ---------- | --------------- | ------------- | ------ | --------- | - | business | markets | world | uk | technology | personalFinance | breakingviews | sports | lifestyle | + | 默认 | Jonathan Landay | 其他作者 | + | ------- | --------------- | ----------------- | + | reuters | jonathan-landay | 作者名,可在 URL 中找到 | + +可在分类 / 话题页的 URL 中找到更多。 + + + +### 深度调查栏目 + + diff --git a/lib/router.js b/lib/router.js index 7ac0c38db5..ad8c65eb66 100644 --- a/lib/router.js +++ b/lib/router.js @@ -2085,9 +2085,6 @@ router.get('/cug/gcxy/:type?', lazyloadRouteHandler('./routes/universities/cug/g // 海猫吧 router.get('/haimaoba/:id?', lazyloadRouteHandler('./routes/haimaoba/comics')); -// 路透社 -router.get('/reuters/channel/:site/:channel', lazyloadRouteHandler('./routes/reuters/channel')); - // 蒲公英 router.get('/pgyer/:app?', lazyloadRouteHandler('./routes/pgyer/app')); @@ -2976,9 +2973,6 @@ router.get('/dsb/area/:area', lazyloadRouteHandler('./routes/dsb/area')); // 靠谱新闻 router.get('/kaopunews/:language?', lazyloadRouteHandler('./routes/kaopunews')); -// Reuters -router.get('/reuters/theWire', lazyloadRouteHandler('./routes/reuters/theWire')); - // 格隆汇 router.get('/gelonghui/user/:id', lazyloadRouteHandler('./routes/gelonghui/user')); router.get('/gelonghui/subject/:id', lazyloadRouteHandler('./routes/gelonghui/subject')); diff --git a/lib/routes/reuters/channel.js 
b/lib/routes/reuters/channel.js deleted file mode 100644 index 5f054e6a62..0000000000 --- a/lib/routes/reuters/channel.js +++ /dev/null @@ -1,155 +0,0 @@ -const got = require('@/utils/got'); -const cheerio = require('cheerio'); -const utils = require('./utils'); - -module.exports = async (ctx) => { - const { site } = ctx.params || 'cn'; - const homePage = `https://${site === 'us' ? 'www' : site}.reuters.com`; - - let title = 'Reuters ', - link = `https://${site === 'us' ? 'www' : site}.reuters.com/news/`, - linkSelector; - - const { channel } = ctx.params; - - if (channel) { - if (site === 'cn') { - title = '路透社 '; - linkSelector = '.inlineLinks a'; - switch (channel) { - case 'china': - title += '中国财经'; - break; - case 'internationalbusiness': - title += '国际财经'; - break; - case 'newsmaker': - title += '新闻人物'; - break; - case 'opinions': - title += '财经视点'; - break; - case 'analyses': - title += '深度分析'; - break; - case 'generalnews': - title += '时事要闻'; - break; - case 'CnColumn': - title += '中国财经专栏'; - break; - case 'ComColumn': - title += '大宗商品专栏'; - break; - case 'IntColumn': - title += '国际财经专栏'; - break; - case 'investing': - title += '投资'; - link = homePage + '/'; - break; - case 'life': - title += '生活'; - link = homePage + '/'; - break; - default: - break; - } - } else if (site === 'uk') { - title += 'UK '; - linkSelector = '.story-content a'; - switch (channel) { - case 'business': - title += 'Business'; - link = homePage; - break; - case 'markets': - linkSelector = '.moduleBody a, .story-content a'; - title += 'Markets'; - link = 'https://uk.reuters.com/business/'; - break; - case 'world': - title += 'World'; - break; - case 'uk': - title += 'UK'; - break; - case 'technology': - title += 'Tech'; - break; - case 'personalFinance': - title += 'Money'; - link = 'https://uk.reuters.com/business'; - break; - case 'breakingviews': - title += 'Breakingviews'; - link = homePage; - break; - case 'sports': - linkSelector = '.story-content a, .story-title a'; - title += 
'Sport'; - break; - case 'lifestyle': - title += 'Life'; - break; - default: - break; - } - } else if (site === 'us') { - linkSelector = '.story-content a'; - switch (channel) { - case 'business': - title += 'Business'; - link = homePage; - break; - case 'markets': - linkSelector = '.moduleBody a, .story-content a'; - title += 'Markets'; - link = 'https://www.reuters.com/finance/'; - break; - case 'world': - title += 'World'; - break; - case 'politics': - title += 'Politics'; - link = 'https://www.reuters.com/'; - break; - case 'technology': - title += 'Tech'; - break; - case 'breakingviews': - title += 'Breakingviews'; - link = homePage; - break; - case 'wealth': - linkSelector = '.story-content a, .story-title a'; - title += 'Wealth'; - link = 'https://www.reuters.com/finance/'; - break; - case 'lifestyle': - title += 'Life'; - break; - default: - break; - } - } - link += channel; - } - - const response = await got.get(link); - - const $ = cheerio.load(response.data); - - const links = $(linkSelector) - .map((i, e) => homePage + e.attribs.href) - .splice(0, 10); - - const items = await Promise.all(links.map((link) => ctx.cache.tryGet(link, () => utils.ProcessFeed(link)))); - - ctx.state.data = { - title, - link, - description: title, - item: items, - }; -}; diff --git a/lib/routes/reuters/theWire.js b/lib/routes/reuters/theWire.js deleted file mode 100644 index 676d0ebfd2..0000000000 --- a/lib/routes/reuters/theWire.js +++ /dev/null @@ -1,45 +0,0 @@ -const got = require('@/utils/got'); -const cheerio = require('cheerio'); -const date = require('@/utils/date'); - -module.exports = async (ctx) => { - const url = `https://cn.reuters.com/assets/jsonWireNews`; - const response = await got({ - method: 'get', - url, - }); - - const data = response.data; - const list_item = data.headlines.map((item) => { - const info = { - title: item.headline, - link: 'https://cn.reuters.com' + item.url, - pubDate: date(item.formattedDate), - }; - return info; - }); - - function 
getDescription(items) { - return Promise.all( - items.map(async (currentValue) => { - currentValue.description = await ctx.cache.tryGet(currentValue.link, async () => { - const r = await got({ - url: currentValue.link, - method: 'get', - }); - const $ = cheerio.load(r.data); - return $('.StandardArticle_content').html(); - }); - return currentValue; - }) - ); - } - - await getDescription(list_item).then(() => { - ctx.state.data = { - title: '路透社 - 实时资讯', - link: `https://cn.reuters.com/theWire`, - item: list_item, - }; - }); -}; diff --git a/lib/routes/reuters/utils.js b/lib/routes/reuters/utils.js deleted file mode 100644 index 1f834c721b..0000000000 --- a/lib/routes/reuters/utils.js +++ /dev/null @@ -1,72 +0,0 @@ -const got = require('@/utils/got'); -const cheerio = require('cheerio'); - -const ProcessFeed = async (link) => { - const response = await got.get(link); - const $ = cheerio.load(response.data); - - const pubDate = $('meta[property="og:article:published_time"]')[0].attribs.content; - const title = $('meta[property="og:title"]')[0].attribs.content; - const author = $('meta[property="og:article:author"]')[0].attribs.content; - const cover = $('meta[property="og:image"]')[0].attribs.content; - const description = $('.ArticleBodyWrapper'); - // if the article's cover photo is not the meaningless logo - if (cover !== 'https://s4.reutersmedia.net/resources_v2/images/rcom-default.png') { - const image = $('.PrimaryAsset_container img'); - - if (image.length > 0) { - image[0].attribs.src = cover; - } - } - - // handle slideshows and videos - const pageDataPattern = new RegExp(`(?<=)`); - const pageDataFind = response.data.match(pageDataPattern); - if (pageDataFind) { - const pageData = JSON.parse(response.data.match(pageDataFind)[0]); - - // keys of this json has random tails, so we have to iterate through them - Object.keys(pageData).forEach((key) => { - // videos and slideshow appear only in `article_list` - if (!key.startsWith('article_list')) { - return; - 
} - - // add full-res pictures at the end - const slideshowData = pageData[key].first_article.images; - slideshowData && - slideshowData.forEach((imgData) => { - description.insertAfter(`
- ${imgData.title} -
${imgData.caption}
-
`); - }); - - // add full-res pictures at the beginning - const videoData = pageData[key].first_article.videos; - videoData && - videoData.forEach((videoData) => { - description.insertBefore(` - ${videoData.caption}`); - }); - }); - } - - // remove useless DOMs - description - .find( - '.Image_expand-button, .LazyImage_fallback, .Slideshow_container, .Slideshow_caption, .Slideshow_expand-button, .Attribution_container, .StandardArticleBody_trustBadgeContainer, div[class*="SocialTools"], div[class*="SocialTools"], div[class*="Slideshow"]' - ) - .remove(); - - return { link, author, pubDate, title, description: description.html() }; -}; - -module.exports = { - ProcessFeed, -}; diff --git a/lib/v2/reuters/common.js b/lib/v2/reuters/common.js new file mode 100644 index 0000000000..d585e1962e --- /dev/null +++ b/lib/v2/reuters/common.js @@ -0,0 +1,54 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const { parseDate } = require('@/utils/parse-date'); + +module.exports = async (ctx) => { + const category = ctx.params.category; + const topic = ctx.params.topic ?? (category === 'authors' ? 'reuters' : ''); + + const rootUrl = 'https://www.reuters.com'; + const currentUrl = topic ? `${rootUrl}/${category}/${topic}/` : `${rootUrl}/${category}/`; + const response = await got(currentUrl); + const $ = cheerio.load(response.data); + + let list = $('.media-story-card__body__3tRWy a.media-story-card__heading__eqhp9') + .map((_, item) => { + item = $(item); + item.find('span.visually-hidden__hidden__2qXMW').remove(); + return { + title: item.text(), + link: rootUrl + item.prop('href'), + }; + }) + .get(); + if (!list.length) { + const metadata = $('script#fusion-metadata').html(); + const metadataObj = JSON.parse(metadata.match(/Fusion.globalContent=(\{[\s\S]*?});/)[1]); + const articles = metadataObj.arcResult?.articles ?? metadataObj.result?.articles ?? 
[]; + list = articles.map((article) => ({ + title: article.title, + link: rootUrl + article.canonical_url, + })); + } + const items = await Promise.all( + list.map((item) => + ctx.cache.tryGet(item.link, async () => { + const detailResponse = await got(item.link); + const content = cheerio.load(detailResponse.data); + + item.title = content('meta[property="og:title"]').attr('content') || item.title; + item.description = content('p[data-testid="paragraph-0"]').text(); + item.pubDate = parseDate(content('meta[name="article:published_time"]').attr('content')); + item.author = content('meta[name="article:author"]').attr('content'); + + return item; + }) + ) + ); + + ctx.state.data = { + title: topic ? `Reuters - ${category} - ${topic}` : `Reuters - ${category}`, + link: currentUrl, + item: items, + }; +}; diff --git a/lib/v2/reuters/investigates.js b/lib/v2/reuters/investigates.js new file mode 100644 index 0000000000..7be1a2799c --- /dev/null +++ b/lib/v2/reuters/investigates.js @@ -0,0 +1,39 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const { parseDate } = require('@/utils/parse-date'); + +module.exports = async (ctx) => { + const rootUrl = 'https://www.reuters.com'; + const currentUrl = `${rootUrl}/investigates`; + const response = await got(currentUrl); + + const $ = cheerio.load(response.data); + + const list = $('article.section-article-container.row') + .map((_, item) => ({ + title: $(item).find('h2.subtitle').text(), + link: $(item).find('a.row.d-flex').prop('href'), + })) + .get(); + const items = await Promise.all( + list.map((item) => + ctx.cache.tryGet(item.link, async () => { + const detailResponse = await got(item.link); + const content = cheerio.load(detailResponse.data); + + item.title = content('title').text(); + item.description = content('#paragraph-0').text(); + item.pubDate = parseDate(content('time[itemprop="datePublished"]').attr('datetime')); + item.author = 
content('meta[property="og:article:publisher"]').attr('content'); + + return item; + }) + ) + ); + + ctx.state.data = { + title: $('h1.series-subtitle').text(), + link: currentUrl, + item: items, + }; +}; diff --git a/lib/v2/reuters/maintainer.js b/lib/v2/reuters/maintainer.js new file mode 100644 index 0000000000..f3b905d8b2 --- /dev/null +++ b/lib/v2/reuters/maintainer.js @@ -0,0 +1,6 @@ +module.exports = { + '/investigates': ['LyleLee'], + '/channel/:site/:channel': ['LyleLee'], // deprecated + '/theWire': ['LyleLee'], // deprecated + '/:category/:topic?': ['LyleLee', 'HenryQW', 'proletarius101', 'black-desk'], +}; diff --git a/lib/v2/reuters/migration_prompt.js b/lib/v2/reuters/migration_prompt.js new file mode 100644 index 0000000000..d906ffc0f2 --- /dev/null +++ b/lib/v2/reuters/migration_prompt.js @@ -0,0 +1,3 @@ +module.exports = () => { + throw new Error('The route has been deprecated. Please refer to the docs for more details.'); +}; diff --git a/lib/v2/reuters/radar.js b/lib/v2/reuters/radar.js new file mode 100644 index 0000000000..6cdb481690 --- /dev/null +++ b/lib/v2/reuters/radar.js @@ -0,0 +1,19 @@ +module.exports = { + 'reuters.com': { + _name: '路透社', + '.': [ + { + title: '分类/话题/作者', + docs: 'https://docs.rsshub.app/traditional-media.html#lu-tou-she', + source: ['/:category/:topic?', '/'], + target: '/reuters/:category/:topic?', + }, + { + title: '深度调查栏目', + docs: 'https://docs.rsshub.app/traditional-media.html#lu-tou-she', + source: ['/investigates'], + target: '/reuters/investigates', + }, + ], + }, +}; diff --git a/lib/v2/reuters/router.js b/lib/v2/reuters/router.js new file mode 100644 index 0000000000..e6968701dc --- /dev/null +++ b/lib/v2/reuters/router.js @@ -0,0 +1,6 @@ +module.exports = (router) => { + router.get('/channel/:site/:channel', require('./migration_prompt')); // deprecated + router.get('/theWire', require('./migration_prompt')); // deprecated + router.get('/investigates', require('./investigates')); + 
router.get('/:category/:topic?', require('./common')); +};