diff --git a/docs/traditional-media.md b/docs/traditional-media.md index 2791b13a19..0830a5f2cb 100644 --- a/docs/traditional-media.md +++ b/docs/traditional-media.md @@ -200,24 +200,30 @@ category 对应的关键词有 ### 即时新闻 - + -| 中港台 | 新加坡 | 国际 | 财经 | -| ------ | --------- | ----- | -------- | -| china | singapore | world | zfinance | +| 中国 | 新加坡 | 国际 | 财经 | +| ----- | --------- | ----- | -------- | +| china | singapore | world | zfinance | ### 新闻 - + -| 中港台 | 新加坡 | 东南亚 | 国际 | 体育 | -| ------------- | --------- | ------ | ------------- | ------ | -| greater-china | singapore | sea | international | sports | +| 中国 | 新加坡 | 东南亚 | 国际 | 体育 | 早报现在 | +| ----- | --------- | ------ | ----- | ------ | -------- | +| china | singapore | sea | world | sports | fukan | +### 其他栏目 + +除了上面两个兼容规则之外,联合早报网站里所有页面形如 这样的栏目都能被这个规则解析到,早报的大部分栏目都是这个样式的。你可以测试之后再订阅。 + + + ## 连线 Wired 非订阅用户每月有阅读全文次数限制。 diff --git a/lib/router.js b/lib/router.js index 9611b87db9..6e61d50763 100644 --- a/lib/router.js +++ b/lib/router.js @@ -720,8 +720,9 @@ router.get('/guokr/scientific', require('./routes/guokr/scientific')); router.get('/guokr/:category', require('./routes/guokr/calendar')); // 联合早报 -router.get('/zaobao/realtime/:type?', require('./routes/zaobao/realtime')); -router.get('/zaobao/znews/:type?', require('./routes/zaobao/znews')); +router.get('/zaobao/realtime/:section?', require('./routes/zaobao/realtime')); +router.get('/zaobao/znews/:section?', require('./routes/zaobao/znews')); +router.get('/zaobao/:type/:section', require('./routes/zaobao/')); // Apple router.get('/apple/exchange_repair/:country?', require('./routes/apple/exchange_repair')); diff --git a/lib/routes/zaobao/index.js b/lib/routes/zaobao/index.js new file mode 100644 index 0000000000..9b42d849b0 --- /dev/null +++ b/lib/routes/zaobao/index.js @@ -0,0 +1,17 @@ +const { parseList } = require('./util'); +const baseUrl = 'https://www.zaobao.com'; + +module.exports = async (ctx) => { + const type = ctx.params.type || 'realtime'; + const section = ctx.params.section || 'china'; + const sectionLink = `/${type}/${section}`; + + const { title, resultList } = await parseList(ctx, sectionLink); + + ctx.state.data = { + title: `《联合早报》${title}`, + link: baseUrl + sectionLink, + description: '新加坡、中国、亚洲和国际的即时、评论、商业、体育、生活、科技与多媒体新闻,尽在联合早报。', + item: resultList, + }; +}; diff --git a/lib/routes/zaobao/realtime.js b/lib/routes/zaobao/realtime.js index 6857ce9b6d..1289400a27 100644 --- a/lib/routes/zaobao/realtime.js +++ b/lib/routes/zaobao/realtime.js @@ -1,89 +1,28 @@ -const got = require('@/utils/got'); -const cheerio = require('cheerio'); -const baseUrl = 'https://www.zaobao.com.sg'; -const host = 'https://www.zaobao.com.sg/realtime'; -const got_ins = got.extend({ - headers: { - Referer: host, - }, -}); +const { parseList } = require('./util'); +const baseUrl = 'https://www.zaobao.com'; module.exports = async (ctx) => { - const type = ctx.params.type || 'china'; + const section = ctx.params.section || 'china'; - let info = '中港台'; - let word = '/realtime/china'; - let div = 'div#CN.list-sect-sub'; - if (type === '2') { - info = 'singapore'; - word = '/realtime/singapore'; - div = 'div#SG.list-sect-sub'; - } else if (type === 'world') { - info = '国际'; - word = '/realtime/world'; - div = 'div#Global.list-sect-sub'; - } else if (type === 'zfinance') { - info = '财经'; - word = '/zfinance/realtime'; - div = 'div#Finance.list-sect-sub'; + let name = '中港台'; + let sectionLink = '/realtime/china'; + if (section === 'singapore') { + name = '新加坡'; + sectionLink = '/realtime/singapore'; + } else if (section === 'world') { + name = '国际'; + sectionLink = '/realtime/world'; + } else if (section === 'zfinance') { + name = '财经'; + sectionLink = '/zfinance/realtime'; } - const response = await got_ins.get(host); - const $ = cheerio.load(response.data); - const data = $('li', div).find('div'); - // .attr('about') - const resultItems = await Promise.all( - data.toArray().map(async (item) => { - const $item = $(item); - const link = baseUrl + $item.attr('about'); - - let resultItem = {}; - - const value = await ctx.cache.get(link); - - if (value) { - resultItem = JSON.parse(value); - } else { - const article = await got_ins.get(link); - const $1 = cheerio.load(article.data); - const res = $1('.datestamp.date-updated.meta-date-updated', '.body-content') - .contents() - .filter(function() { - return this.nodeType === 3; - }) - .text() - .replace('年', '-') - .replace('月', '-') - .replace('日', ''); - - const yyyymmdd = res.replace('更新', '').toString(); - const hhmm = $item - .find('em') - .text() - .replace(/(.{2})/, '$1:'); - let description = ''; - $1('p', '.article-content-container').each(function() { - description = description + '

' + $(this).html() + '

'; - }); - - resultItem = { - title: $1('h1', '.body-content').text(), - description: description, - pubDate: new Date(yyyymmdd + hhmm).toUTCString(), - link: link, - }; - - ctx.cache.set(link, JSON.stringify(resultItem)); - } - // }; - return Promise.resolve(resultItem); - }) - ); + const { resultList } = await parseList(ctx, sectionLink); ctx.state.data = { - title: `《联合早报》${info} 即时`, - link: baseUrl + word, - description: '《联合早报》被公认是一份素质高、负责任、报道客观、言论公正、可信度高的报纸,对中国的发展采取积极的态度,在华人世界中享有崇高的信誉。', - item: resultItems, + title: `《联合早报》-${name}-即时`, + link: baseUrl + sectionLink, + description: '新加坡、中国、亚洲和国际的即时、评论、商业、体育、生活、科技与多媒体新闻,尽在联合早报。', + item: resultList, }; }; diff --git a/lib/routes/zaobao/util.js b/lib/routes/zaobao/util.js new file mode 100644 index 0000000000..4b214b9326 --- /dev/null +++ b/lib/routes/zaobao/util.js @@ -0,0 +1,80 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const baseUrl = 'https://www.zaobao.com'; +const got_ins = got.extend({ + headers: { + Referer: baseUrl, + }, +}); + +/** + * 通用解析页面类似 https://www.zaobao.com/realtime/china 的网站 + * + * @param {*} ctx RSSHub 的 ctx 参数,用来设置缓存 + * @param {string} sectionUrl 形如 /realtime/china 的字符串 + * @returns {Promise<{ + * title: string; + * resultList: { + * title: string; + * description: string; + * pubDate: string; + * link: string; + * }[];}>} 新闻标题以及新闻列表 + */ +const parseList = async (ctx, sectionUrl) => { + const response = await got_ins.get(baseUrl + sectionUrl); + const $ = cheerio.load(response.data); + const data = $('.row.list', '.post-list').find('.content'); + + const title = $('#breadcrumbs > a') + .toArray() + .reduce((acc, cV, cI) => { + if (cI > 0) { + return acc + '-' + $(cV).text(); + } + return ''; + }, ''); + const resultList = await Promise.all( + data.toArray().map(async (item) => { + const $item = $(item); + const link = baseUrl + $item.find('a')[0].attribs.href; + + let resultItem = {}; + + const value = await ctx.cache.get(link); + + if (value) { + resultItem = JSON.parse(value); + } else { + const article = await got_ins.get(link); + const $1 = cheerio.load(article.data); + + const time = $1('.datestamp.date-published.meta-date-published', '.body-content') + .text() + .replace('年', '-') + .replace('月', '-') + .replace('日', '') + .replace('发布/', ''); + + const description = $1('.article-content-container').html(); + + resultItem = { + title: $1('h1', '.body-content').text(), + description: description, + pubDate: new Date(time).toUTCString(), + link: link, + }; + ctx.cache.set(link, JSON.stringify(resultItem)); + } + + return Promise.resolve(resultItem); + }) + ); + return { + title: title, + resultList: resultList, + }; +}; +module.exports = { + parseList, +}; diff --git a/lib/routes/zaobao/znews.js b/lib/routes/zaobao/znews.js index 4d23c027ee..c4f983936a 100644 --- a/lib/routes/zaobao/znews.js +++ b/lib/routes/zaobao/znews.js @@ -1,81 +1,35 @@ -const got = require('@/utils/got'); -const cheerio = require('cheerio'); -const baseUrl = 'https://www.zaobao.com.sg'; -const got_ins = got.extend({ - headers: { - Referer: baseUrl, - }, -}); +const { parseList } = require('./util'); +const baseUrl = 'https://www.zaobao.com'; module.exports = async (ctx) => { - const type = ctx.params.type || 'greater-china'; + const section = ctx.params.section; - let info = '中港台'; - let word = '/znews/greater-china'; + let info = '中国'; + let sectionLink = '/news/china'; - if (type === 'singapore') { + if (section === 'singapore') { info = '新加坡'; - word = '/znews/singapore'; - } else if (type === 'international') { + sectionLink = '/news/singapore'; + } else if (section === 'world') { info = '国际'; - word = '/znews/international'; - } else if (type === 'sea') { + sectionLink = '/news/world'; + } else if (section === 'sea') { info = '东南亚'; - word = '/znews/sea'; - } else if (type === 'sports') { + sectionLink = '/news/sea'; + } else if (section === 'sports') { info = '体育'; - word = '/znews/sports'; + sectionLink = '/news/sports'; + } else if (section === 'fukan') { + info = '早报现在'; + sectionLink = '/news/fukan'; } - const response = await got_ins.get(baseUrl + word); - const $ = cheerio.load(response.data); - const data = $('.row.list', '.post-list').find('.col-md-8.col-sm-8.col-xs-8.content'); - - const resultItems = await Promise.all( - data.toArray().map(async (item) => { - const $item = $(item); - const link = baseUrl + $item.find('a')[1].attribs.href; - - let resultItem = {}; - - const value = await ctx.cache.get(link); - - if (value) { - resultItem = JSON.parse(value); - } else { - const article = await got_ins.get(link); - const $1 = cheerio.load(article.data); - const res = $1('.datestamp.date-published.meta-date-published', '.body-content') - .contents() - .text() - .replace('年', '-') - .replace('月', '-') - .replace('日', ''); - - const date = res.replace('发布/', '').toString(); - let description = ''; - $1('p', '.article-content-container').each(function() { - description = description + '

' + $(this).html() + '

'; - }); - - resultItem = { - title: $1('h1', '.body-content').text(), - description: description, - pubDate: new Date(date).toUTCString(), - link: link, - }; - - ctx.cache.set(link, JSON.stringify(resultItem)); - } - - return Promise.resolve(resultItem); - }) - ); + const { resultList } = await parseList(ctx, sectionLink); ctx.state.data = { - title: `《联合早报》${info} 新闻`, - link: baseUrl + word, - description: '《联合早报》被公认是一份素质高、负责任、报道客观、言论公正、可信度高的报纸,对中国的发展采取积极的态度,在华人世界中享有崇高的信誉。', - item: resultItems, + title: `《联合早报》-${info}-新闻`, + link: baseUrl + sectionLink, + description: '新加坡、中国、亚洲和国际的即时、评论、商业、体育、生活、科技与多媒体新闻,尽在联合早报。', + item: resultList, }; };