feat: 联合早报按规则订阅 (#3366)

This commit is contained in:
Artin
2019-10-31 18:52:01 +08:00
committed by DIYgod
parent 94357fc2d6
commit 07bf6054a0
6 changed files with 154 additions and 157 deletions

View File

@@ -200,24 +200,30 @@ category 对应的关键词有
### 即时新闻
<Route author="lengthmin" example="/zaobao/realtime/china" path="/zaobao/realtime/:type?" :paramsDesc="['分类, 缺省为中港台']">
<Route author="lengthmin" example="/zaobao/realtime/china" path="/zaobao/realtime/:type?" :paramsDesc="['分类, 缺省为 china']">
| 中港台 | 新加坡 | 国际 | 财经 |
| ------ | --------- | ----- | -------- |
| 中 | 新加坡 | 国际 | 财经 |
| ----- | --------- | ----- | -------- |
| china | singapore | world | zfinance |
</Route>
### 新闻
<Route author="lengthmin" example="/zaobao/znews/greater-china" path="/zaobao/znews/:type?" :paramsDesc="['分类, 缺省为中港台']">
<Route author="lengthmin" example="/zaobao/znews/china" path="/zaobao/znews/:type?" :paramsDesc="['分类, 缺省为 china']">
| 中港台 | 新加坡 | 东南亚 | 国际 | 体育 |
| ------------- | --------- | ------ | ------------- | ------ |
| greater-china | singapore | sea | international | sports |
| 中 | 新加坡 | 东南亚 | 国际 | 体育 | 早报现在 |
| ----- | --------- | ------ | ----- | ------ | -------- |
| china | singapore | sea | world | sports | fukan |
</Route>
### 其他栏目
除了上面两个兼容规则之外,联合早报网站里所有形如 <https://www.zaobao.com/wencui/politic> 的栏目页面都能被这个规则解析,早报的大部分栏目都是这种形式。建议先测试再订阅。
<Route author="lengthmin" example="/zaobao/wencui/politic" path="/zaobao/:type/:section" :paramsDesc="['https://www.zaobao.com/**wencui**/politic 中的 **wencui**', 'https://www.zaobao.com/wencui/**politic** 中的 **politic**']" />
## 连线 Wired
非订阅用户每月有阅读全文次数限制。

View File

@@ -720,8 +720,9 @@ router.get('/guokr/scientific', require('./routes/guokr/scientific'));
router.get('/guokr/:category', require('./routes/guokr/calendar'));
// 联合早报
router.get('/zaobao/realtime/:type?', require('./routes/zaobao/realtime'));
router.get('/zaobao/znews/:type?', require('./routes/zaobao/znews'));
router.get('/zaobao/realtime/:section?', require('./routes/zaobao/realtime'));
router.get('/zaobao/znews/:section?', require('./routes/zaobao/znews'));
router.get('/zaobao/:type/:section', require('./routes/zaobao/'));
// Apple
router.get('/apple/exchange_repair/:country?', require('./routes/apple/exchange_repair'));

View File

@@ -0,0 +1,17 @@
const { parseList } = require('./util');
const baseUrl = 'https://www.zaobao.com';
module.exports = async (ctx) => {
const type = ctx.params.type || 'realtime';
const section = ctx.params.section || 'china';
const sectionLink = `/${type}/${section}`;
const { title, resultList } = await parseList(ctx, sectionLink);
ctx.state.data = {
title: `《联合早报》${title}`,
link: baseUrl + sectionLink,
description: '新加坡、中国、亚洲和国际的即时、评论、商业、体育、生活、科技与多媒体新闻,尽在联合早报。',
item: resultList,
};
};

View File

@@ -1,89 +1,28 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const baseUrl = 'https://www.zaobao.com.sg';
const host = 'https://www.zaobao.com.sg/realtime';
const got_ins = got.extend({
headers: {
Referer: host,
},
});
const { parseList } = require('./util');
const baseUrl = 'https://www.zaobao.com';
module.exports = async (ctx) => {
const type = ctx.params.type || 'china';
const section = ctx.params.section || 'china';
let info = '中港台';
let word = '/realtime/china';
let div = 'div#CN.list-sect-sub';
if (type === '2') {
info = 'singapore';
word = '/realtime/singapore';
div = 'div#SG.list-sect-sub';
} else if (type === 'world') {
info = '国际';
word = '/realtime/world';
div = 'div#Global.list-sect-sub';
} else if (type === 'zfinance') {
info = '财经';
word = '/zfinance/realtime';
div = 'div#Finance.list-sect-sub';
let name = '中港台';
let sectionLink = '/realtime/china';
if (section === 'singapore') {
name = '新加坡';
sectionLink = '/realtime/singapore';
} else if (section === 'world') {
name = '国际';
sectionLink = '/realtime/world';
} else if (section === 'zfinance') {
name = '财经';
sectionLink = '/zfinance/realtime';
}
const response = await got_ins.get(host);
const $ = cheerio.load(response.data);
const data = $('li', div).find('div');
// .attr('about')
const resultItems = await Promise.all(
data.toArray().map(async (item) => {
const $item = $(item);
const link = baseUrl + $item.attr('about');
let resultItem = {};
const value = await ctx.cache.get(link);
if (value) {
resultItem = JSON.parse(value);
} else {
const article = await got_ins.get(link);
const $1 = cheerio.load(article.data);
const res = $1('.datestamp.date-updated.meta-date-updated', '.body-content')
.contents()
.filter(function() {
return this.nodeType === 3;
})
.text()
.replace('年', '-')
.replace('月', '-')
.replace('日', '');
const yyyymmdd = res.replace('更新', '').toString();
const hhmm = $item
.find('em')
.text()
.replace(/(.{2})/, '$1:');
let description = '';
$1('p', '.article-content-container').each(function() {
description = description + '<p>' + $(this).html() + '</p>';
});
resultItem = {
title: $1('h1', '.body-content').text(),
description: description,
pubDate: new Date(yyyymmdd + hhmm).toUTCString(),
link: link,
};
ctx.cache.set(link, JSON.stringify(resultItem));
}
// };
return Promise.resolve(resultItem);
})
);
const { resultList } = await parseList(ctx, sectionLink);
ctx.state.data = {
title: `《联合早报》${info} 即时`,
link: baseUrl + word,
description: '《联合早报》被公认是一份素质高、负责任、报道客观、言论公正、可信度高的报纸,对中国的发展采取积极的态度,在华人世界中享有崇高的信誉。',
item: resultItems,
title: `《联合早报》-${name}-即时`,
link: baseUrl + sectionLink,
description: '新加坡、中国、亚洲和国际的即时、评论、商业、体育、生活、科技与多媒体新闻,尽在联合早报。',
item: resultList,
};
};

80
lib/routes/zaobao/util.js Normal file
View File

@@ -0,0 +1,80 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
// Site root; section paths and article hrefs are appended to this.
const baseUrl = 'https://www.zaobao.com';

// Default headers for the shared got instance: the site root is sent as Referer.
const refererHeaders = { Referer: baseUrl };

// got instance extended with the Referer header above; used for all page fetches in this module.
const got_ins = got.extend({ headers: refererHeaders });
/**
 * Generic parser for section listing pages shaped like
 * https://www.zaobao.com/realtime/china.
 *
 * Fetches the listing page, derives a title from the breadcrumb links, then
 * loads every linked article (using `ctx.cache` to avoid re-fetching) and
 * returns the collected items.
 *
 * @param {*} ctx RSSHub ctx object; only `ctx.cache.get` / `ctx.cache.set` are used here
 * @param {string} sectionUrl path such as `/realtime/china`, appended to the site root
 * @returns {Promise<{
 *  title: string;
 *  resultList: {
 *      title: string;
 *      description: string;
 *      pubDate: string;
 *      link: string;
 *  }[];}>} breadcrumb-derived title and the list of articles
 */
const parseList = async (ctx, sectionUrl) => {
    const response = await got_ins.get(baseUrl + sectionUrl);
    const $ = cheerio.load(response.data);

    // Skip the first breadcrumb and prefix each remaining one with '-',
    // so crumbs [A, B, C] yield '-B-C' (and zero or one crumb yields '').
    const title = $('#breadcrumbs > a')
        .toArray()
        .slice(1)
        .map((crumb) => '-' + $(crumb).text())
        .join('');

    // Take the first anchor of every list entry. Entries without an anchor or
    // without an href are skipped instead of throwing / producing a broken URL.
    // new URL() resolves relative hrefs against the site root and leaves
    // already-absolute hrefs intact.
    const links = $('.row.list', '.post-list')
        .find('.content')
        .toArray()
        .map((item) => $(item).find('a').first().attr('href'))
        .filter(Boolean)
        .map((href) => new URL(href, baseUrl).href);

    const resultList = await Promise.all(
        links.map(async (link) => {
            // Cached entries are stored as JSON strings keyed by the article URL.
            const cached = await ctx.cache.get(link);
            if (cached) {
                return JSON.parse(cached);
            }

            const article = await got_ins.get(link);
            const $1 = cheerio.load(article.data);

            // Turn the Chinese date stamp into a 'Y-M-D ...' string and drop the '发布/' label.
            const time = $1('.datestamp.date-published.meta-date-published', '.body-content')
                .text()
                .replace('年', '-')
                .replace('月', '-')
                .replace('日', '')
                .replace('发布/', '');

            const resultItem = {
                title: $1('h1', '.body-content').text(),
                description: $1('.article-content-container').html(),
                // NOTE(review): the string carries no timezone, so it is parsed in the
                // server's local zone; an unparseable stamp yields 'Invalid Date' — confirm
                // against the live page format.
                pubDate: new Date(time).toUTCString(),
                link,
            };
            ctx.cache.set(link, JSON.stringify(resultItem));
            return resultItem;
        })
    );

    return {
        title,
        resultList,
    };
};
// Public surface of this helper module: the shared listing-page parser.
module.exports = { parseList: parseList };

View File

@@ -1,81 +1,35 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const baseUrl = 'https://www.zaobao.com.sg';
const got_ins = got.extend({
headers: {
Referer: baseUrl,
},
});
const { parseList } = require('./util');
const baseUrl = 'https://www.zaobao.com';
module.exports = async (ctx) => {
const type = ctx.params.type || 'greater-china';
const section = ctx.params.section;
let info = '中港台';
let word = '/znews/greater-china';
let info = '中';
let sectionLink = '/news/china';
if (type === 'singapore') {
if (section === 'singapore') {
info = '新加坡';
word = '/znews/singapore';
} else if (type === 'international') {
sectionLink = '/news/singapore';
} else if (section === 'world') {
info = '国际';
word = '/znews/international';
} else if (type === 'sea') {
sectionLink = '/news/world';
} else if (section === 'sea') {
info = '东南亚';
word = '/znews/sea';
} else if (type === 'sports') {
sectionLink = '/news/sea';
} else if (section === 'sports') {
info = '体育';
word = '/znews/sports';
sectionLink = '/news/sports';
} else if (section === 'fukan') {
info = '早报现在';
sectionLink = '/news/fukan';
}
const response = await got_ins.get(baseUrl + word);
const $ = cheerio.load(response.data);
const data = $('.row.list', '.post-list').find('.col-md-8.col-sm-8.col-xs-8.content');
const resultItems = await Promise.all(
data.toArray().map(async (item) => {
const $item = $(item);
const link = baseUrl + $item.find('a')[1].attribs.href;
let resultItem = {};
const value = await ctx.cache.get(link);
if (value) {
resultItem = JSON.parse(value);
} else {
const article = await got_ins.get(link);
const $1 = cheerio.load(article.data);
const res = $1('.datestamp.date-published.meta-date-published', '.body-content')
.contents()
.text()
.replace('年', '-')
.replace('月', '-')
.replace('日', '');
const date = res.replace('发布/', '').toString();
let description = '';
$1('p', '.article-content-container').each(function() {
description = description + '<p>' + $(this).html() + '</p>';
});
resultItem = {
title: $1('h1', '.body-content').text(),
description: description,
pubDate: new Date(date).toUTCString(),
link: link,
};
ctx.cache.set(link, JSON.stringify(resultItem));
}
return Promise.resolve(resultItem);
})
);
const { resultList } = await parseList(ctx, sectionLink);
ctx.state.data = {
title: `《联合早报》${info} 新闻`,
link: baseUrl + word,
description: '《联合早报》被公认是一份素质高、负责任、报道客观、言论公正、可信度高的报纸,对中国的发展采取积极的态度,在华人世界中享有崇高的信誉。',
item: resultItems,
title: `《联合早报》-${info}-新闻`,
link: baseUrl + sectionLink,
description: '新加坡、中国、亚洲和国际的即时、评论、商业、体育、生活、科技与多媒体新闻,尽在联合早报。',
item: resultList,
};
};