fix(route): thepaper switched its site to a Next.js-based backend, so update the feed routes (#10850)

* fix(route): thepaper now uses Next.js as backend

* Id to Name for channel and list

* summary ok

* determine the detail by null check instead of type check

* list_url for better reading

* add fallback for thumbnail

* fix video article

* add list for thepaper

* add list for thepaper

* delete the duplicate line

* small fix and optimize

* add maintainer
This commit is contained in:
Felix Hsu
2022-09-21 22:53:50 +08:00
committed by GitHub
parent 00d6a689d9
commit fe50f9cb18
8 changed files with 217 additions and 71 deletions

View File

@@ -1621,21 +1621,136 @@ category 对应的关键词有
### 首页头条 ### 首页头条
<Route author="HenryQW nczitzk" example="/thepaper/featured" path="/thepaper/featured"/> <Route author="HenryQW nczitzk bigfei" example="/thepaper/featured" path="/thepaper/featured"/>
### 频道 ### 频道
<Route author="xyqfer nczitzk" example="/thepaper/channel/25950" path="/thepaper/channel/:id" :paramsDesc="['频道 id，可在频道页 URL 中找到']"> <Route author="xyqfer nczitzk bigfei" example="/thepaper/channel/25950" path="/thepaper/channel/:id" :paramsDesc="['频道 id，可在频道页 URL 中找到']">
| 视频 | 时事 | 财经 | 思想 | 澎湃号 | 生活 | | 频道 ID | 频道名 |
| ----- | ----- | ----- | ----- | ----- | ----- | | ------ | --- |
| 26916 | 25950 | 25951 | 25952 | 36079 | 25953 | | 25949 | 要闻 |
| 26916 | 视频 |
| 108856 | 战疫 |
| 25950 | 时事 |
| 25951 | 财经 |
| 36079 | 澎湃号 |
| 119908 | 科技 |
| 25952 | 思想 |
| 119489 | 智库 |
| 25953 | 生活 |
| 26161 | 问吧 |
| -21 | 体育 |
| -24 | 评论 |
| -23 | 国际 |
</Route> </Route>
### 列表 ### 栏目
<Route author="nczitzk" example="/thepaper/list/25457" path="/thepaper/list/:id" :paramsDesc="['列表 id，可在列表页 URL 中找到']"/> <Route author="nczitzk bigfei" example="/thepaper/list/25457" path="/thepaper/list/:id" :paramsDesc="['栏目 id，可在栏目页 URL 中找到']">
| 栏目 ID | 栏目名 |
| ------ | ------- |
| 26912 | 上直播 |
| 26913 | 七环视频 |
| 26965 | 温度计 |
| 26908 | 一级视场 |
| 27260 | World 湃 |
| 26907 | 湃客科技 |
| 33168 | 纪录湃 |
| 26911 | 围观 |
| 26918 | @所有人 |
| 26906 | 大都会 |
| 26909 | 追光灯 |
| 26910 | 运动装 |
| 26914 | 健寻记 |
| 82188 | AI 播报 |
| 89035 | 眼界 |
| 92278 | 关键帧 |
| 90069 | 战疫 |
| 25462 | 中国政库 |
| 25488 | 中南海 |
| 97924 | 初心之路 |
| 25489 | 舆论场 |
| 25490 | 打虎记 |
| 25423 | 人事风向 |
| 25426 | 法治中国 |
| 25424 | 一号专案 |
| 25463 | 港台来信 |
| 25491 | 长三角政商 |
| 25428 | 直击现场 |
| 68750 | 公益湃 |
| 27604 | 暖闻 |
| 25464 | 澎湃质量报告 |
| 25425 | 绿政公署 |
| 25429 | 澎湃国际 |
| 25481 | 外交学人 |
| 25430 | 澎湃防务 |
| 25678 | 唐人街 |
| 25427 | 澎湃人物 |
| 25422 | 浦江头条 |
| 25487 | 教育家 |
| 25634 | 全景现场 |
| 25635 | 美数课 |
| 25600 | 快看 |
| 25434 | 10% 公司 |
| 25436 | 能见度 |
| 25433 | 地产界 |
| 25438 | 财经上下游 |
| 25435 | 金改实验室 |
| 25437 | 牛市点线面 |
| 119963 | IPO 最前线 |
| 25485 | 澎湃商学院 |
| 25432 | 自贸区连线 |
| 37978 | 进博会在线 |
| 36079 | 湃客 |
| 27392 | 政务 |
| 77286 | 媒体 |
| 27234 | 科学湃 |
| 119445 | 生命科学 |
| 119447 | 未来 2% |
| 119446 | 元宇宙观察 |
| 119448 | 科创 101 |
| 119449 | 科学城邦 |
| 25444 | 社论 |
| 27224 | 澎湃评论 |
| 26525 | 思想湃 |
| 26878 | 上海书评 |
| 25483 | 思想市场 |
| 25457 | 私家历史 |
| 25574 | 翻书党 |
| 25455 | 艺术评论 |
| 26937 | 古代艺术 |
| 25450 | 文化课 |
| 25482 | 逝者 |
| 25536 | 专栏 |
| 26506 | 异次元 |
| 97313 | 海平面 |
| 103076 | 一问三知 |
| 25445 | 澎湃研究所 |
| 25446 | 全球智库 |
| 26915 | 城市漫步 |
| 25456 | 市政厅 |
| 104191 | 世界会客厅 |
| 25448 | 有戏 |
| 26609 | 文艺范 |
| 25942 | 身体 |
| 26015 | 私・奔 |
| 25599 | 运动家 |
| 25842 | 私家地理 |
| 80623 | 非常品 |
| 26862 | 楼市 |
| 25769 | 生活方式 |
| 25990 | 澎湃联播 |
| 26173 | 视界 |
| 26202 | 亲子学堂 |
| 26404 | 赢家 |
| 26490 | 汽车圈 |
| 115327 | IP SH |
| 117340 | 酒业 |
</Route>
### 明查 ### 明查

View File

@@ -1,15 +1,18 @@
const utils = require('./utils'); const utils = require('./utils');
const cheerio = require('cheerio');
const got = require('@/utils/got');
module.exports = async (ctx) => { module.exports = async (ctx) => {
const { id } = ctx.params; const { id } = ctx.params;
const rootUrl = 'https://m.thepaper.cn'; const channel_url = `https://m.thepaper.cn/channel/${id}`;
const response = await got(channel_url);
const link = id === '26916' ? `${rootUrl}/channel_26916` : `${rootUrl}/list_page.jsp?nodeid=${id}&isList=0&pageidx=1`; const data = JSON.parse(cheerio.load(response.data)('#__NEXT_DATA__').html());
const items = await utils.ProcessFeed(id === '26916' ? '.list_video_item' : '.list_item_infor', link, ctx); const list = data.props.pageProps.data.list;
const items = await Promise.all(list.map((item) => utils.ProcessItem(item, ctx)));
ctx.state.data = { ctx.state.data = {
title: `澎湃新闻频道 - ${id}`, title: `澎湃新闻频道 - ${utils.ChannelIdToName(id, data)}`,
link: `${rootUrl}/channel_${id}`, link: channel_url,
item: items, item: items,
}; };
}; };

View File

@@ -1,14 +1,17 @@
const utils = require('./utils'); const utils = require('./utils');
const cheerio = require('cheerio');
const got = require('@/utils/got');
module.exports = async (ctx) => { module.exports = async (ctx) => {
const rootUrl = 'https://m.thepaper.cn'; const response = await got('https://m.thepaper.cn');
const data = JSON.parse(cheerio.load(response.data)('#__NEXT_DATA__').html());
const list = data.props.pageProps.data.list;
const link = `${rootUrl}/list_page.jsp?&nodeid=25949&isList=1&pageidx=1`; const items = await Promise.all(list.map((item) => utils.ProcessItem(item, ctx)));
const items = await utils.ProcessFeed('.list_item_infor', link, ctx);
ctx.state.data = { ctx.state.data = {
title: '澎湃新闻 - 首页头条', title: '澎湃新闻 - 首页头条',
link: `${rootUrl}/channel_25949`, link: 'https://m.thepaper.cn',
item: items, item: items,
}; };
}; };

View File

@@ -1,15 +1,19 @@
const utils = require('./utils'); const utils = require('./utils');
const cheerio = require('cheerio');
const got = require('@/utils/got');
module.exports = async (ctx) => { module.exports = async (ctx) => {
const { id } = ctx.params; const { id } = ctx.params;
const rootUrl = 'https://m.thepaper.cn'; const list_url = `https://m.thepaper.cn/list/${id}`;
const response = await got(list_url);
const data = JSON.parse(cheerio.load(response.data)('#__NEXT_DATA__').html());
const list = data.props.pageProps.data.list;
const link = `${rootUrl}/list_page.jsp?nodeid=${id}&isList=1&pageidx=1`; const items = await Promise.all(list.map((item) => utils.ProcessItem(item, ctx)));
const items = await utils.ProcessFeed('.list_item_infor', link, ctx);
ctx.state.data = { ctx.state.data = {
title: `澎湃新闻列表 - ${id}`, title: `澎湃新闻栏目 - ${utils.ListIdToName(id, data)}`,
link: `${rootUrl}/list_${id}`, link: list_url,
item: items, item: items,
}; };
}; };

View File

@@ -1,8 +1,8 @@
module.exports = { module.exports = {
'/839studio/:id': ['umm233'], '/839studio/:id': ['umm233'],
'/839studio': ['umm233'], '/839studio': ['umm233'],
'/channel/:id': ['xyqfer', 'nczitzk'], '/channel/:id': ['xyqfer', 'nczitzk', 'bigfei'],
'/featured': ['HenryQW', 'nczitzk'], '/featured': ['HenryQW', 'nczitzk', 'bigfei'],
'/factpaper/:status?': ['nczitzk'], '/factpaper/:status?': ['nczitzk'],
'/list/:id': ['nczitzk'], '/list/:id': ['nczitzk', 'bigfei'],
}; };

View File

@@ -15,10 +15,10 @@ module.exports = {
target: (params, url) => `/thepaper/channel/${new URL(url).search(/channel_(\d+)/)}`, target: (params, url) => `/thepaper/channel/${new URL(url).search(/channel_(\d+)/)}`,
}, },
{ {
title: '列表', title: '栏目',
docs: 'https://docs.rsshub.app/traditional-media.html#peng-pai-xin-wen-lie-biao', docs: 'https://docs.rsshub.app/traditional-media.html#peng-pai-xin-wen-lie-biao',
source: ['/'], source: ['/'],
target: (params, url) => `/thepaper/channel/${new URL(url).search(/list_(\d+)/)}`, target: (params, url) => `/thepaper/list/${new URL(url).search(/list_(\d+)/)}`,
}, },
{ {
title: '澎湃美数组作品集', title: '澎湃美数组作品集',

View File

@@ -0,0 +1,6 @@
<video class="cont_video"
src="{{ videos.url }}"
controls webkit-playsinline="" playsinline="" x5-playsinline=""
x-webkit-airplay="allow" preload="metadata"
poster="{{ videos.coverUrl}}" >
</video>

View File

@@ -1,55 +1,70 @@
const cheerio = require('cheerio'); const cheerio = require('cheerio');
const date = require('@/utils/date');
const { parseDate } = require('@/utils/parse-date'); const { parseDate } = require('@/utils/parse-date');
const timezone = require('@/utils/timezone');
const got = require('@/utils/got'); const got = require('@/utils/got');
const { art } = require('@/utils/render');
const path = require('path');
module.exports = { module.exports = {
ProcessFeed: async (query, link, ctx) => { ProcessItem: async (item, ctx) => {
const res = await got(link); if (item.link) {
// external link
return {
title: item.name,
link: item.link,
description: item.name,
pubDate: parseDate(item.pubTimeLong),
media: {
content: {
url: item.pic,
},
},
};
} else {
const itemUrl = `https://m.thepaper.cn/detail/${item.contId}`;
return await ctx.cache.tryGet(itemUrl, async () => {
const res = await got(itemUrl);
const data = JSON.parse(cheerio.load(res.data)('#__NEXT_DATA__').html());
const detailData = data.props.pageProps.detailData;
const $ = cheerio.load(res.data); const contentDetail = detailData.contentDetail || detailData.liveDetail;
const list = $(query).slice(0, 10).get(); let description = contentDetail.content || contentDetail.summary;
return Promise.all( if (contentDetail.videos) {
list.map((item) => { description =
const $ = cheerio.load(item); art(path.join(__dirname, 'templates/video_detail.art'), {
const itemUrl = `https://m.thepaper.cn/${$(item).find('a').eq(0).attr('href')}`; videos: contentDetail.videos,
return ctx.cache.tryGet(itemUrl, async () => { }) + description;
const res = await got(itemUrl); }
const content = cheerio.load(res.data);
let description, pubDate; return {
title: contentDetail.name,
if (content('div.news_video_msg').length > 0) { link: itemUrl,
description = content('#vdetail_sum').html(); description,
pubDate = timezone( pubDate: parseDate(contentDetail.pubTime),
parseDate( author: contentDetail.author,
content('div.news_video_msg') media: {
.html() content: {
.replace(/&nbsp;/gi, '') url: item.pic || contentDetail.sharePic || (contentDetail.videos && contentDetail.videos.coverUrl),
.split('<br>')[0] },
), thumbnails: {
+8 url: item.pic || contentDetail.sharePic,
); },
} else if (content('#slider_wrapper_ul').length > 0) { },
description = ''; };
pubDate = new Date(date($(item).find('div.list_item_extra span').eq(1).text())).toUTCString(); });
} else { }
description = content('div.newsdetail_content').html(); },
pubDate = timezone(parseDate(content('div.date').text().trim().split('来源:')[0].trim()), +8); ChannelIdToName: (nodeId, next_data) => next_data.props.appProps.menu.channelList.find((c) => c.nodeId.toString() === nodeId.toString()).name,
ListIdToName: (listId, next_data) => {
const channelList = next_data.props.appProps.menu.channelList;
for (const c of channelList) {
if (c.childNodeList && c.childNodeList.length > 0) {
for (const l of c.childNodeList) {
if (l.nodeId.toString() === listId.toString()) {
return l.name;
} }
}
const single = { }
title: content('title').text(), }
link: itemUrl,
description,
pubDate,
author: content('div.author').text(),
};
return single;
});
})
);
}, },
}; };