fix(route): thepaper moved its site to a Next.js-based backend, so update the feed route (#10850)

* fix(route): thepaper now uses Next.js as its backend

* Id to Name for channel and list

* summary ok

* determine the detail by null check instead of type check

* list_url for better reading

* add fallback for thumbnail

* fix video article

* add list for thepaper

* add list for thepaper

* delete the duplicate line

* small fix and optimize

* add maintainer
This commit is contained in:
Felix Hsu
2022-09-21 22:53:50 +08:00
committed by GitHub
parent 00d6a689d9
commit fe50f9cb18
8 changed files with 217 additions and 71 deletions

View File

@@ -1,55 +1,70 @@
const cheerio = require('cheerio');
const date = require('@/utils/date');
const { parseDate } = require('@/utils/parse-date');
const timezone = require('@/utils/timezone');
const got = require('@/utils/got');
const { art } = require('@/utils/render');
const path = require('path');
module.exports = {
ProcessFeed: async (query, link, ctx) => {
const res = await got(link);
// Builds one feed entry from a single list item.
// - If `item.link` is set, the entry is assembled from the list item's own fields
//   and returned immediately; no page is requested on that path.
// - Otherwise the detail page for `item.contId` is requested inside
//   ctx.cache.tryGet(itemUrl, ...) and the entry is read from the JSON found in
//   the page's `#__NEXT_DATA__` element.
// NOTE(review): in this view the function's lines appear interleaved with lines from
// another definition (they reference `query` and `list`, which are not defined here),
// as in a rendered diff — confirm against the real file before changing any code.
ProcessItem: async (item, ctx) => {
if (item.link) {
// external link
// The list item's name doubles as both title and description here.
return {
title: item.name,
link: item.link,
description: item.name,
pubDate: parseDate(item.pubTimeLong),
media: {
content: {
url: item.pic,
},
},
};
} else {
const itemUrl = `https://m.thepaper.cn/detail/${item.contId}`;
return await ctx.cache.tryGet(itemUrl, async () => {
const res = await got(itemUrl);
// Parse the JSON text held by the #__NEXT_DATA__ element of the fetched page.
const data = JSON.parse(cheerio.load(res.data)('#__NEXT_DATA__').html());
const detailData = data.props.pageProps.detailData;
const $ = cheerio.load(res.data);
const list = $(query).slice(0, 10).get();
// Use contentDetail when present, otherwise fall back to liveDetail.
const contentDetail = detailData.contentDetail || detailData.liveDetail;
// Prefer the full content; fall back to the summary when content is missing or empty.
let description = contentDetail.content || contentDetail.summary;
return Promise.all(
list.map((item) => {
const $ = cheerio.load(item);
const itemUrl = `https://m.thepaper.cn/${$(item).find('a').eq(0).attr('href')}`;
return ctx.cache.tryGet(itemUrl, async () => {
const res = await got(itemUrl);
const content = cheerio.load(res.data);
// When videos are present, prepend the rendered video_detail.art template to the description.
if (contentDetail.videos) {
description =
art(path.join(__dirname, 'templates/video_detail.art'), {
videos: contentDetail.videos,
}) + description;
}
let description, pubDate;
if (content('div.news_video_msg').length > 0) {
description = content('#vdetail_sum').html();
pubDate = timezone(
parseDate(
content('div.news_video_msg')
.html()
.replace(/ /gi, '')
.split('<br>')[0]
),
+8
);
} else if (content('#slider_wrapper_ul').length > 0) {
description = '';
pubDate = new Date(date($(item).find('div.list_item_extra span').eq(1).text())).toUTCString();
} else {
description = content('div.newsdetail_content').html();
pubDate = timezone(parseDate(content('div.date').text().trim().split('来源:')[0].trim()), +8);
// Entry returned for an item resolved from its detail data: title, publication time
// and author are read from contentDetail; the link is the detail page URL.
return {
title: contentDetail.name,
link: itemUrl,
description,
pubDate: parseDate(contentDetail.pubTime),
author: contentDetail.author,
media: {
content: {
// Image source order: the list item's pic, then the detail's sharePic,
// then (only when videos exists) videos.coverUrl.
url: item.pic || contentDetail.sharePic || (contentDetail.videos && contentDetail.videos.coverUrl),
},
thumbnails: {
// Same order as above, but without the video cover as a last resort.
url: item.pic || contentDetail.sharePic,
},
},
};
});
}
},
ChannelIdToName: (nodeId, next_data) => next_data.props.appProps.menu.channelList.find((c) => c.nodeId.toString() === nodeId.toString()).name,
// Looks up the name of a child node ("list") by id.
// Walks every channel in next_data.props.appProps.menu.channelList, and for channels
// with a non-empty childNodeList compares each child's nodeId to listId by their
// toString() forms. Returns the first matching child's name; when nothing matches,
// the loops finish without a return statement, so the result is undefined.
// NOTE(review): the `const single = { ... }` / `return single;` lines below use names
// (`content`, `itemUrl`, `description`, `pubDate`) that are not defined in this function
// and appear to be interleaved leftovers of another definition, as in a rendered diff —
// confirm against the real file before changing any code.
ListIdToName: (listId, next_data) => {
const channelList = next_data.props.appProps.menu.channelList;
for (const c of channelList) {
// Channels without children cannot contain the list; skip them.
if (c.childNodeList && c.childNodeList.length > 0) {
for (const l of c.childNodeList) {
if (l.nodeId.toString() === listId.toString()) {
return l.name;
}
const single = {
title: content('title').text(),
link: itemUrl,
description,
pubDate,
author: content('div.author').text(),
};
return single;
});
})
);
}
}
}
},
};