const parser = require('@/utils/rss-parser');
const cheerio = require('cheerio');
const got = require('@/utils/got');
const { parseDate } = require('@/utils/parse-date');
// User-Agent value sent as the 'User-Agent' header on each article request made by the handler below.
// Built once at module load with chrome / android / mobile options.
// NOTE(review): presumably the helper returns a randomly chosen UA string matching those options — confirm in '@/utils/rand-user-agent'.
const chromeMobileUserAgent = require('@/utils/rand-user-agent')({ browser: 'chrome', os: 'android', device: 'mobile' });
module.exports = async (ctx) => {
const categoryId = ctx.params.category_id;
const rssUrl = `https://www.scmp.com/rss/${categoryId}/feed`;
const feed = await parser.parseURL(rssUrl);
const items = await Promise.all(
feed.items.map((item) =>
ctx.cache.tryGet(item.link, async () => {
// Fetch the AMP version
const url = item.link.replace(/^https:\/\/www\.scmp\.com/, 'https://amp.scmp.com');
const response = await got(url, {
headers: {
'User-Agent': chromeMobileUserAgent,
},
});
const html = response.data;
const $ = cheerio.load(html);
const content = $('div.article-body.clearfix');
// Cover
const cover = $('.article-images > amp-carousel > .i-amphtml-slides-container >.i-amphtml-slide-item > amp-img > img');
if (cover.length > 0) {
$(`
`).insertBefore(content[0].childNodes[0]);
$(cover).remove();
}
// Summary
const summary = $('div.article-header__subhead > ul');
// Metadata (categories & updatedAt)
const updatedAt = $('meta[itemprop="dateModified"]').attr('content');
const publishedAt = item.pubDate || $('meta[itemprop="datePublished"]').attr('content');
const categories = $('meta[name="keywords"]')
.attr('content')
.split(',')
.map((c) => c.trim());
// Images
content.find('amp-img').each((i, e) => {
const img = $(`
`);
// Caption follows, no need to handle caption
$(img).insertBefore(e);
$(e).remove();
});
// iframes (youtube videos and interactive elements)
content.find('amp-iframe').each((i, e) => {
if ($(e).find('iframe').length > 0) {
const iframe = $(e).find('iframe')[0];
$(iframe).insertBefore(e);
$(e).remove();
}
});
content.find('div.video-wrapper > amp-iframe').each((i, e) => {
const iframe = $(`