const parser = require('@/utils/rss-parser'); const cheerio = require('cheerio'); const got = require('@/utils/got'); const { parseDate } = require('@/utils/parse-date'); const chromeMobileUserAgent = require('@/utils/rand-user-agent')({ browser: 'chrome', os: 'android', device: 'mobile' }); module.exports = async (ctx) => { const categoryId = ctx.params.category_id; const rssUrl = `https://www.scmp.com/rss/${categoryId}/feed`; const feed = await parser.parseURL(rssUrl); const items = await Promise.all( feed.items.map((item) => ctx.cache.tryGet(item.link, async () => { // Fetch the AMP version const url = item.link.replace(/^https:\/\/www\.scmp\.com/, 'https://amp.scmp.com'); const response = await got(url, { headers: { 'User-Agent': chromeMobileUserAgent, }, }); const html = response.data; const $ = cheerio.load(html); const content = $('div.article-body.clearfix'); // Cover const cover = $('.article-images > amp-carousel > .i-amphtml-slides-container >.i-amphtml-slide-item > amp-img > img'); if (cover.length > 0) { $(``).insertBefore(content[0].childNodes[0]); $(cover).remove(); } // Summary const summary = $('div.article-header__subhead > ul'); // Metadata (categories & updatedAt) const updatedAt = $('meta[itemprop="dateModified"]').attr('content'); const publishedAt = item.pubDate || $('meta[itemprop="datePublished"]').attr('content'); const categories = $('meta[name="keywords"]') .attr('content') .split(',') .map((c) => c.trim()); // Images content.find('amp-img').each((i, e) => { const img = $(`${e.attribs.alt}`); // Caption follows, no need to handle caption $(img).insertBefore(e); $(e).remove(); }); // iframes (youtube videos and interactive elements) content.find('amp-iframe').each((i, e) => { if ($(e).find('iframe').length > 0) { const iframe = $(e).find('iframe')[0]; $(iframe).insertBefore(e); $(e).remove(); } }); content.find('div.video-wrapper > amp-iframe').each((i, e) => { const iframe = $(`