feat: Add full article for AP News (#4103)

This commit is contained in:
zoenglinghou
2020-02-29 13:03:22 +01:00
committed by GitHub
parent 2f001cda60
commit 859ba6fdfd

View File

@@ -1,5 +1,6 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const url = require('url');
// Route handler: reads the topic from the route parameters, parses the fetched
// page with cheerio, and publishes the resulting feed on ctx.state.data.
module.exports = async (ctx) => {
const topic = ctx.params.topic;
// NOTE(review): the next line is a diff hunk header, not JavaScript. The lines it
// skips are not visible here; `response` (used below) is presumably assigned there - confirm.
@@ -12,35 +13,59 @@ module.exports = async (ctx) => {
const data = response.data;
const $ = cheerio.load(data);
// NOTE(review): `list` is declared twice below (a cheerio selection, then an empty array).
// This looks like the removed and added sides of a diff shown together - confirm which is current.
const list = $('div.FeedCard');
// const list = $('div.FeedCard');
const list = [];
// Keep only the feed cards whose headline anchor carries an href, so that every
// entry in `list` can be turned into an article link below.
$('div.FeedCard').each(function(index, item) {
if (
$(item)
.find('a[class^=Component-headline]')
.attr('href') !== undefined
) {
list.push(item);
}
});
// Build one feed item per kept card. The per-card async callbacks are all started
// by map() and awaited together through Promise.all.
const out = await Promise.all(
list.map(async (article) => {
// Resolve the card's headline href against the site root to get the article URL.
const link = url.resolve(
'https://apnews.com',
$(article)
.find('a[class^=Component-headline]')
.attr('href')
);
// The article fields are obtained through ctx.cache.tryGet, keyed by the article URL.
// NOTE(review): presumably the loader only runs on a cache miss - confirm against the cache helper.
const [title, author, pubDate, description] = await ctx.cache.tryGet(link, async () => {
const result = await got.get(link);
// This inner `$` shadows the outer one and refers to the article page only.
const $ = cheerio.load(result.data);
// Metadata is read from the article page's JSON-LD <script> content, parsed as JSON.
const head = JSON.parse($('script[type="application/ld+json"]').html());
const title = head.headline;
// NOTE(review): .join() assumes head.author is an array - confirm against real pages.
const author = head.author.join(' & ');
const pubDate = head.datePublished;
const text = $('div.Article').html();
const imageUrl = head.image;
// Description is an <img> tag for the JSON-LD image followed by the inner HTML of div.Article.
const description = `<img src="${imageUrl}">` + text;
return [title, author, pubDate, description];
});
const item = {
title: title,
description: description,
pubDate: pubDate,
link: link,
author: author,
};
// An async function already wraps its return value in a promise; the explicit
// Promise.resolve here does not change the result.
return Promise.resolve(item);
})
);
// Publish the feed.
// NOTE(review): `title` and `item` each appear more than once in this literal. The
// `list.map((index, item) => ...).get()` entry uses cheerio-style callback arguments and
// reads fields straight from the feed card, so it looks like the removed side of the diff,
// with `item: out` and the 'AP News - ' title as the added side - confirm.
ctx.state.data = {
title: $('title').text(),
title: 'AP News - ' + $('title').text(),
link: `https://www.apnews.com/${topic}`,
item:
list &&
list
.map((index, item) => {
item = $(item);
return {
title: item
.find('h1[class^=Component-h1]')
.first()
.text(),
author: item
.find('span[class^=Component-bylines]')
.first()
.text()
.replace('By ', ''),
description: item
.find('div.content')
.first()
.text(),
pubDate: item.find('span[class^="Timestamp Component-root"]').attr('data-source'),
link: item.find('a[class^=Component-headline]').attr('href'),
};
})
.get(),
item: out,
};
};