mirror of
https://github.com/DIYgod/RSSHub.git
synced 2025-12-09 14:40:23 +08:00
feat: Update the Vulture endpoint to be able to generate a... (#4757)
This commit is contained in:
@@ -156,14 +156,7 @@ Provides a better reading experience (full text articles) over the official one.
|
||||
|
||||
## Vulture
|
||||
|
||||
<RouteEn author="loganrockmore" example="/vulture/movies" path="/vulture/:type/:excludetags?" :paramsDesc="['The sub-site name', 'Comma-delimited list of tags. If an article includes one of these tags, it will be excluded from the RSS feed.']">
|
||||
|
||||
Supported sub-sites:
|
||||
| TV | Movies | Comedy | Music | TV Recaps | Books | Theater | Art | Awards | Video |
|
||||
| --- | ------ | ------ | ----- | --------- | ----- | ------- | --- | ------ | ----- |
|
||||
| tv | movies | comedy | music | tvrecaps | books | theater | art | awards | video |
|
||||
|
||||
</RouteEn>
|
||||
<RouteEn author="loganrockmore" example="/vulture/movies" path="/vulture/:tag/:excludetags?" :paramsDesc="['Tag name, i.e. the last path segment of a Vulture tag page URL such as https://www.vulture.com/news/movies/', 'Comma-delimited list of tags. If an article includes one of these tags, it will be excluded from the RSS feed.']" />
|
||||
|
||||
## World Health Organization | WHO
|
||||
|
||||
|
||||
@@ -2215,7 +2215,7 @@ router.get('/mastodon/timeline/:site/:only_media?', require('./routes/mastodon/t
|
||||
router.get('/aliyun-kernel/index', require('./routes/aliyun-kernel/index'));
|
||||
|
||||
// Vulture
|
||||
router.get('/vulture/:type/:excludetags?', require('./routes/vulture/index'));
|
||||
router.get('/vulture/:tag/:excludetags?', require('./routes/vulture/index'));
|
||||
|
||||
// xinwenlianbo
|
||||
router.get('/xinwenlianbo/index', require('./routes/xinwenlianbo/index'));
|
||||
|
||||
@@ -1,9 +1,14 @@
|
||||
const utils = require('./utils');
|
||||
|
||||
module.exports = async (ctx) => {
|
||||
const url = `https://www.vulture.com/${ctx.params.type}/`;
|
||||
const title = `Vulture - ${ctx.params.type}`;
|
||||
const url = `https://www.vulture.com/news/${ctx.params.tag}/`;
|
||||
const tagsToExclude = ctx.params.excludetags;
|
||||
|
||||
let title = `Vulture - tag ${ctx.params.tag}`;
|
||||
if (tagsToExclude !== undefined) {
|
||||
title += ' - excluding tags ';
|
||||
title += tagsToExclude.split(',').join(', ');
|
||||
}
|
||||
|
||||
ctx.state.data = await utils.getData(ctx, url, title, tagsToExclude);
|
||||
};
|
||||
|
||||
@@ -1,10 +1,17 @@
|
||||
const got = require('@/utils/got');
|
||||
const cheerio = require('cheerio');
|
||||
|
||||
async function load(link) {
|
||||
const response = await got.get(link);
|
||||
async function load(articleURL) {
|
||||
const response = await got.get(articleURL);
|
||||
const $ = cheerio.load(response.data);
|
||||
|
||||
// get the metadata
|
||||
const title = $('meta[property="og:title"]').attr('content');
|
||||
const pubDate = $('meta[property="article:published_time"]').attr('content');
|
||||
const bylineString = 'by ' + $('meta[name="author"]').attr('content');
|
||||
const tags = $('meta[property="article:tag"]').attr('content').split(', ');
|
||||
|
||||
// get the contents of the article
|
||||
const description = $('div.article-content');
|
||||
|
||||
// remove the content that we don't want to show
|
||||
@@ -16,45 +23,44 @@ async function load(link) {
|
||||
description.find('div.mobile-secondary-area').remove();
|
||||
description.find('aside.newsletter-flex-text').remove();
|
||||
|
||||
// get the tags
|
||||
const tagElements = $('div.tags > ul > li > a:not(.more)');
|
||||
const tags = tagElements
|
||||
.map(function () {
|
||||
return $(this).text().toLowerCase();
|
||||
})
|
||||
.get();
|
||||
// add the tags to the end
|
||||
description.append('<br /><br />tags: ' + tags.join(', '));
|
||||
|
||||
return {
|
||||
title: title,
|
||||
author: bylineString,
|
||||
pubDate: pubDate,
|
||||
link: articleURL,
|
||||
guid: articleURL,
|
||||
description: description.html(),
|
||||
tags: tags,
|
||||
};
|
||||
}
|
||||
|
||||
async function ProcessFeed(list, caches) {
|
||||
return await Promise.all(
|
||||
list.map(async (item) => {
|
||||
const itemUrl = item.canonicalUrl;
|
||||
async function ProcessFeed(htmlData, caches) {
|
||||
const $ = cheerio.load(htmlData);
|
||||
const allArticles = $('section.paginated-feed li.article');
|
||||
|
||||
let bylineString = '';
|
||||
if (item.byline) {
|
||||
const byline = item.byline[0];
|
||||
const bylineNames = byline.names.map((name) => name.text);
|
||||
const bylineNamesString = bylineNames.join(', ');
|
||||
// limit the list to only 25 articles, to make sure that load times remain reasonable
|
||||
const articles = allArticles.slice(0, 25);
|
||||
|
||||
bylineString = 'by ' + bylineNamesString;
|
||||
const articleURLs = [];
|
||||
$(articles).each(function (index, article) {
|
||||
const articleLink = $(article).find('a.link-text');
|
||||
|
||||
let articleURL = articleLink.attr('href');
|
||||
if (articleURL.startsWith('//www.')) {
|
||||
articleURL = 'https:' + articleURL;
|
||||
} else if (articleURL.startsWith('www.')) {
|
||||
articleURL = 'https://' + articleURL;
|
||||
}
|
||||
articleURLs.push(articleURL);
|
||||
});
|
||||
|
||||
const single = {
|
||||
title: item.primaryHeadline,
|
||||
link: itemUrl,
|
||||
author: bylineString,
|
||||
guid: itemUrl,
|
||||
pubDate: item.date,
|
||||
};
|
||||
|
||||
const other = await caches.tryGet(itemUrl, async () => await load(itemUrl));
|
||||
|
||||
return Promise.resolve(Object.assign({}, single, other));
|
||||
return await Promise.all(
|
||||
articleURLs.map(async (articleURL) => {
|
||||
const data = await caches.tryGet(articleURL, async () => await load(articleURL));
|
||||
return Promise.resolve(Object.assign({}, data));
|
||||
})
|
||||
);
|
||||
}
|
||||
@@ -82,29 +88,7 @@ const getData = async (ctx, url, title, tagsToExclude) => {
|
||||
});
|
||||
|
||||
const htmlData = htmlResponse.data;
|
||||
|
||||
const $ = cheerio.load(htmlData);
|
||||
let dataUri = $('section.paginated-feed').attr('data-uri');
|
||||
|
||||
if (dataUri.startsWith('www.')) {
|
||||
dataUri = 'https://' + dataUri;
|
||||
}
|
||||
|
||||
// get the raw data
|
||||
const response = await got({
|
||||
method: 'get',
|
||||
url: dataUri,
|
||||
headers: {
|
||||
Referer: dataUri,
|
||||
},
|
||||
});
|
||||
|
||||
const data = response.data;
|
||||
|
||||
// limit the list to only 25 articles, to make sure that load times remain reasonable
|
||||
const list = data.articles.slice(0, 25);
|
||||
|
||||
let result = await ProcessFeed(list, ctx.cache);
|
||||
let result = await ProcessFeed(htmlData, ctx.cache);
|
||||
|
||||
// filter out specified tags
|
||||
if (tagsToExclude !== undefined) {
|
||||
@@ -112,10 +96,14 @@ const getData = async (ctx, url, title, tagsToExclude) => {
|
||||
result = FilterItemsWithTags(result, tagsToExcludeArray);
|
||||
}
|
||||
|
||||
// get the description
|
||||
const $ = cheerio.load(htmlData);
|
||||
const description = $('meta[name="description"]').attr('content');
|
||||
|
||||
return {
|
||||
title: title,
|
||||
link: url,
|
||||
description: $('meta[name="description"]').attr('content'),
|
||||
description: description,
|
||||
item: result,
|
||||
};
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user