mirror of
https://github.com/DIYgod/RSSHub.git
synced 2025-12-09 23:00:48 +08:00
feat: Update the Vulture endpoint to be able to generate a... (#4757)
This commit is contained in:
@@ -156,14 +156,7 @@ Provides a better reading experience (full text articles) over the official one.
|
|||||||
|
|
||||||
## Vulture
|
## Vulture
|
||||||
|
|
||||||
<RouteEn author="loganrockmore" example="/vulture/movies" path="/vulture/:type/:excludetags?" :paramsDesc="['The sub-site name', 'Comma-delimited list of tags. If an article includes one of these tags, it will be excluded from the RSS feed.']">
|
<RouteEn author="loganrockmore" example="/vulture/movies" path="/vulture/:tag/:excludetags?" :paramsDesc="['Tag', 'Comma-delimited list of tags. If an article includes one of these tags, it will be excluded from the RSS feed.']" />
|
||||||
|
|
||||||
Supported sub-sites:
|
|
||||||
| TV | Movies | Comedy | Music | TV Recaps | Books | Theater | Art | Awards | Video |
|
|
||||||
| --- | ------ | ------ | ----- | --------- | ----- | ------- | --- | ------ | ----- |
|
|
||||||
| tv | movies | comedy | music | tvrecaps | books | theater | art | awards | video |
|
|
||||||
|
|
||||||
</RouteEn>
|
|
||||||
|
|
||||||
## World Health Organization | WHO
|
## World Health Organization | WHO
|
||||||
|
|
||||||
|
|||||||
@@ -2215,7 +2215,7 @@ router.get('/mastodon/timeline/:site/:only_media?', require('./routes/mastodon/t
|
|||||||
router.get('/aliyun-kernel/index', require('./routes/aliyun-kernel/index'));
|
router.get('/aliyun-kernel/index', require('./routes/aliyun-kernel/index'));
|
||||||
|
|
||||||
// Vulture
|
// Vulture
|
||||||
router.get('/vulture/:type/:excludetags?', require('./routes/vulture/index'));
|
router.get('/vulture/:tag/:excludetags?', require('./routes/vulture/index'));
|
||||||
|
|
||||||
// xinwenlianbo
|
// xinwenlianbo
|
||||||
router.get('/xinwenlianbo/index', require('./routes/xinwenlianbo/index'));
|
router.get('/xinwenlianbo/index', require('./routes/xinwenlianbo/index'));
|
||||||
|
|||||||
@@ -1,9 +1,14 @@
|
|||||||
const utils = require('./utils');
|
const utils = require('./utils');
|
||||||
|
|
||||||
module.exports = async (ctx) => {
|
module.exports = async (ctx) => {
|
||||||
const url = `https://www.vulture.com/${ctx.params.type}/`;
|
const url = `https://www.vulture.com/news/${ctx.params.tag}/`;
|
||||||
const title = `Vulture - ${ctx.params.type}`;
|
|
||||||
const tagsToExclude = ctx.params.excludetags;
|
const tagsToExclude = ctx.params.excludetags;
|
||||||
|
|
||||||
|
let title = `Vulture - tag ${ctx.params.tag}`;
|
||||||
|
if (tagsToExclude !== undefined) {
|
||||||
|
title += ' - excluding tags ';
|
||||||
|
title += tagsToExclude.split(',').join(', ');
|
||||||
|
}
|
||||||
|
|
||||||
ctx.state.data = await utils.getData(ctx, url, title, tagsToExclude);
|
ctx.state.data = await utils.getData(ctx, url, title, tagsToExclude);
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,10 +1,17 @@
|
|||||||
const got = require('@/utils/got');
|
const got = require('@/utils/got');
|
||||||
const cheerio = require('cheerio');
|
const cheerio = require('cheerio');
|
||||||
|
|
||||||
async function load(link) {
|
async function load(articleURL) {
|
||||||
const response = await got.get(link);
|
const response = await got.get(articleURL);
|
||||||
const $ = cheerio.load(response.data);
|
const $ = cheerio.load(response.data);
|
||||||
|
|
||||||
|
// get the metadata
|
||||||
|
const title = $('meta[property="og:title"]').attr('content');
|
||||||
|
const pubDate = $('meta[property="article:published_time"]').attr('content');
|
||||||
|
const bylineString = 'by ' + $('meta[name="author"]').attr('content');
|
||||||
|
const tags = $('meta[property="article:tag"]').attr('content').split(', ');
|
||||||
|
|
||||||
|
// get the contents of the article
|
||||||
const description = $('div.article-content');
|
const description = $('div.article-content');
|
||||||
|
|
||||||
// remove the content that we don't want to show
|
// remove the content that we don't want to show
|
||||||
@@ -16,45 +23,44 @@ async function load(link) {
|
|||||||
description.find('div.mobile-secondary-area').remove();
|
description.find('div.mobile-secondary-area').remove();
|
||||||
description.find('aside.newsletter-flex-text').remove();
|
description.find('aside.newsletter-flex-text').remove();
|
||||||
|
|
||||||
// get the tags
|
// add the tags to the end
|
||||||
const tagElements = $('div.tags > ul > li > a:not(.more)');
|
description.append('<br /><br />tags: ' + tags.join(', '));
|
||||||
const tags = tagElements
|
|
||||||
.map(function () {
|
|
||||||
return $(this).text().toLowerCase();
|
|
||||||
})
|
|
||||||
.get();
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
title: title,
|
||||||
|
author: bylineString,
|
||||||
|
pubDate: pubDate,
|
||||||
|
link: articleURL,
|
||||||
|
guid: articleURL,
|
||||||
description: description.html(),
|
description: description.html(),
|
||||||
tags: tags,
|
tags: tags,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
async function ProcessFeed(list, caches) {
|
async function ProcessFeed(htmlData, caches) {
|
||||||
|
const $ = cheerio.load(htmlData);
|
||||||
|
const allArticles = $('section.paginated-feed li.article');
|
||||||
|
|
||||||
|
// limit the list to only 25 articles, to make sure that load times remain reasonable
|
||||||
|
const articles = allArticles.slice(0, 25);
|
||||||
|
|
||||||
|
const articleURLs = [];
|
||||||
|
$(articles).each(function (index, article) {
|
||||||
|
const articleLink = $(article).find('a.link-text');
|
||||||
|
|
||||||
|
let articleURL = articleLink.attr('href');
|
||||||
|
if (articleURL.startsWith('//www.')) {
|
||||||
|
articleURL = 'https:' + articleURL;
|
||||||
|
} else if (articleURL.startsWith('www.')) {
|
||||||
|
articleURL = 'https://' + articleURL;
|
||||||
|
}
|
||||||
|
articleURLs.push(articleURL);
|
||||||
|
});
|
||||||
|
|
||||||
return await Promise.all(
|
return await Promise.all(
|
||||||
list.map(async (item) => {
|
articleURLs.map(async (articleURL) => {
|
||||||
const itemUrl = item.canonicalUrl;
|
const data = await caches.tryGet(articleURL, async () => await load(articleURL));
|
||||||
|
return Promise.resolve(Object.assign({}, data));
|
||||||
let bylineString = '';
|
|
||||||
if (item.byline) {
|
|
||||||
const byline = item.byline[0];
|
|
||||||
const bylineNames = byline.names.map((name) => name.text);
|
|
||||||
const bylineNamesString = bylineNames.join(', ');
|
|
||||||
|
|
||||||
bylineString = 'by ' + bylineNamesString;
|
|
||||||
}
|
|
||||||
|
|
||||||
const single = {
|
|
||||||
title: item.primaryHeadline,
|
|
||||||
link: itemUrl,
|
|
||||||
author: bylineString,
|
|
||||||
guid: itemUrl,
|
|
||||||
pubDate: item.date,
|
|
||||||
};
|
|
||||||
|
|
||||||
const other = await caches.tryGet(itemUrl, async () => await load(itemUrl));
|
|
||||||
|
|
||||||
return Promise.resolve(Object.assign({}, single, other));
|
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -82,29 +88,7 @@ const getData = async (ctx, url, title, tagsToExclude) => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
const htmlData = htmlResponse.data;
|
const htmlData = htmlResponse.data;
|
||||||
|
let result = await ProcessFeed(htmlData, ctx.cache);
|
||||||
const $ = cheerio.load(htmlData);
|
|
||||||
let dataUri = $('section.paginated-feed').attr('data-uri');
|
|
||||||
|
|
||||||
if (dataUri.startsWith('www.')) {
|
|
||||||
dataUri = 'https://' + dataUri;
|
|
||||||
}
|
|
||||||
|
|
||||||
// get the raw data
|
|
||||||
const response = await got({
|
|
||||||
method: 'get',
|
|
||||||
url: dataUri,
|
|
||||||
headers: {
|
|
||||||
Referer: dataUri,
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
const data = response.data;
|
|
||||||
|
|
||||||
// limit the list to only 25 articles, to make sure that load times remain reasonable
|
|
||||||
const list = data.articles.slice(0, 25);
|
|
||||||
|
|
||||||
let result = await ProcessFeed(list, ctx.cache);
|
|
||||||
|
|
||||||
// filter out specified tags
|
// filter out specified tags
|
||||||
if (tagsToExclude !== undefined) {
|
if (tagsToExclude !== undefined) {
|
||||||
@@ -112,10 +96,14 @@ const getData = async (ctx, url, title, tagsToExclude) => {
|
|||||||
result = FilterItemsWithTags(result, tagsToExcludeArray);
|
result = FilterItemsWithTags(result, tagsToExcludeArray);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// get the description
|
||||||
|
const $ = cheerio.load(htmlData);
|
||||||
|
const description = $('meta[name="description"]').attr('content');
|
||||||
|
|
||||||
return {
|
return {
|
||||||
title: title,
|
title: title,
|
||||||
link: url,
|
link: url,
|
||||||
description: $('meta[name="description"]').attr('content'),
|
description: description,
|
||||||
item: result,
|
item: result,
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user