feat: Update the Vulture endpoint to be able to generate a... (#4757)

Logan Rockmore
2020-05-17 23:55:57 -04:00
committed by GitHub
parent 509c887a05
commit ec383ae3c4
4 changed files with 54 additions and 68 deletions

View File

@@ -156,14 +156,7 @@ Provides a better reading experience (full text articles) over the official one.
 ## Vulture
-<RouteEn author="loganrockmore" example="/vulture/movies" path="/vulture/:type/:excludetags?" :paramsDesc="['The sub-site name', 'Comma-delimited list of tags. If an article includes one of these tags, it will be excluded from the RSS feed.']">
-Supported sub-sites
-| TV | Movies | Comedy | Music | TV Recaps | Books | Theater | Art | Awards | Video |
-| --- | ------ | ------ | ----- | --------- | ----- | ------- | --- | ------ | ----- |
-| tv | movies | comedy | music | tvrecaps | books | theater | art | awards | video |
-</RouteEn>
+<RouteEn author="loganrockmore" example="/vulture/movies" path="/vulture/:tag/:excludetags?" :paramsDesc="['Tag', 'Comma-delimited list of tags. If an article includes one of these tags, it will be excluded from the RSS feed.']" />
 ## World Health Organization | WHO
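The practical effect of this doc change: the route is now keyed by an arbitrary Vulture tag instead of the fixed sub-site table that was removed. A minimal sketch of the URLs the new route accepts, assuming a local RSSHub instance on port 1200 (the host, port, and the `veep` tag are illustrative assumptions, not from the diff):

```js
// Hypothetical example requests (host, port, and tag values are assumptions):
// GET http://localhost:1200/vulture/movies
//   -> feed of articles tagged "movies"
// GET http://localhost:1200/vulture/veep/sponsored-content,overnights
//   -> feed of articles tagged "veep", excluding any article that also
//      carries the "sponsored-content" or "overnights" tag
```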

View File

@@ -2215,7 +2215,7 @@ router.get('/mastodon/timeline/:site/:only_media?', require('./routes/mastodon/t
 router.get('/aliyun-kernel/index', require('./routes/aliyun-kernel/index'));
 // Vulture
-router.get('/vulture/:type/:excludetags?', require('./routes/vulture/index'));
+router.get('/vulture/:tag/:excludetags?', require('./routes/vulture/index'));
 // xinwenlianbo
 router.get('/xinwenlianbo/index', require('./routes/xinwenlianbo/index'));
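Only the parameter name changes here, so the same URL shapes still match; what changes is the key under which the first path segment appears in `ctx.params`. A sketch of the binding (the sample requests are assumptions):

```js
// Sketch of how the new path binds its parameters:
// GET /vulture/veep                    -> ctx.params = { tag: 'veep' }
// GET /vulture/veep/overnights,recaps  -> ctx.params = { tag: 'veep', excludetags: 'overnights,recaps' }
```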

View File

@@ -1,9 +1,14 @@
 const utils = require('./utils');
 module.exports = async (ctx) => {
-    const url = `https://www.vulture.com/${ctx.params.type}/`;
-    const title = `Vulture - ${ctx.params.type}`;
+    const url = `https://www.vulture.com/news/${ctx.params.tag}/`;
     const tagsToExclude = ctx.params.excludetags;
+    let title = `Vulture - tag ${ctx.params.tag}`;
+    if (tagsToExclude !== undefined) {
+        title += ' - excluding tags ';
+        title += tagsToExclude.split(',').join(', ');
+    }
     ctx.state.data = await utils.getData(ctx, url, title, tagsToExclude);
 };
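The new title logic is pure string manipulation, so it can be verified in isolation; a runnable sketch with made-up parameter values:

```js
// Standalone check of the title construction (sample values are assumptions):
const tag = 'veep';
const tagsToExclude = 'overnights,recaps';

let title = `Vulture - tag ${tag}`;
if (tagsToExclude !== undefined) {
    title += ' - excluding tags ';
    title += tagsToExclude.split(',').join(', ');
}
console.log(title); // "Vulture - tag veep - excluding tags overnights, recaps"
```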

View File

@@ -1,10 +1,17 @@
 const got = require('@/utils/got');
 const cheerio = require('cheerio');
-async function load(link) {
-    const response = await got.get(link);
+async function load(articleURL) {
+    const response = await got.get(articleURL);
     const $ = cheerio.load(response.data);
+    // get the metadata
+    const title = $('meta[property="og:title"]').attr('content');
+    const pubDate = $('meta[property="article:published_time"]').attr('content');
+    const bylineString = 'by ' + $('meta[name="author"]').attr('content');
+    const tags = $('meta[property="article:tag"]').attr('content').split(', ');
     // get the contents of the article
     const description = $('div.article-content');
     // remove the content that we don't want to show
@@ -16,45 +23,44 @@ async function load(link) {
     description.find('div.mobile-secondary-area').remove();
     description.find('aside.newsletter-flex-text').remove();
-    // get the tags
-    const tagElements = $('div.tags > ul > li > a:not(.more)');
-    const tags = tagElements
-        .map(function () {
-            return $(this).text().toLowerCase();
-        })
-        .get();
     // add the tags to the end
     description.append('<br /><br />tags: ' + tags.join(', '));
     return {
+        title: title,
+        author: bylineString,
+        pubDate: pubDate,
+        link: articleURL,
+        guid: articleURL,
         description: description.html(),
         tags: tags,
     };
 }
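One caveat about the new metadata block in `load()`: `.attr('content')` returns `undefined` when a meta tag is absent, so the unconditional `.split(', ')` on `article:tag` would throw for an article without tags. A more defensive variant, offered as an assumption rather than part of the commit:

```js
// Hedged sketch (not part of the commit): guard the tag extraction against
// articles with no article:tag meta element, since .attr('content') returns
// undefined there and .split() would then throw.
const cheerio = require('cheerio');

const $ = cheerio.load('<html><head><title>no tags here</title></head></html>');
const tagsContent = $('meta[property="article:tag"]').attr('content'); // undefined
const tags = tagsContent ? tagsContent.split(', ') : [];
console.log(tags); // []
```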
-async function ProcessFeed(list, caches) {
-    return await Promise.all(
-        list.map(async (item) => {
-            const itemUrl = item.canonicalUrl;
-            let bylineString = '';
-            if (item.byline) {
-                const byline = item.byline[0];
-                const bylineNames = byline.names.map((name) => name.text);
-                const bylineNamesString = bylineNames.join(', ');
-                bylineString = 'by ' + bylineNamesString;
-            }
-            const single = {
-                title: item.primaryHeadline,
-                link: itemUrl,
-                author: bylineString,
-                guid: itemUrl,
-                pubDate: item.date,
-            };
-            const other = await caches.tryGet(itemUrl, async () => await load(itemUrl));
-            return Promise.resolve(Object.assign({}, single, other));
+async function ProcessFeed(htmlData, caches) {
+    const $ = cheerio.load(htmlData);
+    const allArticles = $('section.paginated-feed li.article');
+    // limit the list to only 25 articles, to make sure that load times remain reasonable
+    const articles = allArticles.slice(0, 25);
+    const articleURLs = [];
+    $(articles).each(function (index, article) {
+        const articleLink = $(article).find('a.link-text');
+        let articleURL = articleLink.attr('href');
+        if (articleURL.startsWith('//www.')) {
+            articleURL = 'https:' + articleURL;
+        } else if (articleURL.startsWith('www.')) {
+            articleURL = 'https://' + articleURL;
+        }
+        articleURLs.push(articleURL);
+    });
+    return await Promise.all(
+        articleURLs.map(async (articleURL) => {
+            const data = await caches.tryGet(articleURL, async () => await load(articleURL));
+            return Promise.resolve(Object.assign({}, data));
         })
     );
 }
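The href normalization inside the `each` loop handles protocol-relative (`//www.`) and bare-host (`www.`) links. Pulled out as a standalone helper it is easy to sanity-check; the function name and test values below are illustrative, not from the commit:

```js
// Hypothetical helper mirroring the normalization logic in ProcessFeed:
function normalizeArticleURL(articleURL) {
    if (articleURL.startsWith('//www.')) {
        return 'https:' + articleURL; // protocol-relative -> https
    } else if (articleURL.startsWith('www.')) {
        return 'https://' + articleURL; // bare host -> full https URL
    }
    return articleURL; // already absolute
}

console.log(normalizeArticleURL('//www.vulture.com/article.html')); // https://www.vulture.com/article.html
console.log(normalizeArticleURL('www.vulture.com/article.html'));   // https://www.vulture.com/article.html
console.log(normalizeArticleURL('https://www.vulture.com/a.html')); // unchanged
```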
@@ -82,29 +88,7 @@ const getData = async (ctx, url, title, tagsToExclude) => {
     });
     const htmlData = htmlResponse.data;
-    const $ = cheerio.load(htmlData);
-    let dataUri = $('section.paginated-feed').attr('data-uri');
-    if (dataUri.startsWith('www.')) {
-        dataUri = 'https://' + dataUri;
-    }
-    // get the raw data
-    const response = await got({
-        method: 'get',
-        url: dataUri,
-        headers: {
-            Referer: dataUri,
-        },
-    });
-    const data = response.data;
-    // limit the list to only 25 articles, to make sure that load times remain reasonable
-    const list = data.articles.slice(0, 25);
-    let result = await ProcessFeed(list, ctx.cache);
+    let result = await ProcessFeed(htmlData, ctx.cache);
     // filter out specified tags
     if (tagsToExclude !== undefined) {
@@ -112,10 +96,14 @@ const getData = async (ctx, url, title, tagsToExclude) => {
         result = FilterItemsWithTags(result, tagsToExcludeArray);
     }
     // get the description
+    const $ = cheerio.load(htmlData);
+    const description = $('meta[name="description"]').attr('content');
     return {
         title: title,
         link: url,
-        description: $('meta[name="description"]').attr('content'),
+        description: description,
         item: result,
     };
 };
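`FilterItemsWithTags` is called above but its body sits outside this diff. Given that every item now carries a `tags` array from `load()`, a plausible reconstruction looks like the following; this is an assumption about the unshown helper, not the actual source:

```js
// Hypothetical reconstruction of the unshown helper: drop any feed item
// whose tags intersect the excluded set.
function FilterItemsWithTags(items, tagsToExcludeArray) {
    return items.filter((item) => !item.tags.some((tag) => tagsToExcludeArray.includes(tag)));
}

// Example: the second item is removed because it carries an excluded tag.
const items = [
    { title: 'A', tags: ['veep'] },
    { title: 'B', tags: ['veep', 'overnights'] },
];
console.log(FilterItemsWithTags(items, ['overnights'])); // [ { title: 'A', tags: ['veep'] } ]
```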