Files
RSSHub/lib/v2/flyert/utils.js
mudone c7de199be7 fix(route): flyert duplicate images & add shanghai bank (#12377)
* fix(route): flyert duplicate images & add shanghai bank

* fix(route): flyert duplicate images & add shanghai bank. Optimize code according to the review suggestion.

* docs: add Bank of Shanghai slug for creditcard route
2023-04-24 02:12:47 -07:00

74 lines
2.3 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
const got = require('@/utils/got');
const cheerio = require('cheerio');
const url = require('url');
const iconv = require('iconv-lite');
/**
 * Load an article page and extract the HTML of its first post.
 *
 * The page is requested as a raw buffer and decoded from GBK before parsing.
 * Inside the first post, every image wrapped in an `ignore_js_op` element gets
 * its `src` replaced by the value of its `file` attribute (presumably the real
 * image URL used by the site's lazy loader — confirm against the live markup),
 * is stripped of presentation/script attributes, and is then moved to the end
 * of the post while the `ignore_js_op` wrappers themselves are dropped.
 *
 * @param {string} link - Absolute URL of the article page.
 * @returns {Promise<{description: ?string}>} Object whose `description` is the
 *     inner HTML of the `.firstpost` element as returned by cheerio's `.html()`.
 */
async function load(link) {
    const { data: rawBody } = await got.get(link, {
        responseType: 'buffer',
    });
    // The body is decoded as GBK before being handed to cheerio.
    const $ = cheerio.load(iconv.decode(rawBody, 'gbk'));

    // Drop the trailing "look more" block, scripts/styles and the login dialog.
    for (const selector of ['.lookMore', 'script, style', '#loginDialog']) {
        $(selector).remove();
    }

    // The first post of the thread.
    const $post = $('.firstpost');

    // Attributes that carry no value for feed readers.
    const uselessAttrs = ['id', 'aid', 'zoomfile', 'file', 'class', 'onclick', 'title', 'inpost', 'alt', 'onmouseover'];

    // Point each image at the URL stored in `file`, then strip the clutter.
    $post.find('ignore_js_op img').each((_, img) => {
        const $img = $(img);
        $img.attr('src', $img.attr('file'));
        uselessAttrs.forEach((name) => {
            $img.removeAttr(name);
        });
    });

    // Pull the images out of their wrappers: remember them, delete the
    // wrappers, then re-attach the bare images at the end of the post.
    const $images = $post.find('ignore_js_op img');
    $post.find('ignore_js_op').remove();
    $post.append($images);

    // Extract the content.
    return { description: $post.html() };
}
/**
 * Turn a list of forum list entries into feed items, in parallel.
 *
 * For each entry the category label and the article anchor are read from the
 * `.comiis_common` markup; the anchor's `href` is resolved against the site
 * host and used both as `link` and `guid`. The article body is obtained via
 * `caches.tryGet`, which is handed a loader that calls `load` for the link.
 *
 * @param {Array} list - Entries accepted by `cheerio.load` (one per thread).
 * @param {{tryGet: Function}} caches - Cache object; `tryGet(key, loader)` is
 *     awaited and its result is merged into the item.
 * @returns {Promise<Object[]>} Items with `title`, `link`, `guid` plus whatever
 *     fields the cached/loaded result contributes.
 */
const ProcessFeed = (list, caches) => {
    const host = 'https://www.flyert.com';

    const buildItem = async (item) => {
        const $ = cheerio.load(item);
        const $category = $(".comiis_common a[data-track='版块页主题分类']");
        const $anchor = $(".comiis_common a[data-track='版块页文章']");

        // Turn the relative link into an absolute one.
        const link = url.resolve(host, $anchor.attr('href'));

        // Information available directly from the list page.
        const summary = {
            title: `${$category.text()}-${$anchor.text()}`,
            link,
            guid: link,
        };

        // Ask the cache first; the loader is only needed when the cache
        // cannot supply the content for this link.
        const detail = await caches.tryGet(link, () => load(link));

        // Merge list-level and article-level data; article fields win on clash.
        return { ...summary, ...detail };
    };

    return Promise.all(list.map(buildItem));
};
// Public surface of this module: only the feed builder is exported.
module.exports = { ProcessFeed };