mirror of
https://github.com/DIYgod/RSSHub.git
synced 2025-12-02 18:18:06 +08:00
92 lines
3.0 KiB
JavaScript
92 lines
3.0 KiB
JavaScript
const got = require('@/utils/got');
|
|
const parser = require('@/utils/rss-parser');
|
|
const cheerio = require('cheerio');
|
|
|
|
const ProcessFeed = (i, $, link) => {
|
|
const title = $('h1').text();
|
|
let content = $('div.story-container').eq(i);
|
|
|
|
// 处理封面图片
|
|
content.find('div.story-image > figure').each((_, e) => {
|
|
const src = `https://thumbor.ftacademy.cn/unsafe/1340x754/${e.attribs['data-url']}`;
|
|
|
|
$(`<img src=${src}>`).insertAfter(content.find('div.story-lead')[0]);
|
|
});
|
|
|
|
// 付费文章跳转
|
|
content.find('div#subscribe-now-container').each((_, e) => {
|
|
$(`<br/><p>此文章为付费文章,会员<a href='${link}'>请访问网站阅读</a>。</p>`).insertAfter(content.find('div.story-body')[0]);
|
|
$(e).remove();
|
|
});
|
|
|
|
// 获取作者
|
|
let author = '';
|
|
content.find('span.story-author > a').each((_, e) => {
|
|
author += `${$(e).text()} `;
|
|
});
|
|
author = author.trim();
|
|
|
|
// 去除头部主题, 头部重复标题, 冗余元数据, 植入广告, 植入 js, 社交分享按钮, 底部版权声明, 空白 DOM
|
|
content
|
|
.find(
|
|
'div.story-theme, h1.story-headline, div.story-byline, div.mpu-container-instory,script, div#story-action-placeholder, div.copyrightstatement-container, div.clearfloat, div.o-ads, h2.list-title, div.allcomments, div.logincomment, div.nologincomment'
|
|
)
|
|
.each((_, e) => {
|
|
$(e).remove();
|
|
});
|
|
content = content.html();
|
|
|
|
return { content, author, title };
|
|
};
|
|
|
|
const getData = async ({ site = 'www', channel, ctx }) => {
|
|
let feed;
|
|
|
|
if (channel) {
|
|
channel = channel.toLowerCase();
|
|
channel = channel.split('-').join('/');
|
|
|
|
try {
|
|
feed = await parser.parseURL(`https://${site}.ftchinese.com/rss/${channel}`);
|
|
} catch (error) {
|
|
return {
|
|
title: `FT 中文网 ${channel} 不存在`,
|
|
description: `FT 中文网 ${channel} 不存在`,
|
|
};
|
|
}
|
|
} else {
|
|
feed = await parser.parseURL(`https://${site}.ftchinese.com/rss/feed`);
|
|
}
|
|
|
|
const items = await Promise.all(
|
|
feed.items.map((item) => {
|
|
item.link = item.link.replace('http://', 'https://');
|
|
return ctx.cache.tryGet(item.link, async () => {
|
|
const response = await got.get(`${item.link}?full=y&archive`);
|
|
|
|
const $ = cheerio.load(response.data);
|
|
const results = [];
|
|
for (let i = 0; i < $('div.story-container').length; i++) {
|
|
results.push(ProcessFeed(i, $, item.link));
|
|
}
|
|
|
|
item.title = results[0].title;
|
|
item.description = results.map((result) => result.content).join('');
|
|
item.author = results[0].author;
|
|
return item;
|
|
});
|
|
})
|
|
);
|
|
|
|
return {
|
|
title: feed.title,
|
|
link: feed.link,
|
|
description: feed.description,
|
|
item: items,
|
|
};
|
|
};
|
|
|
|
module.exports = {
|
|
getData,
|
|
};
|