mirror of
https://github.com/DIYgod/RSSHub.git
synced 2025-12-10 23:34:38 +08:00
修复知乎图片重复问题 & 知乎日报h2标签处理逻辑 (#1704)
1. 知乎的**用户回答**、**专栏**、**问题**、**话题**内容中,图片处的实际结构是 `<noscript><img ... /></noscript><img ... />`。在 Inoreader 中 noscript 标签会被去掉,而其内部的 img 仍被保留,导致出现两张一模一样的图片;移除没用的 noscript 标签即可轻松解决,确保图片的正确显示。
2. **知乎日报**内容需要去掉形如 `<h2 class="question-title">知乎问题标题</h2>` 中的 h2 标签;现在的正则会把 `</h2>` 也一并去掉,处理逻辑更为清晰。
This commit is contained in:
@@ -36,7 +36,7 @@ module.exports = async (ctx) => {
|
||||
Referer: url,
|
||||
},
|
||||
});
|
||||
item.description = utils.ProcessImage(storyDetail.data.body.replace(/<div class="meta">([\s\S]*?)<\/div>/g, '<strong>$1</strong>').replace(/<h2.*?>/g, ''));
|
||||
item.description = utils.ProcessImage(storyDetail.data.body.replace(/<div class="meta">([\s\S]*?)<\/div>/g, '<strong>$1</strong>').replace(/<\/?h2.*?>/g, ''));
|
||||
ctx.cache.set(key, item.description, 24 * 60 * 60);
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,8 @@ module.exports = {
|
||||
ProcessImage: function(content) {
|
||||
const $ = cheerio.load(content, { xmlMode: true });
|
||||
|
||||
$('noscript').remove();
|
||||
|
||||
$('img.content_image, img.origin_image, img.content-image, img.data-actualsrc').each((i, e) => {
|
||||
if (e.attribs['data-original']) {
|
||||
$(e).attr({
|
||||
|
||||
Reference in New Issue
Block a user