mirror of
https://github.com/DIYgod/RSSHub.git
synced 2025-12-02 18:18:06 +08:00
* Initial commit of route Penguin Random House * WIP: parse books into templates * Updated radar info * Added articles route * Renamed the route and added docs * Update lib/v2/penguin-random-house/articles.js according to suggestion Co-authored-by: Tony <TonyRL@users.noreply.github.com> * Changed category to Reading, added En docs * Changed pubDate format * Update lib/v2/penguin-random-house/router.js Co-authored-by: Tony <TonyRL@users.noreply.github.com> * Update lib/v2/penguin-random-house/articles.js Co-authored-by: Tony <TonyRL@users.noreply.github.com> * Update lib/v2/penguin-random-house/thereaddown.js Co-authored-by: Tony <TonyRL@users.noreply.github.com> * Parse time * Fallback on failed regex matches * Fallback date to undefined * Update lib/v2/penguin-random-house/utils.js Co-authored-by: Tony <TonyRL@users.noreply.github.com> * fix: radar domain
120 lines
3.2 KiB
JavaScript
120 lines
3.2 KiB
JavaScript
const cheerio = require('cheerio');
|
|
const got = require('@/utils/got');
|
|
const { art } = require('@/utils/render');
|
|
const path = require('path');
|
|
const { parseDate } = require('@/utils/parse-date');
|
|
|
|
/**
 * Render a single book entry as an HTML fragment via the `book.art` template.
 *
 * The title is the text of the first `h2`, the author the text of the first
 * `h2.author`, and the blurb the text of the first `.desc` inside the entry.
 * For the cover, `img.cover__backcover` (`src`/`alt`) is preferred; when it
 * yields no `src`, `img.img-responsive` is used instead, reading `data-src`
 * and `alt`.
 *
 * @param {*} element - Markup or node for one book entry; passed straight to `cheerio.load`.
 * @returns {*} Whatever `art` produces for `templates/book.art` with the extracted fields.
 */
const parseBookInList = (element) => {
    const $ = cheerio.load(element);

    // Resolve the cover image, falling back to the second image when the back cover has no src.
    const pickCover = () => {
        const backCover = $('img.cover__backcover');
        const backCoverSrc = backCover.attr('src');
        if (backCoverSrc) {
            return { imageSrc: backCoverSrc, imageAlt: backCover.attr('alt') };
        }

        const responsive = $('img.img-responsive');
        return { imageSrc: responsive.attr('data-src'), imageAlt: responsive.attr('alt') };
    };

    const templateData = {
        title: $('h2').first().text(),
        author: $('h2.author').first().text(),
        description: $('.desc').first().text(),
        ...pickCover(),
    };

    return art(path.join(__dirname, 'templates/book.art'), templateData);
};
|
|
|
|
/**
 * Extract the publication date embedded in one of the page's inline scripts.
 *
 * Scans every `<script>` element for the first one whose first child node
 * carries text containing `post_date`, captures the value of the
 * `"post_date":"…"` pair from that text and passes it to `parseDate`.
 *
 * @param {Function} data - Cheerio-style selector function for the loaded page;
 *     invoked as `data('script').get()` to list the script elements.
 * @returns {*} The result of `parseDate` for the captured value, or `undefined`
 *     when no script mentions `post_date` or the pattern does not match.
 */
const parsePubDate = (data) => {
    const dateScript = data('script')
        .get()
        .find((element) => {
            const firstChild = element.children[0];
            // Scripts with no inline text (e.g. external ones) cannot hold the date.
            if (!firstChild || !firstChild.data) {
                return false;
            }
            return firstChild.data.includes('post_date');
        });

    // `find` yields undefined when nothing matched; the date is optional,
    // so fall back to undefined rather than throwing.
    if (!dateScript) {
        return;
    }

    const dateMatch = dateScript.children[0].data.match(/(?<="post_date":").*?(?=")/);
    if (!dateMatch) {
        return;
    }

    return parseDate(dateMatch[0]);
};
|
|
|
|
/**
 * Parse a book-list page into its intro, rendered book entries and date.
 *
 * The intro is the inner HTML of the first `h2.read-down-text`. Every
 * `.awesome-list>li` item is rendered with `parseBookInList` and the
 * fragments are concatenated in document order.
 *
 * @param {*} element - Page markup; passed straight to `cheerio.load`.
 * @returns {{description: *, content: string, pubDate: *}} Intro HTML, the
 *     concatenated book fragments, and the result of `parsePubDate` for the page.
 */
const parseBooks = (element) => {
    const $ = cheerio.load(element);
    const description = $('h2.read-down-text').first().html();

    // Fold the list items into one HTML string, one rendered book at a time.
    const content = $('.awesome-list>li')
        .get()
        .reduce((html, listItem) => html + parseBookInList(listItem), '');

    return {
        description,
        content,
        pubDate: parsePubDate($),
    };
};
|
|
|
|
/**
 * Parse an article page into a rendered header, body HTML and date.
 *
 * The header is rendered from `templates/articleHeader.art` using the inner
 * HTML of the first `h2.hdr-smalltxt` and the `src`/`alt` of the first
 * `div.img-block>img`. The body is the concatenated HTML of every `<p>` and
 * `<ul>` that is a direct child of `div.main-content`.
 *
 * @param {*} element - Page markup; passed straight to `cheerio.load`.
 * @returns {{description: *, content: string, pubDate: *}} Rendered header,
 *     body HTML, and the result of `parsePubDate` for the page.
 */
const parseArticle = (element) => {
    const $ = cheerio.load(element);
    const leadImage = $('div.img-block>img').first();

    const header = art(path.join(__dirname, 'templates/articleHeader.art'), {
        description: $('h2.hdr-smalltxt').first().html(),
        imageSrc: leadImage.attr('src'),
        imageAlt: leadImage.attr('alt'),
    });

    // Serialise each top-level paragraph/list on its own and join the pieces in order.
    const body = $('div.main-content>p,div.main-content>ul')
        .get()
        .reduce((html, node) => html + cheerio.load(node).html(), '');

    return {
        description: header,
        content: body,
        pubDate: parsePubDate($),
    };
};
|
|
|
|
/**
 * Resolve a list of entries into full feed items, going through the cache.
 *
 * For each entry, `ctx.cache.tryGet` is called with the entry URL as key and
 * a loader that fetches the URL with `got`, runs `contentParser` on the
 * response `data`, and assembles the feed item.
 *
 * @param {Array<{title: *, url: *}>} items - Entries to resolve.
 * @param {{cache: {tryGet: Function}}} ctx - Context exposing `cache.tryGet(key, loader)`.
 * @param {Function} contentParser - Receives the fetched page data and returns
 *     an object with `description`, `content` and `pubDate`.
 * @returns {Promise<Array>} Resolves with one `tryGet` result per entry, in input order.
 */
const parseList = (items, ctx, contentParser) => {
    // Builds the loader handed to the cache for one entry.
    const makeLoader = (item) => async () => {
        const { data: page } = await got(item.url);
        const parsed = contentParser(page);

        return {
            title: item.title,
            description: `${parsed.description}<br>${parsed.content}`,
            pubDate: parsed.pubDate,
            link: item.url,
        };
    };

    const pending = items.map((item) => ctx.cache.tryGet(item.url, makeLoader(item)));
    return Promise.all(pending);
};
|
|
|
|
// Public surface of this module: the list resolver and the two page parsers.
module.exports = { parseList, parseBooks, parseArticle };
|