Files
RSSHub/lib/v2/penguin-random-house/utils.js
StevenRCE0 2af4df23e6 feat(route): Penguin Random House (#10287)
* Initial commit of route Penguin Random House

* WIP: parse books into templates

* Updated radar info

* Added articles route

* Renamed the route and added docs

* Update lib/v2/penguin-random-house/articles.js according to suggestion

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* Changed category to Reading, added En docs

* Changed pubDate format

* Update lib/v2/penguin-random-house/router.js

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* Update lib/v2/penguin-random-house/articles.js

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* Update lib/v2/penguin-random-house/thereaddown.js

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* Parse time

* Fallback on failed regex matches

* Fallback date to undefined

* Update lib/v2/penguin-random-house/utils.js

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* fix: radar domain
2022-07-24 22:21:46 +08:00

120 lines
3.2 KiB
JavaScript

const cheerio = require('cheerio');
const got = require('@/utils/got');
const { art } = require('@/utils/render');
const path = require('path');
const { parseDate } = require('@/utils/parse-date');
/**
 * Render a single book entry as HTML using the `templates/book.art` template.
 *
 * @param element - Markup or node for one book entry; passed straight to `cheerio.load`.
 * @returns Whatever `art` produces for the book template with the extracted
 *     title, author, description and cover image attributes.
 */
const parseBookInList = (element) => {
    const $ = cheerio.load(element);

    // The back-cover image is preferred. When it has no usable `src`, both the
    // URL and the alt text come from the responsive image instead, whose URL
    // is stored in `data-src`.
    const backCover = $('img.cover__backcover');
    const responsiveCover = $('img.img-responsive');
    const hasBackCover = Boolean(backCover.attr('src'));

    const templateData = {
        title: $('h2').first().text(),
        author: $('h2.author').first().text(),
        description: $('.desc').first().text(),
        imageSrc: hasBackCover ? backCover.attr('src') : responsiveCover.attr('data-src'),
        imageAlt: hasBackCover ? backCover.attr('alt') : responsiveCover.attr('alt'),
    };

    return art(path.join(__dirname, 'templates/book.art'), templateData);
};
/**
 * Extract the publication date embedded in a page's inline scripts.
 *
 * Finds the first `<script>` whose first child text contains `post_date` and
 * reads the value of its `"post_date":"…"` entry.
 *
 * @param data - Cheerio-style query function for the loaded page
 *     (`data('script').get()` must return the script nodes).
 * @returns The result of `parseDate` for the extracted value, or `undefined`
 *     when no script mentions `post_date` or the value cannot be matched.
 */
const parsePubDate = (data) => {
    const scriptWithDate = data('script')
        .get()
        .find((element) => {
            const firstChild = element.children[0];
            // Scripts without inline text (e.g. external `src` scripts) have nothing to inspect.
            if (!firstChild || !firstChild.data) {
                return false;
            }
            return firstChild.data.includes('post_date');
        });

    // `find` yields undefined when no script matches; return early rather than
    // dereferencing it, so callers get an undefined date instead of a TypeError.
    if (!scriptWithDate) {
        return;
    }

    const dateMatch = scriptWithDate.children[0].data.match(/(?<="post_date":").*?(?=")/);
    if (!dateMatch) {
        return;
    }
    return parseDate(dateMatch[0]);
};
/**
 * Parse a book-list page into the parts used to build a feed entry.
 *
 * @param element - Page markup handed to `cheerio.load` (presumably the fetched
 *     page HTML; confirm against the caller).
 * @returns An object with:
 *     - `description`: inner HTML of the first `h2.read-down-text`;
 *     - `content`: the concatenated `parseBookInList` output for every `.awesome-list>li`;
 *     - `pubDate`: the result of `parsePubDate` for the page.
 */
const parseBooks = (element) => {
    const $ = cheerio.load(element);
    const description = $('h2.read-down-text').first().html();

    const content = $('.awesome-list>li')
        .toArray()
        .reduce((html, listItem) => html + parseBookInList(listItem), '');

    // Kept after the list rendering on purpose.
    // NOTE(review): `cheerio.load` on an existing node may re-parent it — confirm before reordering.
    const pubDate = parsePubDate($);

    return { description, content, pubDate };
};
/**
 * Parse an article page into the parts used to build a feed entry.
 *
 * @param element - Page markup handed to `cheerio.load` (presumably the fetched
 *     page HTML; confirm against the caller).
 * @returns An object with:
 *     - `description`: the `templates/articleHeader.art` rendering of the first
 *       `h2.hdr-smalltxt` and the first `div.img-block>img`;
 *     - `content`: concatenated HTML of each `<p>` / `<ul>` directly under `div.main-content`;
 *     - `pubDate`: the result of `parsePubDate` for the page.
 */
const parseArticle = (element) => {
    const $ = cheerio.load(element);

    const subtitleHtml = $('h2.hdr-smalltxt').first().html();
    const heroImage = $('div.img-block>img').first();
    const headerHtml = art(path.join(__dirname, 'templates/articleHeader.art'), {
        description: subtitleHtml,
        imageSrc: heroImage.attr('src'),
        imageAlt: heroImage.attr('alt'),
    });

    // Each paragraph/list is loaded on its own and serialised, then appended in document order.
    let bodyHtml = '';
    for (const node of $('div.main-content>p,div.main-content>ul').toArray()) {
        bodyHtml += cheerio.load(node).html();
    }

    // Kept after the body loop on purpose.
    // NOTE(review): `cheerio.load` on an existing node may re-parent it — confirm before reordering.
    const pubDate = parsePubDate($);

    return {
        description: headerHtml,
        content: bodyHtml,
        pubDate,
    };
};
/**
 * Build feed entries for a list of items, fetching and parsing each item's page
 * through the cache.
 *
 * @param items - Array of objects with `url` and `title`.
 * @param ctx - Request context; `ctx.cache.tryGet(key, producer)` is called once per item with `item.url` as key.
 * @param contentParser - Function that receives the fetched page body and
 *     returns `{ description, content, pubDate }`.
 * @returns A promise (via `Promise.all`) of whatever `tryGet` resolves to for
 *     each item, in the same order as `items`.
 */
const parseList = (items, ctx, contentParser) => {
    // Producer/cache wrapper for a single item; the producer only runs when `tryGet` decides to call it.
    const loadEntry = (item) =>
        ctx.cache.tryGet(item.url, async () => {
            const { data: page } = await got(item.url);
            const { description, content, pubDate } = contentParser(page);
            return {
                title: item.title,
                description: description + '<br>' + content,
                pubDate,
                link: item.url,
            };
        });

    return Promise.all(items.map((item) => loadEntry(item)));
};
// Exported surface of this module. `parseBookInList` and `parsePubDate` are
// internal helpers and are intentionally not exported.
module.exports = {
parseList,
parseBooks,
parseArticle,
};