feat(route): Penguin Random House (#10287)

* Initial commit of route Penguin Random House

* WIP: parse books into templates

* Updated radar info

* Added articles route

* Renamed the route and added docs

* Update lib/v2/penguin-random-house/articles.js according to suggestion

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* Changed category to Reading, added En docs

* Changed pubDate format

* Update lib/v2/penguin-random-house/router.js

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* Update lib/v2/penguin-random-house/articles.js

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* Update lib/v2/penguin-random-house/thereaddown.js

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* Parse time

* Fallback on failed regex matches

* Fallback date to undefined

* Update lib/v2/penguin-random-house/utils.js

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* fix: radar domain
This commit is contained in:
StevenRCE0
2022-07-24 22:21:46 +08:00
committed by GitHub
parent 362a2b0f49
commit 2af4df23e6
11 changed files with 230 additions and 0 deletions

View File

@@ -57,6 +57,16 @@ Eg:<https://kakuyomu.jp/works/1177354054883783581>
</RouteEn>
## Penguin Random House
### Book Lists
<RouteEn author="StevenRCE0" example="/penguin-random-house/the-read-down" path="/penguin-random-house/the-read-down" />
### Articles
<RouteEn author="StevenRCE0" example="/penguin-random-house/articles" path="/penguin-random-house/articles" />
## syosetu
### chapter

View File

@@ -69,6 +69,16 @@ pageClass: routes
</Route>
## Penguin Random House
### Book Lists
<Route author="StevenRCE0" example="/penguin-random-house/the-read-down" path="/penguin-random-house/the-read-down" />
### Articles
<Route author="StevenRCE0" example="/penguin-random-house/articles" path="/penguin-random-house/articles" />
## SoBooks
### 首页

View File

@@ -37,6 +37,7 @@ module.exports = async (ctx) => {
link: currentUrl,
description,
pubDate: parseDate(title.match(/\((.*)\)/)[1], ['D MMMM YYYY', 'D MMM YYYY']),
guid: title,
};
});

View File

@@ -0,0 +1,27 @@
const utils = require('./utils');
const cheerio = require('cheerio');
const got = require('@/utils/got');
module.exports = async (ctx) => {
const link = 'https://www.penguinrandomhouse.com/articles/';
const res = await got(link);
const $ = cheerio.load(res.data);
const itemArray = $('.archive-module-half-container,.archive-module-third-container')
.map(function () {
return {
url: $(this).find('a').attr('href'),
title: $(this).find('.archive-module-text').first().text(),
};
})
.get();
const out = await utils.parseList(itemArray, ctx, utils.parseArticle);
ctx.state.data = {
title: 'Penguin Random House Articles',
link,
description: 'In-depth interviews, author essays, fascinating essays. Go deeper into the books you love.',
item: out,
};
};

View File

@@ -0,0 +1,4 @@
module.exports = {
'/the-read-down': ['StevenRCE0'],
'/articles': ['StevenRCE0'],
};

View File

@@ -0,0 +1,19 @@
module.exports = {
'penguinrandomhouse.com': {
_name: 'Penguin Random House',
'.': [
{
title: 'Penguin Random House Book Lists',
docs: 'https://docs.rsshub.app/reading.html#penguin-random-house',
source: ['/the-read-down'],
target: '/penguin-random-house/the-read-down',
},
{
title: 'Penguin Random House Articles',
docs: 'https://docs.rsshub.app/reading.html#penguin-random-house',
source: ['/articles'],
target: '/penguin-random-house/articles',
},
],
},
};

View File

@@ -0,0 +1,4 @@
module.exports = function (router) {
router.get('/articles', require('./articles'));
router.get('/the-read-down', require('./thereaddown'));
};

View File

@@ -0,0 +1,5 @@
<p>
{{ if imageSrc }}
<img src="{{ imageSrc }}" alt="{{ imageAlt }}"/>
<br>
{{ /if }}
{{ if description }}
{{@ description }}
{{ /if }}
</p>

View File

@@ -0,0 +1,4 @@
{{ if imageSrc }}
<img src="{{ imageSrc }}" alt="{{ imageAlt }}"/>
{{ /if }}
<h1>{{ title }}</h1>
<h3>{{ author }}</h3>
<p>{{ description }}</p>

View File

@@ -0,0 +1,27 @@
const utils = require('./utils');
const cheerio = require('cheerio');
const got = require('@/utils/got');
module.exports = async (ctx) => {
const link = 'https://www.penguinrandomhouse.com/the-read-down/';
const res = await got(link);
const $ = cheerio.load(res.data);
const itemArray = $('.archive-module-half-container,.archive-module-third-container')
.map(function () {
return {
url: $(this).find('a').attr('href'),
title: $(this).find('.archive-module-text').first().text(),
};
})
.get();
const out = await utils.parseList(itemArray, ctx, utils.parseBooks);
ctx.state.data = {
title: 'Penguin Random House Book Lists',
link,
description: 'Never wonder what to read next! Check out these lists to find your next favorite book.',
item: out,
};
};

View File

@@ -0,0 +1,119 @@
const cheerio = require('cheerio');
const got = require('@/utils/got');
const { art } = require('@/utils/render');
const path = require('path');
const { parseDate } = require('@/utils/parse-date');
/**
 * Renders one book entry of a list through the `book.art` template.
 *
 * The cover is taken from `img.cover__backcover` when that image has a `src`;
 * otherwise both source and alt text come from `img.img-responsive`, whose
 * source is read from `data-src`.
 *
 * @param {*} element - Markup or node for a single book, passed to `cheerio.load`.
 * @returns {*} Whatever `art` returns for the book template.
 */
const parseBookInList = (element) => {
    const $ = cheerio.load(element);

    const backCover = $('img.cover__backcover');
    const hasBackCover = Boolean(backCover.attr('src'));
    const cover = hasBackCover ? backCover : $('img.img-responsive');

    const templatePath = path.join(__dirname, 'templates/book.art');
    return art(templatePath, {
        title: $('h2').first().text(),
        author: $('h2.author').first().text(),
        description: $('.desc').first().text(),
        imageSrc: cover.attr(hasBackCover ? 'src' : 'data-src'),
        imageAlt: cover.attr('alt'),
    });
};
/**
 * Extracts the publication date embedded in a page's inline scripts.
 *
 * Looks for the first `<script>` whose text contains `post_date`, pulls the
 * value out of its `"post_date":"…"` pair and hands it to `parseDate`.
 *
 * @param {Function} data - Loaded cheerio document (the `$` function).
 * @returns {*} The result of `parseDate`, or `undefined` when no script
 *     mentions `post_date` or the value cannot be matched.
 */
const parsePubDate = (data) => {
    const dateScript = data('script')
        .get()
        .find((element) => {
            // Scripts loaded via `src` have no text child to inspect.
            const text = element.children[0];
            return Boolean(text && text.data && text.data.includes('post_date'));
        });
    // No matching script at all: there is nothing to index into, so bail out
    // instead of dereferencing `undefined`.
    if (!dateScript) {
        return;
    }
    const dateMatch = dateScript.children[0].data.match(/(?<="post_date":").*?(?=")/);
    if (!dateMatch) {
        return;
    }
    return parseDate(dateMatch[0]);
};
/**
 * Parses a book-list page into the pieces used to build a feed item.
 *
 * @param {*} element - Page markup, passed to `cheerio.load`.
 * @returns {{description: (string|null), content: string, pubDate: *}}
 *     `description` is the inner HTML of the first `h2.read-down-text`,
 *     `content` is the concatenation of every rendered `.awesome-list>li`
 *     book, and `pubDate` comes from `parsePubDate`.
 */
const parseBooks = (element) => {
    const $ = cheerio.load(element);
    const description = $('h2.read-down-text').first().html();

    // Render each listed book and glue the fragments together in page order.
    const content = $('.awesome-list>li')
        .toArray()
        .map((listItem) => parseBookInList(listItem))
        .join('');

    return {
        description,
        content,
        pubDate: parsePubDate($),
    };
};
/**
 * Parses an article page into the pieces used to build a feed item.
 *
 * @param {*} element - Page markup, passed to `cheerio.load`.
 * @returns {{description: *, content: string, pubDate: *}}
 *     `description` is the rendered `articleHeader.art` template (sub-heading
 *     plus lead image), `content` is the serialized HTML of every direct
 *     `<p>` / `<ul>` child of `div.main-content`, and `pubDate` comes from
 *     `parsePubDate`.
 */
const parseArticle = (element) => {
    const $ = cheerio.load(element);

    const leadImage = $('div.img-block>img').first();
    const header = art(path.join(__dirname, 'templates/articleHeader.art'), {
        description: $('h2.hdr-smalltxt').first().html(),
        imageSrc: leadImage.attr('src'),
        imageAlt: leadImage.attr('alt'),
    });

    // Serialize each body node on its own and concatenate in page order.
    const body = $('div.main-content>p,div.main-content>ul')
        .toArray()
        .map((node) => cheerio.load(node).html())
        .join('');

    return {
        description: header,
        content: body,
        pubDate: parsePubDate($),
    };
};
/**
 * Builds feed items for a list of `{ url, title }` entries.
 *
 * Each entry's page is fetched with `got` and run through `contentParser`;
 * the work is wrapped in `ctx.cache.tryGet` keyed by the entry URL.
 *
 * @param {Array<{url: string, title: string}>} items - Entries scraped from a listing page.
 * @param {object} ctx - Request context providing `cache.tryGet(key, producer)`.
 * @param {Function} contentParser - Receives the fetched page body and returns
 *     `{ description, content, pubDate }`.
 * @returns {Promise<Array<{title: string, description: string, pubDate: *, link: string}>>}
 *     One item per entry that has a URL, in the original order.
 */
const parseList = (items, ctx, contentParser) =>
    Promise.all(
        items
            // An entry without a link can be neither fetched nor cached, and a
            // single rejection would fail the whole feed, so leave it out.
            .filter((item) => item.url)
            .map((item) =>
                ctx.cache.tryGet(item.url, async () => {
                    const { data: itemPage } = await got(item.url);
                    const { description, content, pubDate } = contentParser(itemPage);
                    return {
                        title: item.title,
                        // Parsers return null when their selector matched nothing;
                        // avoid rendering that as the literal text "null".
                        description: `${description || ''}<br>${content || ''}`,
                        pubDate,
                        link: item.url,
                    };
                })
            )
    );
// Helpers exposed to the route handlers: the list builder and the two page parsers.
module.exports = { parseList, parseBooks, parseArticle };