From 4e2c3efe2d46e70fa1eecac44a048d60d0fd0cb7 Mon Sep 17 00:00:00 2001 From: TheLittle_Yang <2570984321@qq.com> Date: Fri, 26 May 2023 12:18:28 +0800 Subject: [PATCH] feat(route): DevolverDigital Blog: Parse per page instead of full page, and add cache support. (#12560) * get each post instead of the whole page * add cache support * Update blog.js * Update blog.js --- lib/v2/devolverdigital/blog.js | 54 +++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/lib/v2/devolverdigital/blog.js b/lib/v2/devolverdigital/blog.js index bdd42f5e9e..98245902ec 100644 --- a/lib/v2/devolverdigital/blog.js +++ b/lib/v2/devolverdigital/blog.js @@ -3,39 +3,47 @@ const cheerio = require('cheerio'); module.exports = async (ctx) => { const baseUrl = 'https://www.devolverdigital.com/blog'; - const { data: response } = await got(baseUrl); + const $ = cheerio.load(response); - $('noscript').remove(); - const nextData = JSON.parse($('#__NEXT_DATA__').text()); - const allBlogContents = $('div.flex > div > div > div > div:not([class])'); - const items = nextData.props.pageProps.posts.map((post, postIndex) => { - // img resource redirection and - // clean up absolute layouts for img and span + const items = await Promise.all( + nextData.props.pageProps.posts.map((postData) => { + const postUrl = `${baseUrl}/post/${postData.id}`; + return ctx.cache.tryGet(postUrl, async () => { + const { data: postPage } = await got(postUrl); - const imageUrls = post.body.filter((item) => item.type === 'upload' && item.value.cloudinary.resource_type === 'image').map((item) => item.value.cloudinary.secure_url); + const $page = cheerio.load(postPage); + $page('noscript').remove(); + const postContent = $page('div.flex > div > div > div > div:not([class])'); - const allImageSpans = $(allBlogContents[postIndex]).find('span > img').parent(); + // img resource redirection and + // clean up absolute layouts for img and span + const imageUrls = postData.body.filter((item) => item.type === 'upload' && item.value.cloudinary.resource_type === 'image').map((item) => item.value.cloudinary.secure_url); + const allImageSpans = postContent.find('span > img').parent(); + allImageSpans.each((spanIndex, span) => { + $(span).attr('style', $(span).attr('style').replace('position:absolute', '')); + const img = $(span).find('img'); + img.attr('src', imageUrls[spanIndex]); + img.attr('style', img.attr('style').replace('position:absolute', '').replace('width:0', '').replace('height:0', '')); + }); - allImageSpans.each((spanIndex, span) => { - $(span).attr('style', $(span).attr('style').replace('position:absolute', '')); - const img = $(span).find('img'); - img.attr('src', imageUrls[spanIndex]); - img.attr('style', img.attr('style').replace('position:absolute', '').replace('width:0', '').replace('height:0', '')); - }); - - return { - title: post.title, - author: post.author, - pubDate: Date.parse(post.createdAt), - description: $(allBlogContents[postIndex]).html(), - }; - }); + return { + title: postData.title, + link: postUrl, + author: postData.author, + pubDate: Date.parse(postData.createdAt), + updated: Date.parse(postData.updatedAt), + description: postContent.html(), + }; + }); + }) + ); ctx.state.data = { title: 'DevolverDigital Blog', + language: 'en-us', link: 'https://www.devolverdigital.com/blog', item: items, };