From 698c7bdaa5b0e692e4c90bfd253a868ffe96dba8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E9=95=BF=E5=AE=89?= Date: Thu, 31 Jan 2019 10:58:20 +0800 Subject: [PATCH] =?UTF-8?q?Adding=20RSS=EF=BC=9A=E6=97=A5=E6=8A=A5=20|=20D?= =?UTF-8?q?2=20=E8=B5=84=E6=BA=90=E5=BA=93=20(#1494)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加对 [D2 资源库日报](https://awesome.fairyever.com/daily/) 的支持。 **注意**: 引入了一个新的依赖:[sanitize-html](https://www.npmjs.com/package/sanitize-html) 用于去除 HTML 中不需要的内容 ```javascript description = sanitizeHtml(description, { allowedTags: ['section', 'h2', 'ul', 'li', 'p', 'span', 'a'], }); ``` --- docs/README.md | 4 ++++ lib/router.js | 3 +++ lib/routes/d2/daily.js | 50 ++++++++++++++++++++++++++++++++++++++++ package.json | 1 + yarn.lock | 52 ++++++++++++++++++++++++++++++++++++++---- 5 files changed, 106 insertions(+), 4 deletions(-) create mode 100644 lib/routes/d2/daily.js diff --git a/docs/README.md b/docs/README.md index 9a81cddb67..41189db331 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2850,3 +2850,7 @@ board 和 build 可在[这里](http://api.ineal.me/tss/status)查看 | 最新 | 天文航空 | 动物植物 | 自然地理 | 历史考古 | 生命医学 | 生活百科 | 科技前沿 | + +### 日报 | D2 资源库 + + diff --git a/lib/router.js b/lib/router.js index c9b600be5f..04f9980f26 100644 --- a/lib/router.js +++ b/lib/router.js @@ -1019,4 +1019,7 @@ router.get('/mpaypass/news', require('./routes/mpaypass/news')); // 新浪科技探索 router.get('/sina/discovery/:type', require('./routes/sina/discovery')); +// D2 资源库 +router.get('/d2/daily', require('./routes/d2/daily')); + module.exports = router; diff --git a/lib/routes/d2/daily.js b/lib/routes/d2/daily.js new file mode 100644 index 0000000000..35b3a21b3d --- /dev/null +++ b/lib/routes/d2/daily.js @@ -0,0 +1,50 @@ +const axios = require('../../utils/axios'); +const cheerio = require('cheerio'); +const url = require('url'); +const sanitizeHtml = require('sanitize-html'); + +module.exports = async (ctx) => { + const indexLink = 'https://awesome.fairyever.com/daily/'; + const { data } = await axios.get(indexLink); + + const $ = cheerio.load(data); + // 修改 slice 可以获取更多天的内容,暂时最新的一天就够了 + const days = $('ul.menu-list>li>a') + .slice(0, 1) + .toArray(); + + const promises = days.map(async (ele) => { + ele = $(ele); + const relativePath = ele.attr('href'); + const innerText = ele.text(); + const link = url.resolve(indexLink, relativePath); + const cache = ctx.cache.get(link); + if (cache) { + return cache; + } + const { data } = await axios.get(link); + const $$ = cheerio.load(data); + let description = $$('article[title*=日报]>section') + // 去掉文章头部的 logo 和文章尾部的二维码 + .slice(1, -1) + .toArray() + .reduce((pre, cur) => pre + $$.html(cur), ''); + description = sanitizeHtml(description, { + allowedTags: ['section', 'h2', 'ul', 'li', 'p', 'span', 'a'], + }); + return { + title: $('head>title').text(), + pubDate: new Date(innerText.trim()).toUTCString(), + description, + guid: link, + link, + }; + }); + + ctx.state.data = { + title: '日报 | D2 资源库', + link: 'https://awesome.fairyever.com/daily/', + description: '日报 | D2 资源库', + item: await Promise.all(promises), + }; +}; diff --git a/package.json b/package.json index b9b6733ea9..6038d5bef4 100644 --- a/package.json +++ b/package.json @@ -75,6 +75,7 @@ "raven": "^2.6.4", "redis": "2.8.0", "rss-parser": "3.6.2", + "sanitize-html": "^1.20.0", "sharp": "^0.21.0", "socks-proxy-agent": "^4.0.1", "twit": "2.2.11", diff --git a/yarn.lock b/yarn.lock index 861376daf1..d2f35bc3da 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1377,7 +1377,7 @@ array-union@^1.0.1: dependencies: array-uniq "^1.0.1" -array-uniq@^1.0.1: +array-uniq@^1.0.1, array-uniq@^1.0.2: version "1.0.3" resolved "https://registry.yarnpkg.com/array-uniq/-/array-uniq-1.0.3.tgz#af6ac877a25cc7f74e058894753858dfdb24fdb6" integrity sha1-r2rId6Jcx/dOBYiUdThY39sk/bY= @@ -4550,7 +4550,7 @@ html-minifier@^3.2.3, html-minifier@^3.4.3: relateurl "0.2.x" uglify-js "3.4.x" -htmlparser2@^3.9.1: +htmlparser2@^3.10.0, htmlparser2@^3.9.1: version "3.10.0" resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-3.10.0.tgz#5f5e422dcf6119c0d983ed36260ce9ded0bee464" integrity sha512-J1nEUGv+MkXS0weHNWVKJJ+UrLfePxRWpN3C9bEi9fLxL2+ggW94DQvgYVXsaT30PGwYRIZKNZXuyMhp3Di4bQ== @@ -6186,11 +6186,31 @@ lodash.debounce@^4.0.8: resolved "https://registry.yarnpkg.com/lodash.debounce/-/lodash.debounce-4.0.8.tgz#82d79bff30a67c4005ffd5e2515300ad9ca4d7af" integrity sha1-gteb/zCmfEAF/9XiUVMArZyk168= +lodash.escaperegexp@^4.1.2: + version "4.1.2" + resolved "https://registry.npmjs.org/lodash.escaperegexp/-/lodash.escaperegexp-4.1.2.tgz#64762c48618082518ac3df4ccf5d5886dae20347" + integrity sha1-ZHYsSGGAglGKw99Mz11YhtriA0c= + +lodash.isplainobject@^4.0.6: + version "4.0.6" + resolved "https://registry.npmjs.org/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz#7c526a52d89b45c45cc690b88163be0497f550cb" + integrity sha1-fFJqUtibRcRcxpC4gWO+BJf1UMs= + +lodash.isstring@^4.0.1: + version "4.0.1" + resolved "https://registry.npmjs.org/lodash.isstring/-/lodash.isstring-4.0.1.tgz#d527dfb5456eca7cc9bb95d5daeaf88ba54a5451" + integrity sha1-1SfftUVuynzJu5XV2ur4i6VKVFE= + lodash.memoize@^4.1.2: version "4.1.2" resolved "https://registry.yarnpkg.com/lodash.memoize/-/lodash.memoize-4.1.2.tgz#bcc6c49a42a2840ed997f323eada5ecd182e0bfe" integrity sha1-vMbEmkKihA7Zl/Mj6tpezRguC/4= +lodash.mergewith@^4.6.1: + version "4.6.1" + resolved "https://registry.npmjs.org/lodash.mergewith/-/lodash.mergewith-4.6.1.tgz#639057e726c3afbdb3e7d42741caa8d6e4335927" + integrity sha512-eWw5r+PYICtEBgrBE5hhlT6aAa75f411bgDz/ZL2KZqYV03USvucsxcHUIlGTDTECs1eunpI7HOV7U+WLDvNdQ== + lodash.sortby@^4.7.0: version "4.7.0" resolved "https://registry.yarnpkg.com/lodash.sortby/-/lodash.sortby-4.7.0.tgz#edd14c824e2cc9c1e0b0a1b42bb5210516a42438" @@ -7870,7 +7890,7 @@ postcss@^6.0.0, postcss@^6.0.1, postcss@^6.0.23: source-map "^0.6.1" supports-color "^5.4.0" -postcss@^7.0.7: +postcss@^7.0.5, postcss@^7.0.7: version "7.0.14" resolved "https://registry.yarnpkg.com/postcss/-/postcss-7.0.14.tgz#4527ed6b1ca0d82c53ce5ec1a2041c2346bbd6e5" integrity sha512-NsbD6XUUMZvBxtQAJuWDJeeC4QFsmWsfozWxCJPWf3M55K9iu2iMDaKqyoOdTJ1R4usBXuxlVFAIo8rZPQD4Bg== @@ -8678,6 +8698,22 @@ sane@^3.0.0: optionalDependencies: fsevents "^1.2.3" +sanitize-html@^1.20.0: + version "1.20.0" + resolved "https://registry.npmjs.org/sanitize-html/-/sanitize-html-1.20.0.tgz#9a602beb1c9faf960fb31f9890f61911cc4d9156" + integrity sha512-BpxXkBoAG+uKCHjoXFmox6kCSYpnulABoGcZ/R3QyY9ndXbIM5S94eOr1IqnzTG8TnbmXaxWoDDzKC5eJv7fEQ== + dependencies: + chalk "^2.4.1" + htmlparser2 "^3.10.0" + lodash.clonedeep "^4.5.0" + lodash.escaperegexp "^4.1.2" + lodash.isplainobject "^4.0.6" + lodash.isstring "^4.0.1" + lodash.mergewith "^4.6.1" + postcss "^7.0.5" + srcset "^1.0.0" + xtend "^4.0.1" + sax@0.5.x: version "0.5.8" resolved "https://registry.yarnpkg.com/sax/-/sax-0.5.8.tgz#d472db228eb331c2506b0e8c15524adb939d12c1" @@ -9056,6 +9092,14 @@ sprintf-js@~1.0.2: resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c" integrity sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw= +srcset@^1.0.0: + version "1.0.0" + resolved "https://registry.npmjs.org/srcset/-/srcset-1.0.0.tgz#a5669de12b42f3b1d5e83ed03c71046fc48f41ef" + integrity sha1-pWad4StC87HV6D7QPHEEb8SPQe8= + dependencies: + array-uniq "^1.0.2" + number-is-nan "^1.0.0" + sshpk@^1.7.0: version "1.16.1" resolved "https://registry.yarnpkg.com/sshpk/-/sshpk-1.16.1.tgz#fb661c0bef29b39db40769ee39fa70093d6f6877" @@ -10626,7 +10670,7 @@ xmldom@0.1.x: resolved "https://registry.yarnpkg.com/xmldom/-/xmldom-0.1.27.tgz#d501f97b3bdb403af8ef9ecc20573187aadac0e9" integrity sha1-1QH5ezvbQDr4757MIFcxh6rawOk= -xtend@^4.0.0, xtend@~4.0.1: +xtend@^4.0.0, xtend@^4.0.1, xtend@~4.0.1: version "4.0.1" resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.1.tgz#a5c6d532be656e23db820efb943a1f04998d63af" integrity sha1-pcbVMr5lbiPbgg77lDofBJmNY68=