From f8990b7cb72ca3c78806f8a0c51f3ee0c7f873eb Mon Sep 17 00:00:00 2001 From: SettingDust Date: Mon, 11 Mar 2019 10:47:09 +0800 Subject: [PATCH] The cherrio config route (#1699) Use js config too build rss close #349 close #1035 --- docs/README.md | 6 ++ docs/joinus/README.md | 55 +++++++++++++++++ lib/router.js | 4 ++ lib/routes/uraaka-joshi/uraaka-joshi-user.js | 19 ++++++ lib/routes/uraaka-joshi/uraaka-joshi.js | 21 +++++++ lib/utils/common-config.js | 63 ++++++++++++++++++++ lib/utils/date.js | 1 + test/utils/common-config.js | 38 ++++++++++++ 8 files changed, 207 insertions(+) create mode 100644 lib/routes/uraaka-joshi/uraaka-joshi-user.js create mode 100644 lib/routes/uraaka-joshi/uraaka-joshi.js create mode 100644 lib/utils/common-config.js create mode 100644 test/utils/common-config.js diff --git a/docs/README.md b/docs/README.md index 52e3d9c098..2716eaa55d 100755 --- a/docs/README.md +++ b/docs/README.md @@ -3112,3 +3112,9 @@ type 为 all 时,category 参数不支持 cost 和 free ### 决胜网 + +### 裏垢女子まとめ + + + + diff --git a/docs/joinus/README.md b/docs/joinus/README.md index 79f48bfec3..045ad06a63 100644 --- a/docs/joinus/README.md +++ b/docs/joinus/README.md @@ -258,6 +258,61 @@ sidebar: auto // 注:由于此路由只是起到一个新专栏上架提醒的作用,无法访问付费文章,因此没有文章正文 ``` + 4. **使用通用配置型路由** + + 很大一部分网站是可以通过一个配置范式来生成 RSS 的。 + 通用配置即通过 cherrio(**CSS 选择器、jQuery 函数**)读取 json 数据来简便的生成 RSS。 + + 首先我们需要几个数据: + + 1. RSS 来源链接 + 2. 数据来源链接 + 3. RSS 标题(非 item 标题) + + ```js + const buildData = require('../../utils/common-config'); + module.exports = async (ctx) => { + ctx.state.data = await buildData({ + link: RSS来源链接, + url: 数据来源链接, + title: '%title%', //这里使用了变量,形如 **%xxx%** 这样的会被解析为变量,值为 **params** 下的同名值 + params: { + title: RSS标题, + }, + }); + }; + ``` + + 至此,我们的 RSS 还没有任何内容,内容需要由`item`完成,也是核心部分,需要有 CSS 选择器以及 jQuery 的函数知识(请去 W3School 学习) + 下面为一个实例 + 建议在打开[此链接](https://www.uraaka-joshi.com/)的开发者工具之后再阅读以下内容,请善用开发者工具的搜索功能搜寻`$('xxx')`中的内容 + + ```js + const buildData = require('../../utils/common-config'); + + module.exports = async (ctx) => { + const link = `https://www.uraaka-joshi.com/`; + ctx.state.data = await buildData({ + link, + url: link, + title: `%title%`, + params: { + title: '裏垢女子まとめ', + }, + item: { + item: '.content-main .stream .stream-item', + title: `$('.post-account-group').text() + ' - %title%'`, //只支持$().xxx()这样的js语句,也足够使用 + link: `$('.post-account-group').attr('href')`, //.text()代表获取元素的文本,attr()表示获取指定属性 + description: `$('.post .context').html()`, // .html()代表获取元素的html代码 + pubDate: `new Date($('.post-time').attr('datetime')).toUTCString()`, // 日期的格式多种多样,可以尝试使用**/utils/date** + guid: `new Date($('.post-time').attr('datetime')).getTime()`, // guid必须唯一,这是RSS的不同item的标志 + }, + }); + }; + ``` + + 至此我们完成了一个最简单的路由 + --- #### 使用缓存 diff --git a/lib/router.js b/lib/router.js index 77e4482ad3..4be7c2d95f 100755 --- a/lib/router.js +++ b/lib/router.js @@ -1121,4 +1121,8 @@ router.get('/luogu/daily/:id?', require('./routes/luogu/daily')); // 决胜网 router.get('/juesheng', require('./routes/juesheng')); +// 裏垢女子まとめ +router.get('/uraaka-joshi', require('./routes/uraaka-joshi/uraaka-joshi')); +router.get('/uraaka-joshi/:id', require('./routes/uraaka-joshi/uraaka-joshi-user')); + module.exports = router; diff --git a/lib/routes/uraaka-joshi/uraaka-joshi-user.js b/lib/routes/uraaka-joshi/uraaka-joshi-user.js new file mode 100644 index 0000000000..afe404b947 --- /dev/null +++ b/lib/routes/uraaka-joshi/uraaka-joshi-user.js @@ -0,0 +1,19 @@ +const buildData = require('../../utils/common-config'); + +module.exports = async (ctx) => { + const params = ctx.params; + const link = `https://www.uraaka-joshi.com/users/${params.id}`; + ctx.state.data = await buildData({ + link, + url: link, + title: `$('.top-profile-card-name-link').text() + '@${params.id} - 裏垢女子まとめ'`, + item: { + item: '.content-main .stream .stream-item', + title: `$('.post-name').text() + '@${params.id} - 裏垢女子まとめ'`, + link: `https://www.uraaka-joshi.com/users/${params.id}`, + description: `$('.post .context').html()`, + pubDate: `new Date($('.post-time').attr('datetime')).toUTCString()`, + guid: `new Date($('.post-time').attr('datetime')).getTime()`, + }, + }); +}; diff --git a/lib/routes/uraaka-joshi/uraaka-joshi.js b/lib/routes/uraaka-joshi/uraaka-joshi.js new file mode 100644 index 0000000000..74af353ef4 --- /dev/null +++ b/lib/routes/uraaka-joshi/uraaka-joshi.js @@ -0,0 +1,21 @@ +const buildData = require('../../utils/common-config'); + +module.exports = async (ctx) => { + const link = `https://www.uraaka-joshi.com/`; + ctx.state.data = await buildData({ + link, + url: link, + title: `%title%`, + params: { + title: '裏垢女子まとめ', + }, + item: { + item: '.content-main .stream .stream-item', + title: `$('.post-account-group').text() + ' - %title%'`, + link: `$('.post-account-group').attr('href')`, + description: `$('.post .context').html()`, + pubDate: `new Date($('.post-time').attr('datetime')).toUTCString()`, + guid: `new Date($('.post-time').attr('datetime')).getTime()`, + }, + }); +}; diff --git a/lib/utils/common-config.js b/lib/utils/common-config.js new file mode 100644 index 0000000000..9822849d69 --- /dev/null +++ b/lib/utils/common-config.js @@ -0,0 +1,63 @@ +const cheerio = require('cheerio'); +const axios = require('./axios'); + +function transElemText($, prop) { + const regex = new RegExp(/\$\((.*)\)/g); + let result = prop; + if (regex.test(result)) { + result = eval(result); + } + return result; +} + +function replaceParams(data, prop, $) { + const regex = new RegExp(/%(.*)%/g); + let result = prop; + let group = regex.exec(prop); + while (group) { + // FIXME Multi vars + result = result.replace(group[0], transElemText($, data.params[group[1]])); + group = regex.exec(prop); + } + return result; +} + +function getProp(data, prop, $) { + let result = data; + if (Array.isArray(prop)) { + for (const e of prop) { + result = transElemText($, result[e]); + } + } else { + result = transElemText($, result[prop]); + } + return replaceParams(data, result, $); +} + +async function buildData(data) { + const response = (await axios.get(data.url)).data; + const $ = cheerio.load(response); + const $item = $(data.item.item); + // 这里应该是可以通过参数注入一些代码的,不过应该无伤大雅 + return { + title: getProp(data, 'title', $), + description: getProp(data, 'description', $), + item: $item + .map((_, e) => { + const $elem = (selector) => $(e).find(selector); + return { + title: getProp(data, ['item', 'title'], $elem), + description: getProp(data, ['item', 'description'], $elem), + pubDate: getProp(data, ['item', 'pubDate'], $elem), + link: getProp(data, ['item', 'link'], $elem), + guid: getProp(data, ['item', 'guid'], $elem), + }; + }) + .get(), + }; +} + +module.exports = buildData; +module.exports.transElemText = transElemText; +module.exports.replaceParams = replaceParams; +module.exports.getProp = getProp; diff --git a/lib/utils/date.js b/lib/utils/date.js index 19a45da006..c235f2ab01 100644 --- a/lib/utils/date.js +++ b/lib/utils/date.js @@ -7,6 +7,7 @@ module.exports = (html, timeZone = -serverOffset) => { if (/(\d+)分钟前/.exec(html)) { math = /(\d+)分钟前/.exec(html); date.setMinutes(date.getMinutes() - math[1]); + date.setSeconds(0); } else if (/(\d+)小时前/.exec(html)) { math = /(\d+)小时前/.exec(html); date.setHours(date.getHours() - math[1]); diff --git a/test/utils/common-config.js b/test/utils/common-config.js new file mode 100644 index 0000000000..39d26541db --- /dev/null +++ b/test/utils/common-config.js @@ -0,0 +1,38 @@ +const configUtils = require('../../lib/utils/common-config'); + +describe('index', () => { + it('transElemText', async () => { + const $ = () => 'RSSHub'; + expect(configUtils.transElemText($, '$()')).toBe('RSSHub'); + }); + + it('replaceParams', async () => { + const $ = () => 'RSSHub'; + const data = { + params: { + title: 'RSSHub', + }, + title: '%title%', + }; + expect(configUtils.replaceParams(data, data.title, $)).toBe('RSSHub'); + }); + + it('getProp', async () => { + const $ = () => 'RSSHub'; + const data = { + title: 'RSSHub', + }; + expect(configUtils.getProp(data, ['title'], $)).toBe('RSSHub'); + }); + + it('all', async () => { + const $ = () => 'RSSHub'; + const data = { + params: { + title: '$()', + }, + title: '%title%', + }; + expect(configUtils.getProp(data, ['title'], $)).toBe('RSSHub'); + }); +});