The cheerio config route (#1699)

Use js config to build rss

close #349 close #1035
This commit is contained in:
SettingDust
2019-03-11 10:47:09 +08:00
committed by DIYgod
parent b559b7664f
commit f8990b7cb7
8 changed files with 207 additions and 0 deletions

View File

@@ -3112,3 +3112,9 @@ type 为 all 时category 参数不支持 cost 和 free
### 决胜网
<route name="最新资讯" author="WenryXu" example="/juesheng" path="/juesheng"/>
### 裏垢女子まとめ
<route name="主页" author="SettingDust" example="/uraaka-joshi" path="/uraaka-joshi"/>
<route name="用户" author="SettingDust" example="/uraaka-joshi/_rrwq" path="/uraaka-joshi/:id" :paramsDesc="['用户名']"/>

View File

@@ -258,6 +258,61 @@ sidebar: auto
// 注:由于此路由只是起到一个新专栏上架提醒的作用,无法访问付费文章,因此没有文章正文
```
4. **使用通用配置型路由**
很大一部分网站是可以通过一个配置范式来生成 RSS 的。
通用配置即通过 cheerio(**CSS 选择器、jQuery 函数**)读取 json 数据来简便地生成 RSS。
首先我们需要几个数据:
1. RSS 来源链接
2. 数据来源链接
3. RSS 标题(非 item 标题)
```js
const buildData = require('../../utils/common-config');
module.exports = async (ctx) => {
ctx.state.data = await buildData({
link: RSS来源链接,
url: 数据来源链接,
title: '%title%', //这里使用了变量,形如 **%xxx%** 这样的会被解析为变量,值为 **params** 下的同名值
params: {
title: RSS标题,
},
});
};
```
至此,我们的 RSS 还没有任何内容,内容需要由`item`完成,也是核心部分,需要有 CSS 选择器以及 jQuery 的函数知识(请去 W3School 学习)
下面为一个实例
建议在打开[此链接](https://www.uraaka-joshi.com/)的开发者工具之后再阅读以下内容,请善用开发者工具的搜索功能搜寻`$('xxx')`中的内容
```js
const buildData = require('../../utils/common-config');
module.exports = async (ctx) => {
const link = `https://www.uraaka-joshi.com/`;
ctx.state.data = await buildData({
link,
url: link,
title: `%title%`,
params: {
title: '裏垢女子まとめ',
},
item: {
item: '.content-main .stream .stream-item',
title: `$('.post-account-group').text() + ' - %title%'`, // 只支持 $().xxx() 这样的 js 语句,也足够使用
link: `$('.post-account-group').attr('href')`, // .text() 代表获取元素的文本,.attr() 表示获取指定属性
description: `$('.post .context').html()`, // .html()代表获取元素的html代码
pubDate: `new Date($('.post-time').attr('datetime')).toUTCString()`, // 日期的格式多种多样,可以尝试使用**/utils/date**
guid: `new Date($('.post-time').attr('datetime')).getTime()`, // guid 必须唯一,这是 RSS 的不同 item 的标志
},
});
};
```
至此我们完成了一个最简单的路由
---
#### 使用缓存

View File

@@ -1121,4 +1121,8 @@ router.get('/luogu/daily/:id?', require('./routes/luogu/daily'));
// Juesheng (latest news)
router.get('/juesheng', require('./routes/juesheng'));
// Uraaka Joshi Matome: home page feed and per-user feed
router.get('/uraaka-joshi', require('./routes/uraaka-joshi/uraaka-joshi'));
router.get('/uraaka-joshi/:id', require('./routes/uraaka-joshi/uraaka-joshi-user'));
module.exports = router;

View File

@@ -0,0 +1,19 @@
const buildData = require('../../utils/common-config');
module.exports = async (ctx) => {
const params = ctx.params;
const link = `https://www.uraaka-joshi.com/users/${params.id}`;
ctx.state.data = await buildData({
link,
url: link,
title: `$('.top-profile-card-name-link').text() + '@${params.id} - 裏垢女子まとめ'`,
item: {
item: '.content-main .stream .stream-item',
title: `$('.post-name').text() + '@${params.id} - 裏垢女子まとめ'`,
link: `https://www.uraaka-joshi.com/users/${params.id}`,
description: `$('.post .context').html()`,
pubDate: `new Date($('.post-time').attr('datetime')).toUTCString()`,
guid: `new Date($('.post-time').attr('datetime')).getTime()`,
},
});
};

View File

@@ -0,0 +1,21 @@
const buildData = require('../../utils/common-config');
module.exports = async (ctx) => {
const link = `https://www.uraaka-joshi.com/`;
ctx.state.data = await buildData({
link,
url: link,
title: `%title%`,
params: {
title: '裏垢女子まとめ',
},
item: {
item: '.content-main .stream .stream-item',
title: `$('.post-account-group').text() + ' - %title%'`,
link: `$('.post-account-group').attr('href')`,
description: `$('.post .context').html()`,
pubDate: `new Date($('.post-time').attr('datetime')).toUTCString()`,
guid: `new Date($('.post-time').attr('datetime')).getTime()`,
},
});
};

View File

@@ -0,0 +1,63 @@
const cheerio = require('cheerio');
const axios = require('./axios');
/**
 * Resolve a config value that may be a JavaScript expression built around `$(...)`.
 *
 * A value containing a `$(...)` call is evaluated with `eval`, with the `$`
 * argument reachable from the evaluated code; anything else is returned as is.
 *
 * NOTE(review): `eval` executes arbitrary code, so `prop` must never carry
 * untrusted input.
 *
 * @param {Function} $ - selector function made available to the evaluated expression
 * @param {*} prop - literal value or `$(...)` expression string
 * @returns {*} the evaluation result, or `prop` itself when nothing was evaluated
 */
function transElemText($, prop) {
    const hasSelectorCall = /\$\((.*)\)/g.test(prop);
    return hasSelectorCall ? eval(prop) : prop;
}
function replaceParams(data, prop, $) {
const regex = new RegExp(/%(.*)%/g);
let result = prop;
let group = regex.exec(prop);
while (group) {
// FIXME Multi vars
result = result.replace(group[0], transElemText($, data.params[group[1]]));
group = regex.exec(prop);
}
return result;
}
/**
 * Read a config value from `data` and resolve it to its final form.
 *
 * `prop` is either a single key or an array of keys walked from `data` downwards.
 * Every value met along the way is passed through `transElemText`, and the final
 * value is passed through `replaceParams`.
 *
 * @param {object} data - route config the value is read from
 * @param {string|string[]} prop - key, or path of keys, of the wanted value
 * @param {Function} $ - selector function forwarded to the resolvers
 * @returns {*} the resolved value
 */
function getProp(data, prop, $) {
    const path = Array.isArray(prop) ? prop : [prop];
    const resolved = path.reduce((node, key) => transElemText($, node[key]), data);
    return replaceParams(data, resolved, $);
}
/**
 * Fetch `data.url` and turn the page into feed data according to the config.
 *
 * @param {object} data - route config
 * @param {string} data.url - page to download and parse
 * @param {string} [data.link] - link of the feed itself, copied to the result
 * @param {string} [data.title] - feed title (literal, `%param%` template or `$(...)` expression)
 * @param {string} [data.description] - feed description, resolved like the title
 * @param {object} [data.params] - values for `%name%` placeholders
 * @param {object} [data.item] - entry config: `item` is the selector of the entry
 *     elements; `title`, `description`, `pubDate`, `link` and `guid` are resolved
 *     per entry with `$` scoped to that entry
 * @returns {Promise<object>} `{ link, title, description, item }`, where `item` is
 *     the array of resolved entries (empty when no entry config is given)
 */
async function buildData(data) {
    const response = (await axios.get(data.url)).data;
    const $ = cheerio.load(response);
    // NOTE(review): config strings are evaluated as code further down (getProp),
    // so code can be injected through them; they must never contain raw user input.
    const itemConfig = data.item;
    // A config without an `item` section is valid and simply yields no entries.
    const items = itemConfig
        ? $(itemConfig.item)
              .map((_, e) => {
                  // Selector lookups inside entry expressions are limited to this entry.
                  const $elem = (selector) => $(e).find(selector);
                  return {
                      title: getProp(data, ['item', 'title'], $elem),
                      description: getProp(data, ['item', 'description'], $elem),
                      pubDate: getProp(data, ['item', 'pubDate'], $elem),
                      link: getProp(data, ['item', 'link'], $elem),
                      guid: getProp(data, ['item', 'guid'], $elem),
                  };
              })
              .get()
        : [];
    return {
        // Every route passes the feed link; forward it instead of dropping it.
        link: data.link,
        title: getProp(data, 'title', $),
        description: getProp(data, 'description', $),
        item: items,
    };
}
// The module itself is the feed builder.
module.exports = buildData;
// The helpers are also attached as properties of the exported function,
// so they can be called individually (e.g. by unit tests).
module.exports.transElemText = transElemText;
module.exports.replaceParams = replaceParams;
module.exports.getProp = getProp;

View File

@@ -7,6 +7,7 @@ module.exports = (html, timeZone = -serverOffset) => {
if (/(\d+)分钟前/.exec(html)) {
math = /(\d+)分钟前/.exec(html);
date.setMinutes(date.getMinutes() - math[1]);
date.setSeconds(0);
} else if (/(\d+)小时前/.exec(html)) {
math = /(\d+)小时前/.exec(html);
date.setHours(date.getHours() - math[1]);

View File

@@ -0,0 +1,38 @@
const configUtils = require('../../lib/utils/common-config');
// Unit tests for the helper functions exported by common-config.
describe('index', () => {
    // Selector stub shared by every case: any call yields the fixed text 'RSSHub'.
    const $ = () => 'RSSHub';

    // A `$()` expression is evaluated with the stub in scope.
    it('transElemText', async () => {
        const resolved = configUtils.transElemText($, '$()');
        expect(resolved).toBe('RSSHub');
    });

    // A `%title%` placeholder is filled from `params`.
    it('replaceParams', async () => {
        const data = {
            params: { title: 'RSSHub' },
            title: '%title%',
        };
        const replaced = configUtils.replaceParams(data, data.title, $);
        expect(replaced).toBe('RSSHub');
    });

    // A plain value is returned unchanged when looked up by path.
    it('getProp', async () => {
        const data = { title: 'RSSHub' };
        const value = configUtils.getProp(data, ['title'], $);
        expect(value).toBe('RSSHub');
    });

    // A placeholder whose parameter is itself a `$()` expression.
    it('all', async () => {
        const data = {
            params: { title: '$()' },
            title: '%title%',
        };
        const value = configUtils.getProp(data, ['title'], $);
        expect(value).toBe('RSSHub');
    });
});