增加 深圳大学研究生招生通知 (#2019)

* 增加 深圳大学研究生招生通知

* 对全文内容使用缓存

* code prettified

* fix cache implementation

* remove duplicate logging output

* update university.md
This commit is contained in:
NagaruZ
2019-05-03 23:21:50 +08:00
committed by DIYgod
parent 5f9db472db
commit 13b6b947fc
4 changed files with 152 additions and 0 deletions

View File

@@ -709,6 +709,16 @@ https://rsshub.app/**nuist**/`bulletin` 或 https://rsshub.app/**nuist**/`bullet
<Route name="广东海洋大学" author="Xiaotouming" example="/gdoujwc" path="/gdoujwc"/>
## 深圳大学
<Route name="深圳大学研究生招生网" author="NagaruZ" example="/szu/yz/1" path="/universities/szu/yz/:type?" :paramsDesc="['默认为1']" >
| 研究生 | 博士生 |
| ------ | ------ |
| 1 | 2 |
</Route>
## MIT
<Route name="MIT graduateadmissions's all blogs" author="LogicJake" example="/mit/graduateadmissions/index/all" path="/universities/mit/graduateadmissions/index/all"/>

View File

@@ -742,6 +742,9 @@ router.get('/shu/jwc/:type?', require('./routes/universities/shu/jwc'));
// University of Science and Technology Beijing, Tianjin College
router.get('/ustb/tj/news/:type?', require('./routes/universities/ustb/tj/news'));
// Shenzhen University
router.get('/szu/yz/:type?', require('./routes/universities/szu/yz'));
// ifanr
router.get('/ifanr/:channel?', require('./routes/ifanr/index'));

View File

@@ -0,0 +1,50 @@
const axios = require('../../../../utils/axios');
const cheerio = require('cheerio');
const util = require('./utils');
const map = new Map([[1, { title: '硕士招生 - 深圳大学研究生招生网' }], [2, { title: '博士招生 - 深圳大学研究生招生网' }]]);
module.exports = async (ctx) => {
let type = Number.parseInt(ctx.params.type);
const struct = {
1: {
selector: {
list: '.list',
item: 'li',
content: '#vsb_content',
},
url: 'https://yz.szu.edu.cn/sszs/gg.htm',
},
2: {
selector: {
list: '.list',
item: 'li',
content: '#vsb_content',
},
url: 'https://yz.szu.edu.cn/bszs/gg.htm',
},
};
if (type !== 1 && type !== 2) {
// fallback to default
type = 1;
}
const url = struct[type].url;
const response = await axios.get(url);
const data = response.data;
const $ = cheerio.load(data);
const list = $(struct[type].selector.list)
.find(struct[type].selector.item)
.get();
const name = $('title').text();
const result = await util.ProcessFeed(list, ctx.cache, struct[type]);
ctx.state.data = {
title: map.get(type).title,
link: `${url}`,
description: `${name}`,
item: result,
};
};

View File

@@ -0,0 +1,89 @@
const cheerio = require('cheerio');
const axios = require('../../../../utils/axios');
const url = require('url');
/**
 * Builds full-text feed items for a list of announcement entries.
 *
 * For each entry that carries a link, the article page is looked up in the
 * cache by its absolute URL; on a miss the page is fetched, cleaned up, turned
 * into a feed item and written to the cache for 24 hours.
 *
 * @param {Array} list - List-entry elements (as returned by cheerio's `.get()`).
 * @param {object} cache - Cache exposing `get(key)` and `set(key, value, ttl)`.
 * @param {object} current - Feed config; `current.url` is the list page URL and
 *   `current.selector.content` selects the article body.
 * @returns {Promise<Array<object>>} One item per linked entry, in list order.
 */
const ProcessFeed = async (list, cache, current) =>
    await Promise.all(
        list
            // Take the href of the first <a> in each entry.
            .map((item) => cheerio.load(item)('a').attr('href'))
            // Entries without a usable link (e.g. header rows, or an <a> with
            // no href) are not news items; skipping them also avoids passing
            // undefined to url.resolve, which would throw.
            .filter((href) => typeof href === 'string' && href !== '')
            .map(async (href) => {
                const $url = url.resolve(current.url, href);
                const key = $url;

                // Return the cached item if this page is already in the cache.
                const value = await cache.get(key);
                if (value) {
                    return JSON.parse(value);
                }

                // Load the article page.
                const response = await axios.get($url);
                const $ = cheerio.load(response.data);

                // Make image and link addresses absolute. Relative paths inside
                // the article are relative to the article page itself, so they
                // are resolved against $url rather than the list page URL.
                $('img').each((index, elem) => {
                    const $elem = $(elem);
                    const src = $elem.attr('src');
                    if (src) {
                        $elem.attr('src', url.resolve($url, src));
                    }
                });
                $('a').each((index, elem) => {
                    const $elem = $(elem);
                    const target = $elem.attr('href');
                    if (target) {
                        $elem.attr('href', url.resolve($url, target));
                    }
                });

                // Strip inline styles, stylesheets and scripts.
                $('img, div, span, p, table, td, tr').removeAttr('style');
                $('style, script').remove();

                const single = {
                    title: $('h2').text(),
                    description: $(current.selector.content).html(),
                    link: $url,
                    // The header text mixes the publish time with the view
                    // count; characters 6..21 are taken as the timestamp.
                    // NOTE(review): the string is parsed in the server's local
                    // time zone — confirm this matches the site's time zone.
                    pubDate: new Date(
                        $('div.ny_fbt')
                            .text()
                            .slice(6, 22)
                    ).toUTCString(),
                    author: '深圳大学研究生招生网',
                    guid: $url, // unique identifier of the article
                };

                // Store the item in the cache for 24 hours. The write is not
                // awaited; the item is returned immediately.
                cache.set(key, JSON.stringify(single), 24 * 60 * 60);

                return single;
            })
    );
// Expose ProcessFeed as this module's only export.
module.exports = {
ProcessFeed,
};