diff --git a/docs/university.md b/docs/university.md
index db7c37ffae..153245d61a 100644
--- a/docs/university.md
+++ b/docs/university.md
@@ -163,6 +163,16 @@
 
 
 
+## 北京邮电大学
+
+
+
+| 综合 | 信息与通信工程学院 | 电子工程学院 | 计算机学院 | 自动化学院 | 软件学院 | 数字媒体与设计艺术学院 | 网络空间安全学院 | 理学院 | 经济管理学院 | 人文学院 | 马克思主义学院 | 网络技术研究院 | 信息光子学与光通信研究院 |
+| ---- | ------------------ | ------------ | ---------- | ---------- | -------- | ---------------------- | ---------------- | ------ | ------------ | -------- | -------------- | -------------- | ------------------------ |
+| all | sice | see | scs | sa | sse | sdmda | scss | sci | sem | sh | mtri | int | ipoc |
+
+
+
 ## 南京邮电大学
 
 
diff --git a/lib/router.js b/lib/router.js
index 9835185863..f41a02bf4d 100755
--- a/lib/router.js
+++ b/lib/router.js
@@ -1247,4 +1247,7 @@ router.get('/tencent/guyu/channel/:name', require('./routes/tencent/guyu/channel
 // 21财经
 router.get('/21caijing/channel/:name', require('./routes/21caijing/channel'));
 
+// 北京邮电大学
+router.get('/bupt/yz/:type', require('./routes/universities/bupt/yz'));
+
 module.exports = router;
diff --git a/lib/routes/universities/bupt/utils.js b/lib/routes/universities/bupt/utils.js
new file mode 100644
index 0000000000..e9859112bd
--- /dev/null
+++ b/lib/routes/universities/bupt/utils.js
@@ -0,0 +1,64 @@
+const cheerio = require('cheerio');
+const axios = require('../../../utils/axios');
+const url = require('url');
+
+/**
+ * Turn list-page entries into feed items by fetching every linked article.
+ *
+ * @param {Array} list - Nodes taken from the list page, one per candidate entry.
+ * @param {Object} cache - Cache handle passed in by the route. TODO(review): currently unused, so each request re-downloads every article.
+ * @param {Object} current - Site descriptor; `url` is the list page address and `selector.content` selects the article body.
+ * @param {string} type - Site key; `sci` reads its title from `#timu`, every other site from `<title>`.
+ * @returns {Promise<Array>} Items with `title`, `description`, `link`, `author` and `guid`; rejects when any article request fails.
+ */
+const ProcessFeed = async (list, cache, current, type) =>
+    Promise.all(
+        list
+            // Entries without a link (such as a table header row) are not articles.
+            // Testing the href itself keeps undefined away from url.resolve, which would throw.
+            .filter((item) => Boolean(cheerio.load(item)('a').attr('href')))
+            .map(async (item) => {
+                const $item = cheerio.load(item);
+                const link = url.resolve(current.url, $item('a').attr('href'));
+
+                // Fetch the article page
+                const response = await axios({
+                    method: 'get',
+                    url: link,
+                });
+                const $ = cheerio.load(response.data);
+
+                // Make image and anchor addresses absolute. Relative paths inside an
+                // article are relative to the article itself, not to the list page.
+                $('img').each((index, elem) => {
+                    const $elem = $(elem);
+                    const src = $elem.attr('src');
+                    if (src) {
+                        $elem.attr('src', url.resolve(link, src));
+                    }
+                });
+                $('a').each((index, elem) => {
+                    const $elem = $(elem);
+                    const href = $elem.attr('href');
+                    if (href) {
+                        $elem.attr('href', url.resolve(link, href));
+                    }
+                });
+
+                // Drop inline styling plus embedded style and script elements
+                $('img, div, span, p, table, td, tr').removeAttr('style');
+                $('style, script').remove();
+
+                return {
+                    title: type === 'sci' ? $('#timu').text() : $('title').text(),
+                    description: $(current.selector.content).html(),
+                    link,
+                    author: '北京邮电大学研究院',
+                    guid: link, // unique identifier of the article
+                };
+            })
+    );
+
+module.exports = {
+    ProcessFeed,
+};
diff --git a/lib/routes/universities/bupt/yz.js b/lib/routes/universities/bupt/yz.js
new file mode 100644
index 0000000000..a41991f2ad
--- /dev/null
+++ b/lib/routes/universities/bupt/yz.js
@@ -0,0 +1,165 @@
+const axios = require('../../../utils/axios');
+const cheerio = require('cheerio');
+const util = require('./utils');
+
+// Admission information from the graduate school and individual school sites.
+// Each key is a value accepted for `:type`. `url` is the list page,
+// `selector.list` / `selector.item` locate the entries on it and
+// `selector.content` locates the body on an article page.
+const struct = {
+    all: {
+        selector: {
+            list: '.list ul',
+            item: 'li',
+            content: '.aticle',
+        },
+        url: 'https://yzb.bupt.edu.cn/list/list.php?p=2_1_1',
+    },
+    sice: {
+        selector: {
+            list: '.list',
+            item: 'li',
+            content: '#content',
+        },
+        url: 'https://sice.bupt.edu.cn/xygk/zygg.htm',
+    },
+    see: {
+        selector: {
+            list: '.list',
+            item: 'li',
+            content: '.article',
+        },
+        url: 'https://see.bupt.edu.cn/list/list.php?p=1_2_1',
+    },
+    scs: {
+        selector: {
+            list: '#ctl00_ctl00_ph_content_ph_2_content_noteMainPagePanel1_noteMainPage1_GridView1 tbody',
+            item: 'tr',
+            content: '#conten_2',
+        },
+        url: 'https://scs.bupt.edu.cn/cs_web/',
+    },
+    sa: {
+        selector: {
+            list: '.list',
+            item: 'li',
+            content: '.v_news_content',
+        },
+        url: 'https://sa.bupt.edu.cn/index/ybgl.htm',
+    },
+    sse: {
+        selector: {
+            list: '.main_conRCb ul',
+            item: 'li',
+            content: '.main_conDiv',
+        },
+        url: 'https://sse.bupt.edu.cn/jxgl/jwtzygg.htm',
+    },
+    sdmda: {
+        selector: {
+            list: '.list',
+            item: 'li',
+            content: '.article',
+        },
+        url: 'https://sdmda.bupt.edu.cn/?cat=33',
+    },
+    scss: {
+        selector: {
+            list: '.main_conRCb ul',
+            item: 'li',
+            content: '.main_conDiv',
+        },
+        url: 'https://scss.bupt.edu.cn/zsjy1/yjszs1.htm',
+    },
+    sci: {
+        selector: {
+            list: '#right_common ul',
+            item: 'li',
+            content: '#newscontent',
+        },
+        url: 'https://sci.bupt.edu.cn/list/list.php?p=2_6_1',
+    },
+    sem: {
+        selector: {
+            list: '#zs .index_list_lu',
+            item: 'li',
+            content: '#vsb_content',
+        },
+        url: 'https://sem.bupt.edu.cn/index.htm',
+    },
+    sh: {
+        selector: {
+            list: '.list ul',
+            item: 'li',
+            content: '.content',
+        },
+        url: 'https://sh.bupt.edu.cn/list/list.php?p=3_3_1',
+    },
+    mtri: {
+        selector: {
+            list: '.list ul',
+            item: 'li',
+            content: '.content',
+        },
+        url: 'https://mtri.bupt.edu.cn/list/list.php?p=1_3_1',
+    },
+    int: {
+        selector: {
+            list: '.list ul',
+            item: 'li',
+            content: '.content',
+        },
+        url: 'https://int.bupt.edu.cn/list/list.php?p=1_3_1',
+    },
+    ipoc: {
+        selector: {
+            list: '.content',
+            item: 'li',
+            content: '#content',
+        },
+        url: 'https://ipoc.bupt.edu.cn/zytz.htm',
+    },
+};
+
+/**
+ * Route handler for `/bupt/yz/:type`.
+ *
+ * Downloads the list page configured for `type`, hands its entries to
+ * `ProcessFeed` and stores the resulting feed in `ctx.state.data`.
+ *
+ * @param {Object} ctx - Request context; reads `ctx.params.type` and `ctx.cache`.
+ * @throws {Error} When `type` is not one of the configured keys.
+ */
+module.exports = async (ctx) => {
+    const type = ctx.params.type;
+
+    // Own-property check so that names such as `constructor` are rejected too
+    if (!Object.prototype.hasOwnProperty.call(struct, type)) {
+        throw new Error(`Unknown type "${type}", expected one of: ${Object.keys(struct).join(', ')}`);
+    }
+    const current = struct[type];
+
+    const response = await axios({
+        method: 'get',
+        url: current.url,
+        headers: {
+            Referer: 'https://www.baidu.com/',
+        },
+    });
+
+    // Parse the returned HTML and collect the entry nodes as plain DOM nodes
+    const $ = cheerio.load(response.data);
+    const list = $(current.selector.list)
+        .find(current.selector.item)
+        .get();
+
+    const name = $('title').text();
+    const result = await util.ProcessFeed(list, ctx.cache, current, type);
+
+    ctx.state.data = {
+        title: `${name}又有更新了`,
+        link: current.url,
+        description: name,
+        item: result,
+    };
+};