diff --git a/docs/university.md b/docs/university.md
index 75c29ea119..4dd2b117af 100644
--- a/docs/university.md
+++ b/docs/university.md
@@ -2169,6 +2169,15 @@ type 列表:
 
 
 
+## 云南师范大学
+
+### 继续教育学院
+
+#### 新闻
+
+
+
+
 ## 浙江大学
 
 ### 普通栏目 如学术 / 图片 / 新闻等
diff --git a/lib/router.js b/lib/router.js
index a71c11b7ef..6a6ffec5de 100644
--- a/lib/router.js
+++ b/lib/router.js
@@ -803,6 +803,9 @@ router.get('/ynu/grs/qttz/:category', lazyloadRouteHandler('./routes/universitie
 router.get('/ynu/jwc/:category', lazyloadRouteHandler('./routes/universities/ynu/jwc/zytz'));
 router.get('/ynu/home', lazyloadRouteHandler('./routes/universities/ynu/home/main'));
 
+// Yunnan Normal University (云南师范大学)
+router.get('/ynnu/edu/news', lazyloadRouteHandler('./routes/universities/ynnu/edu/news'));
+
 // 昆明理工大学
 router.get('/kmust/jwc/:type?', lazyloadRouteHandler('./routes/universities/kmust/jwc'));
 router.get('/kmust/job/careers/:type?', lazyloadRouteHandler('./routes/universities/kmust/job/careers'));
diff --git a/lib/routes/universities/ynnu/edu/base64.js b/lib/routes/universities/ynnu/edu/base64.js
new file mode 100644
index 0000000000..76790de6d1
--- /dev/null
+++ b/lib/routes/universities/ynnu/edu/base64.js
@@ -0,0 +1,42 @@
+// Base64 helpers for UTF-8 text, implemented on top of Node's built-in Buffer.
+// The exported names mirror https://gitee.com/slice30k/base64-js.
+
+/**
+ * Encode a string as standard Base64 of its UTF-8 bytes.
+ *
+ * @param {string} str - Text to encode; non-string values are converted with String().
+ * @returns {string} Base64 text using the '+' and '/' alphabet, padded with '='.
+ */
+const encode = (str) => Buffer.from(String(str), 'utf8').toString('base64');
+
+/**
+ * Decode standard Base64 back into the UTF-8 string it represents.
+ *
+ * @param {string} base64Str - Base64 text, with or without '=' padding.
+ * @returns {string} The decoded text.
+ */
+const decode = (base64Str) => Buffer.from(String(base64Str), 'base64').toString('utf8');
+
+/**
+ * Encode a string with the URL-safe alphabet ('-' and '_' instead of '+' and '/').
+ * The '=' padding is kept.
+ *
+ * @param {string} str - Text to encode.
+ * @returns {string} URL-safe Base64 text.
+ */
+const urlsafe_encode = (str) => encode(str).replace(/\+/g, '-').replace(/\//g, '_');
+
+/**
+ * Decode URL-safe Base64 back into the UTF-8 string it represents.
+ *
+ * @param {string} base64Str - URL-safe Base64 text.
+ * @returns {string} The decoded text.
+ */
+const urlsafe_decode = (base64Str) => decode(String(base64Str).replace(/-/g, '+').replace(/_/g, '/'));
+
+module.exports = {
+    encode,
+    decode,
+    urlsafe_encode,
+    urlsafe_decode,
+};
diff --git a/lib/routes/universities/ynnu/edu/news.js b/lib/routes/universities/ynnu/edu/news.js
new file mode 100644
index 0000000000..9756767b0a
--- /dev/null
+++ b/lib/routes/universities/ynnu/edu/news.js
@@ -0,0 +1,119 @@
+const got = require('@/utils/got');
+const cheerio = require('cheerio');
+const { parseDate } = require('@/utils/parse-date');
+const { encode } = require('./base64');
+const timezone = require('@/utils/timezone');
+
+// Site root. The trailing slash lets relative paths be appended or resolved against it directly.
+const baseUrl = 'https://ostudy.ynnedu.cn/';
+// Home page links that are left out of the feed.
+const blacklist = ['ynnu-mv.html', 'https://chjiao.ynnu.edu.cn/zsgz/zsjz.htm', 'https://chjiao.ynnu.edu.cn/info/1006/1404.htm', 'https://chjiao.ynnu.edu.cn/zsgz/dksw.htm'];
+
+/**
+ * Fetch an article page through the route cache and build a feed item from it.
+ *
+ * @param {object} ctx - Route context providing `cache.tryGet`.
+ * @param {string} link - Absolute URL of the article.
+ * @param {object} [extra] - Fields read from the listing (e.g. pubDate) to add to the item.
+ * @returns {Promise<object>} The feed item.
+ */
+const fetchArticle = (ctx, link, extra = {}) =>
+    ctx.cache.tryGet(link, async () => {
+        const { body } = await got(link);
+        const $ = cheerio.load(body, { decodeEntities: false });
+        return {
+            title: $('#noticeSubject, #inform-content > div:nth-child(1)').text(),
+            link,
+            // .html() returns null when nothing matches; fall back to '' so the text "null" never ends up in the description.
+            description: ($('#noticeContent, .oinform-content').html() || '') + ($('.notice-attachment').html() || ''),
+            ...extra,
+        };
+    });
+
+/**
+ * Turn listing entries of the home page into feed items.
+ *
+ * @param {object} ctx - Route context, passed on to fetchArticle.
+ * @param {Function} $ - Cheerio instance the entries belong to.
+ * @param {string} selector - Selector matching one element per entry.
+ * @param {Function} describe - Maps a wrapped entry to `{ title, ...otherFields }` read from the listing.
+ * @returns {Promise<object[]>} One feed item per usable entry.
+ */
+const buildItems = (ctx, $, selector, describe) =>
+    Promise.all(
+        $(selector)
+            .toArray()
+            .map((element) => $(element))
+            .filter(($entry) => {
+                const href = $entry.find('a').attr('href');
+                // Entries without a link cannot be resolved, so drop them together with the blacklisted ones.
+                return Boolean(href) && !blacklist.includes(href);
+            })
+            .map(($entry) => {
+                // new URL() resolves relative links against the site root; absolute links ignore the base.
+                const link = new URL($entry.find('a').attr('href'), baseUrl).toString();
+                const { title, ...listingFields } = describe($entry);
+                if (link.endsWith('.pdf')) {
+                    // A PDF has no HTML body to parse, so only the listing data is used.
+                    return { title, link, ...listingFields };
+                }
+                return fetchArticle(ctx, link, listingFields);
+            })
+    );
+
+/**
+ * Query one of the site's "recent" JSON endpoints and map each record to a feed item.
+ *
+ * @param {string} path - Endpoint path relative to baseUrl (no leading slash).
+ * @param {Function} toItem - Maps one record of the response to a feed item.
+ * @returns {Promise<object[]>} The mapped feed items.
+ */
+const fetchRecent = async (path, toItem) => {
+    const response = await got(`${baseUrl}${path}`, {
+        method: 'post',
+        headers: {
+            'content-type': 'application/x-www-form-urlencoded',
+        },
+        body: 'top=10&organizationCode=ORG-CN-EDU-YNNU',
+    });
+    return response.data.data.map(toItem);
+};
+
+module.exports = async (ctx) => {
+    const { body } = await got(baseUrl);
+    const $index = cheerio.load(body);
+    // The news list lives inside #serviceInform-tpl; its inner HTML is parsed as a separate document.
+    const $news = cheerio.load($index('#serviceInform-tpl').html() || '');
+
+    const sections = await Promise.all([
+        // Carousel at the top of the page
+        buildItems(ctx, $index, 'div[carousel-item] > div', ($entry) => ({
+            title: $entry.find('p').text(),
+        })),
+        // News list in the middle of the page
+        buildItems(ctx, $news, '.news-li:not(:last-child)', ($entry) => ({
+            title: $entry.find('a').text(),
+            pubDate: parseDate($entry.find('span').text(), '发布时间: YYYY-MM-DD'),
+        })),
+        fetchRecent('serviceNotice/queryRecentServiceNotices', ({ createTime, noticeSubject, noticeContent, noticeCode, publishUnit }) => ({
+            title: noticeSubject,
+            description: noticeContent,
+            pubDate: timezone(parseDate(createTime), +8),
+            link: `${baseUrl}notice-detail.html?c=${encode(noticeCode)}`,
+            author: publishUnit,
+        })),
+        fetchRecent('newsInformation/queryRecentNewsInformations', ({ createTime, title, newsContent, newsCode, publishUnit }) => ({
+            title,
+            description: newsContent,
+            pubDate: timezone(parseDate(createTime), +8),
+            link: `${baseUrl}news-detail.html?c=${encode(newsCode)}`,
+            author: publishUnit,
+        })),
+    ]);
+
+    ctx.state.data = {
+        title: '云南师范大学继续教育学院 #通知',
+        link: baseUrl,
+        item: sections.flat(),
+    };
+};