From d5af636a4c5f301a54457340ec6a03c588f21efc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=96=87=E6=B2=9B=E4=B8=9C?=
Date: Thu, 28 Jul 2022 23:39:48 +0800
Subject: [PATCH] =?UTF-8?q?feat(route):=20add=20HAFU=20=E6=B2=B3=E5=8D=97?=
 =?UTF-8?q?=E8=B4=A2=E6=94=BF=E9=87=91=E8=9E=8D=E5=AD=A6=E9=99=A2=20(#1033?=
 =?UTF-8?q?0)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(route): add HAFU 河南财政金融学院

* fix problems

1. fix route path in all files
2. fix radar target()
3. use camelCase
4. use ctx.cache.tryGet()
5. add "attachments" in description

* 1.run format, 2.add Promise.all()
---
 docs/university.md             |  12 ++
 lib/v2/hafu/maintainer.js      |   4 +
 lib/v2/hafu/news.js            |  19 +++
 lib/v2/hafu/radar.js           |  25 ++++
 lib/v2/hafu/router.js          |   3 +
 lib/v2/hafu/templates/hafu.art |   3 +
 lib/v2/hafu/utils.js           | 237 +++++++++++++++++++++++++++++++++
 7 files changed, 303 insertions(+)
 create mode 100644 lib/v2/hafu/maintainer.js
 create mode 100644 lib/v2/hafu/news.js
 create mode 100644 lib/v2/hafu/radar.js
 create mode 100644 lib/v2/hafu/router.js
 create mode 100644 lib/v2/hafu/templates/hafu.art
 create mode 100644 lib/v2/hafu/utils.js

diff --git a/docs/university.md b/docs/university.md
index 7734bec77d..e83f9868f6 100644
--- a/docs/university.md
+++ b/docs/university.md
@@ -1252,6 +1252,18 @@ category 列表:
 
 
 
+## 河南财政金融学院
+
+### 河南财政金融学院
+
+
+
+| 校内公告通知 | 教务处公告通知 | 招生就业处公告通知 |
+| ------ | ------- | --------- |
+| ggtz | jwc | zsjyc |
+
+
+
 ## 河南大学
 
 ### 河南大学
diff --git a/lib/v2/hafu/maintainer.js b/lib/v2/hafu/maintainer.js
new file mode 100644
index 0000000000..eaea891fd0
--- /dev/null
+++ b/lib/v2/hafu/maintainer.js
@@ -0,0 +1,4 @@
+// Route path -> maintainers; the path has to match the one registered in router.js.
+module.exports = {
+    '/news/:type?': ['WenPeiTung'],
+};
diff --git a/lib/v2/hafu/news.js b/lib/v2/hafu/news.js
new file mode 100644
index 0000000000..dff7950845
--- /dev/null
+++ b/lib/v2/hafu/news.js
@@ -0,0 +1,19 @@
+const parseList = require('./utils');
+
+/**
+ * Route handler for /hafu/news/:type?.
+ * Loads the notice list of the requested section and publishes it as feed data.
+ */
+module.exports = async (ctx) => {
+    // set default router type
+    const type = ctx.params.type ?? 'ggtz';
+
+    const { link, title, resultList } = await parseList(ctx, type);
+
+    ctx.state.data = {
+        title,
+        link,
+        description: '河南财政金融学院 - 公告通知',
+        item: resultList,
+    };
+};
diff --git a/lib/v2/hafu/radar.js b/lib/v2/hafu/radar.js
new file mode 100644
index 0000000000..9735b17087
--- /dev/null
+++ b/lib/v2/hafu/radar.js
@@ -0,0 +1,25 @@
+module.exports = {
+    'hafu.edu.cn': {
+        _name: '河南财政金融学院',
+        www: [
+            {
+                title: '河南财政金融学院 - 通知公告',
+                docs: 'https://docs.rsshub.app/university.html#he-nan-cai-zheng-jin-rong-xue-yuan',
+                source: '/*',
+                // indexOf() returns -1 (truthy) on a miss and 0 (falsy) on a match at the
+                // start, so it cannot serve as a boolean test; includes() can.
+                target: (params, url) => {
+                    if (url.includes('www')) {
+                        return '/hafu/news/ggtz';
+                    }
+                    if (url.includes('jwc')) {
+                        return '/hafu/news/jwc';
+                    }
+                    if (url.includes('zsjyc')) {
+                        return '/hafu/news/zsjyc';
+                    }
+                },
+            },
+        ],
+    },
+};
diff --git a/lib/v2/hafu/router.js b/lib/v2/hafu/router.js
new file mode 100644
index 0000000000..c36fd4f1c8
--- /dev/null
+++ b/lib/v2/hafu/router.js
@@ -0,0 +1,3 @@
+module.exports = function (router) {
+    router.get('/news/:type?', require('./news'));
+};
diff --git a/lib/v2/hafu/templates/hafu.art b/lib/v2/hafu/templates/hafu.art
new file mode 100644
index 0000000000..5231e64c4a
--- /dev/null
+++ b/lib/v2/hafu/templates/hafu.art
@@ -0,0 +1,3 @@
+{{ if articleBody }}
+    {{@ articleBody }}
+{{ /if }}
\ No newline at end of file
diff --git a/lib/v2/hafu/utils.js b/lib/v2/hafu/utils.js
new file mode 100644
index 0000000000..15073f16b5
--- /dev/null
+++ b/lib/v2/hafu/utils.js
@@ -0,0 +1,237 @@
+const got = require('@/utils/got');
+const cheerio = require('cheerio');
+const { parseDate } = require('@/utils/parse-date');
+const timezone = require('@/utils/timezone');
+const { art } = require('@/utils/render');
+const path = require('path');
+
+// Per-section settings: list page URL, site root that attachment hrefs are resolved
+// against, feed title, and the parser for that section's markup. The parsers are
+// function declarations, so they are hoisted and can be referenced here.
+const typeMap = {
+    ggtz: { url: 'https://www.hafu.edu.cn/index/ggtz.htm', root: 'https://www.hafu.edu.cn/', title: '河南财院 - 公告通知', parseFn: ggtzParse },
+    jwc: { url: 'https://jwc.hafu.edu.cn/tzgg.htm', root: 'https://jwc.hafu.edu.cn/', title: '河南财院 教务处 - 公告通知', parseFn: jwcParse },
+    zsjyc: { url: 'https://zsjyc.hafu.edu.cn/tztg.htm', root: 'https://zsjyc.hafu.edu.cn/', title: '河南财院 招生就业处 - 公告通知', parseFn: zsjycParse },
+};
+// Number of articles fetched when the request carries no usable ?limit=
+const DEFAULT_LIMIT = 10;
+
+/**
+ * Fetch the list page of one section and parse it into feed items.
+ *
+ * @param {object} ctx - request context; ctx.query.limit and ctx.cache are used
+ * @param {string} type - key of typeMap
+ * @returns {Promise<{title: string, link: string, resultList: object[]}>}
+ * @throws {Error} when type is not a key of typeMap
+ */
+module.exports = async (ctx, type) => {
+    if (!Object.prototype.hasOwnProperty.call(typeMap, type)) {
+        throw new Error(`Unknown type "${type}", expected one of: ${Object.keys(typeMap).join(', ')}`);
+    }
+    const { url: link, title, parseFn } = typeMap[type];
+
+    const response = await got(link);
+    const $ = cheerio.load(response.data);
+
+    // Resolve the limit per request. It used to be written back into a module-level
+    // variable, so one ?limit= request changed the default for every later request.
+    const requested = Number.parseInt(ctx.query.limit, 10);
+    const limit = requested > 0 ? requested : DEFAULT_LIMIT;
+    const resultList = await parseFn(ctx, $, limit);
+
+    return {
+        title,
+        link,
+        resultList,
+    };
+};
+
+/**
+ * Parse a date string scraped from a page and apply the +8 offset via timezone().
+ *
+ * @param {string} text - raw text of the date element
+ * @param {string} format - format string handed to parseDate
+ * @returns {*} the value returned by timezone(), or undefined when text is blank
+ */
+function toPubDate(text, format) {
+    const trimmed = text.trim();
+    return trimmed ? timezone(parseDate(trimmed, format), +8) : undefined;
+}
+
+/**
+ * Turn list rows into { item, href, link, title } records.
+ * Rows whose anchor carries no href are skipped instead of failing the whole feed.
+ *
+ * @param {Function} $ - cheerio instance of the list page
+ * @param {object} rows - cheerio selection holding one element per article row
+ * @param {string} anchorSelector - selector of the article anchor inside a row
+ * @param {string} listUrl - URL of the list page; relative hrefs are resolved against it
+ * @returns {Array<{item: object, href: string, link: string, title: string}>}
+ */
+function readRows($, rows, anchorSelector, listUrl) {
+    return rows
+        .toArray()
+        .map((item) => {
+            const anchor = $(item).find(anchorSelector);
+            const href = anchor.attr('href');
+            return href ? { item, href, link: new URL(href, listUrl).href, title: anchor.attr('title') } : null;
+        })
+        .filter(Boolean);
+}
+
+/**
+ * Download an article page and render its body, plus attachment links, as HTML.
+ * Best effort: on any failure the href is returned as the description, so one
+ * broken article does not fail the whole feed.
+ *
+ * @param {string} href - href as found on the list page, used as fallback description
+ * @param {string} link - absolute URL of the article
+ * @param {string} type - key of typeMap; selects the attachment markup variant
+ * @returns {Promise<{articleData: (Function|string), description: string}>}
+ *     articleData is the cheerio instance of the article, or '' if the download failed
+ */
+async function tryGetFullText(href, link, type) {
+    let articleData = '';
+    let description = '';
+    // for some unexpected href link
+    try {
+        const articleRes = await got(link);
+        articleData = cheerio.load(articleRes.data);
+        // fullText; html() yields null when the container is missing
+        let articleBody = articleData('div[class=v_news_content]').html() ?? '';
+        // attachments
+        if (articleData('[id^=nattach]').length !== 0) {
+            articleBody = tryGetAttachments(articleData, articleBody, type);
+        }
+
+        // Pass the body as template data. Given a bare string, art-template compiles
+        // that string as template source, so a "{{" inside an article would be
+        // evaluated as template syntax instead of being shown.
+        description = art(path.join(__dirname, 'templates/hafu.art'), { articleBody });
+    } catch {
+        description = href;
+    }
+
+    return { articleData, description };
+}
+
+/**
+ * Append one link per attachment of an article to its body HTML.
+ *
+ * @param {Function} articleData - cheerio instance of the article page
+ * @param {string} articleBody - article body HTML collected so far
+ * @param {string} type - key of typeMap; 'ggtz' pages mark attachments up differently
+ * @returns {string} articleBody followed by one "<br><a>" entry per attachment
+ */
+function tryGetAttachments(articleData, articleBody, type) {
+    const isGgtz = type === 'ggtz';
+    const markers = articleData('[id^=nattach]');
+    // ggtz: the anchor is the element right before the marker.
+    // others: the anchor sits inside the element before the marker's parent.
+    const holders = isGgtz ? markers.prev() : markers.parent().prev();
+
+    let result = articleBody;
+    holders.each((_, item) => {
+        const anchor = isGgtz ? articleData(item) : articleData(item).find('a');
+        const href = anchor.attr('href');
+        if (!href) {
+            // nothing to link to; skipping keeps the full text collected so far
+            return;
+        }
+        // the computed link used to be dropped, leaving only the bare title
+        const link = new URL(href, typeMap[type].root).href;
+        const title = isGgtz ? anchor.text() : anchor.find('span').text();
+        result += `<br><a href="${link}">${title}</a>`;
+    });
+
+    return result;
+}
+// A. got from hostPage 1.article(link), 2.article(title), 3.(pubDate)
+// B. got from articlePage 1.description(fullText), 2.article(author), 3.detailed(pubDate)
+async function ggtzParse(ctx, $, limit = DEFAULT_LIMIT) {
+    const rows = $('a[class=c269582]').parent().slice(0, limit);
+    const resultItems = await Promise.all(
+        readRows($, rows, 'a[class=c269582]', typeMap.ggtz.url).map(({ item, href, link, title }) =>
+            ctx.cache.tryGet(link, async () => {
+                const { articleData, description } = await tryGetFullText(href, link, 'ggtz');
+                let author = '';
+                // date from the list page; replaced below when the article has a finer one
+                let pubDate = toPubDate($(item).find('a[class=c269582_date]').text(), 'YYYY-MM-DD');
+
+                if (articleData instanceof Function) {
+                    const header = articleData('h1').next().text();
+                    const index = header.indexOf('日期');
+                    // without the marker the fixed offsets below would slice garbage
+                    if (index !== -1) {
+                        author = header.substring(0, index - 2);
+                        pubDate = toPubDate(header.substring(index + 3, index + 19), 'YYYY-MM-DD HH:mm') ?? pubDate;
+                    }
+                }
+
+                return {
+                    title,
+                    description,
+                    pubDate,
+                    link,
+                    author,
+                };
+            })
+        )
+    );
+
+    return resultItems;
+}
+// A. got from hostPage 1.article(link), 2.article(title), 3.(pubDate)
+// B. got from articlePage 1.description(fullText), 2.article(author)
+async function jwcParse(ctx, $, limit = DEFAULT_LIMIT) {
+    const rows = $('a[class=c259713]').parent().parent().slice(0, limit);
+    const resultItems = await Promise.all(
+        readRows($, rows, 'a[class=c259713]', typeMap.jwc.url).map(({ item, href, link, title }) =>
+            ctx.cache.tryGet(link, async () => {
+                const pubDate = toPubDate($(item).find('span[class=timestyle259713]').text(), 'YYYY/MM/DD');
+                const { articleData, description } = await tryGetFullText(href, link, 'jwc');
+                const author = articleData instanceof Function ? articleData('span[class=authorstyle259690]').text() : '';
+
+                return {
+                    title,
+                    description,
+                    pubDate,
+                    link,
+                    author: `供稿单位:${author}`,
+                };
+            })
+        )
+    );
+
+    return resultItems;
+}
+// A. got from hostPage 1.article(link), 2.article(title), 3.(pubDate)
+// B. got from articlePage 1.description(fullText), 2.detailed(pubDate)
+async function zsjycParse(ctx, $, limit = DEFAULT_LIMIT) {
+    const rows = $('a[class=c127701]').parent().parent().slice(0, limit);
+    const resultItems = await Promise.all(
+        readRows($, rows, 'a[class=c127701]', typeMap.zsjyc.url).map(({ item, href, link, title }) =>
+            ctx.cache.tryGet(link, async () => {
+                const { articleData, description } = await tryGetFullText(href, link, 'zsjyc');
+
+                let pubDate;
+                if (articleData instanceof Function) {
+                    pubDate = toPubDate(articleData('span[class=timestyle127702]').text(), 'YYYY-MM-DD HH:mm');
+                }
+                // NOTE(review): this fallback selector is the ggtz list's date class; it
+                // probably never matches on the zsjyc list - confirm the right selector.
+                pubDate = pubDate ?? toPubDate($(item).find('a[class=c269582_date]').text(), 'YYYY-MM-DD');
+
+                return {
+                    title,
+                    description,
+                    pubDate,
+                    link,
+                    author: '供稿单位:招生就业处',
+                };
+            })
+        )
+    );
+
+    return resultItems;
+}