feat(route): add HAFU 河南财政金融学院 (#10330)

* feat(route): add HAFU 河南财政金融学院

* fix problems

1. fix route path in all files
2. fix radar target()
3. use camelCase
4. use ctx.cache.tryGet()
5. add "attachments" in description

* 1.run format, 2.add Promise.all()
This commit is contained in:
文沛东
2022-07-28 23:39:48 +08:00
committed by GitHub
parent e6f9689859
commit d5af636a4c
7 changed files with 264 additions and 0 deletions

View File

@@ -1252,6 +1252,18 @@ category 列表:
<Route author="plusmultiply0" example="/hhu/libNewsc" path="/hhu/libNewsc"/>
## 河南财政金融学院
### 河南财政金融学院
<Route author="WenPeiTung" example="/hafu/news/ggtz" path="/hafu/news/:type?" radar="1" rssbud="1" :paramsDesc="['分类,见下表(默认为 `ggtz`)']">
| 校内公告通知 | 教务处公告通知 | 招生就业处公告通知 |
| ------ | ------- | --------- |
| ggtz | jwc | zsjyc |
</Route>
## 河南大学
### 河南大学

View File

@@ -0,0 +1,3 @@
module.exports = {
'/news/:tpye?': ['WenPeiTung'],
};

15
lib/v2/hafu/news.js Normal file
View File

@@ -0,0 +1,15 @@
const parseList = require('./utils');
module.exports = async (ctx) => {
// set default router type
const type = ctx.params.type ?? 'ggtz';
const { link, title, resultList } = await parseList(ctx, type);
ctx.state.data = {
title,
link,
description: '河南财政金融学院 - 公告通知',
item: resultList,
};
};

23
lib/v2/hafu/radar.js Normal file
View File

@@ -0,0 +1,23 @@
module.exports = {
'hafu.edu.cn': {
_name: '河南财政金融学院',
www: [
{
title: '河南财政金融学院 - 通知公告',
docs: 'https://docs.rsshub.app/university.html#he-nan-cai-zheng-jin-rong-xue-yuan',
source: '/*',
target: (params, url) => {
if (url.indexOf('www')) {
return '/hafu/news/ggtz';
}
if (url.indexOf('jwc')) {
return '/hafu/news/jwc';
}
if (url.indexOf('zsjyc')) {
return '/hafu/news/zsjyc';
}
},
},
],
},
};

3
lib/v2/hafu/router.js Normal file
View File

@@ -0,0 +1,3 @@
module.exports = function (router) {
router.get('/news/:type?', require('./news'));
};

View File

@@ -0,0 +1,3 @@
{{ if articleBody }}
{{@ articleBody }}
{{ /if }}

205
lib/v2/hafu/utils.js Normal file
View File

@@ -0,0 +1,205 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const { parseDate } = require('@/utils/parse-date');
const timezone = require('@/utils/timezone');
const { art } = require('@/utils/render');
const path = require('path');
const typeMap = {
ggtz: { url: 'https://www.hafu.edu.cn/index/ggtz.htm', root: 'https://www.hafu.edu.cn/', title: '河南财院 - 公告通知', parseFn: ggtzParse },
jwc: { url: 'https://jwc.hafu.edu.cn/tzgg.htm', root: 'https://jwc.hafu.edu.cn/', title: '河南财院 教务处 - 公告通知', parseFn: jwcParse },
zsjyc: { url: 'https://zsjyc.hafu.edu.cn/tztg.htm', root: 'https://zsjyc.hafu.edu.cn/', title: '河南财院 招生就业处 - 公告通知', parseFn: zsjycParse },
};
// Number of get articles
let limit = 10;
module.exports = async (ctx, type) => {
const link = typeMap[type].url;
const title = typeMap[type].title;
const response = await got(link);
const $ = cheerio.load(response.data);
limit = ctx.query.limit || limit;
const resultList = await typeMap[type].parseFn(ctx, $);
return {
title,
link,
resultList,
};
};
/**
 * Download an article page and build the feed description from it: the
 * article body, followed by attachment links when the page has any.
 *
 * Best effort by design: if anything fails (unexpected link, network error,
 * unexpected markup) the description falls back to `href` instead of failing
 * the whole feed.
 *
 * @param {string} href - link taken from the list page; used as the fallback description.
 * @param {string} link - absolute URL of the article page.
 * @param {string} type - category key, forwarded to `tryGetAttachments`.
 * @returns {Promise<{articleData: (Function|string), description: string}>}
 *   `articleData` is the cheerio root of the article page, or '' when the page
 *   could not be loaded (callers test it with `instanceof Function`).
 */
async function tryGetFullText(href, link, type) {
    let articleData = '';
    let description = '';
    try {
        const articleRes = await got(link);
        articleData = cheerio.load(articleRes.data);
        // Full text.
        let articleBody = articleData('div[class=v_news_content]').html();
        // Attachments, when the page carries any attachment marker.
        if (articleData('[id^=nattach]').length !== 0) {
            articleBody = tryGetAttachments(articleData, articleBody, type);
        }
        // Pass the body as template data. The previous call handed art() the
        // HTML string itself and invoked the returned function, which treats
        // the scraped HTML as template source, so any template syntax inside
        // an article would be evaluated and the template file went unused.
        description = art(path.join(__dirname, 'templates/hafu.art'), { articleBody });
    } catch {
        description = href;
    }
    return { articleData, description };
}
/**
 * Append one `<br/><a …>` link per attachment found on an article page.
 *
 * Attachment markers are the elements whose id starts with `nattach`. For the
 * `ggtz` category the element right before each marker is read as the anchor;
 * for the other categories the anchor (with its label in a `<span>`) is looked
 * up inside the element preceding the marker's parent.
 *
 * @param {Function} articleData - cheerio root of the article page.
 * @param {?string} articleBody - article HTML; null/undefined is treated as empty.
 * @param {string} type - category key; selects the markup variant and the site root.
 * @returns {string} the article HTML followed by the attachment links.
 */
function tryGetAttachments(articleData, articleBody, type) {
    const { root } = typeMap[type];
    // `.html()` gives null when the body container is missing; without this
    // the concatenation below would emit the literal text "null".
    let html = articleBody ?? '';

    const appendLink = (rawHref, label) => {
        // A marker without a usable href is skipped, so one odd attachment
        // no longer throws and discards the whole article text.
        if (!rawHref) {
            return;
        }
        // Resolve against the site root instead of chopping the first character.
        const link = new URL(rawHref, root).href;
        // The attribute value is quoted so the markup stays valid for any URL.
        html += `<br/><a href="${link}">${label}</a>`;
    };

    const markers = articleData('[id^=nattach]');
    if (type === 'ggtz') {
        markers.prev().each((_, item) => {
            const anchor = articleData(item);
            appendLink(anchor.attr('href'), anchor.text());
        });
    } else {
        markers
            .parent()
            .prev()
            .each((_, item) => {
                const anchor = articleData(item).find('a');
                appendLink(anchor.attr('href'), ` ${anchor.find('span').text()} `);
            });
    }
    return html;
}
// A. taken from the list page:    link, title, day-level date (fallback)
// B. taken from the article page: full text, author, minute-level date
/**
 * Parse the `ggtz` list page into feed items, loading every article (through
 * the cache) for its full text, author and publication time.
 *
 * @param {object} ctx - request context; only `ctx.cache.tryGet` is used here.
 * @param {Function} $ - cheerio root of the list page.
 * @returns {Promise<object[]>} items with title, description, pubDate, link, author.
 */
async function ggtzParse(ctx, $) {
    const rows = $('a[class=c269582]').parent().slice(0, limit);
    return Promise.all(
        rows
            .map(async (_, item) => {
                const anchor = $(item).find('a[class=c269582]');
                // .slice(3) cuts the leading '../' off the original link.
                const href = anchor.attr('href').slice(3);
                const link = typeMap.ggtz.root + href;
                const title = anchor.attr('title');
                return await ctx.cache.tryGet(link, async () => {
                    const { articleData, description } = await tryGetFullText(href, link, 'ggtz');
                    let author = '';
                    let pubDate;
                    if (articleData instanceof Function) {
                        const header = articleData('h1').next().text();
                        const index = header.indexOf('日期');
                        // Only slice the header when the marker is really there;
                        // with index === -1 the offsets below would cut garbage.
                        if (index !== -1) {
                            author = header.substring(0, index - 2);
                            pubDate = parseDate(header.substring(index + 3, index + 19), 'YYYY-MM-DD HH:mm');
                        }
                    }
                    if (!pubDate) {
                        // Article unavailable or header without a date: use the list date.
                        const date = $(item).find('a[class=c269582_date]').text();
                        if (date) {
                            pubDate = parseDate(date, 'YYYY-MM-DD');
                        }
                    }
                    return {
                        title,
                        description,
                        // Left out entirely when no date text was found, rather
                        // than shipping an invalid date.
                        pubDate: pubDate ? timezone(pubDate, +8) : undefined,
                        link,
                        author,
                    };
                });
            })
            .get()
    );
}
// List page provides link, title and date; the article page provides the
// full text and the contributing unit shown as author.
/**
 * Parse the `jwc` list page into feed items.
 *
 * @param {object} ctx - request context; only `ctx.cache.tryGet` is used here.
 * @param {Function} $ - cheerio root of the list page.
 * @returns {Promise<object[]>} items with title, description, pubDate, link, author.
 */
async function jwcParse(ctx, $) {
    // Builds (or fetches from cache) the feed item for one list row.
    const buildItem = (row) => {
        const entry = $(row);
        const anchor = entry.find('a[class=c259713]');
        const href = anchor.attr('href');
        const link = typeMap.jwc.root + href;
        const title = anchor.attr('title');
        const listedOn = parseDate(entry.find('span[class=timestyle259713]').text(), 'YYYY/MM/DD');

        return ctx.cache.tryGet(link, async () => {
            const page = await tryGetFullText(href, link, 'jwc');
            // The author is only available when the article page was loaded.
            const department = page.articleData instanceof Function ? page.articleData('span[class=authorstyle259690]').text() : '';
            return {
                title,
                description: page.description,
                pubDate: timezone(listedOn, +8),
                link,
                author: `供稿单位:${department}`,
            };
        });
    };

    const rows = $('a[class=c259713]').parent().parent().slice(0, limit);
    const pending = rows.map((_, row) => buildItem(row)).get();
    return Promise.all(pending);
}
// A. taken from the list page:    link, title
// B. taken from the article page: full text, minute-level date
/**
 * Parse the `zsjyc` list page into feed items, loading every article (through
 * the cache) for its full text and publication time.
 *
 * @param {object} ctx - request context; only `ctx.cache.tryGet` is used here.
 * @param {Function} $ - cheerio root of the list page.
 * @returns {Promise<object[]>} items with title, description, pubDate, link, author.
 */
async function zsjycParse(ctx, $) {
    const rows = $('a[class=c127701]').parent().parent().slice(0, limit);
    return Promise.all(
        rows
            .map(async (_, item) => {
                const anchor = $(item).find('a[class=c127701]');
                const href = anchor.attr('href');
                const link = typeMap.zsjyc.root + href;
                const title = anchor.attr('title');
                return await ctx.cache.tryGet(link, async () => {
                    const { articleData, description } = await tryGetFullText(href, link, 'zsjyc');
                    let date = '';
                    let format = 'YYYY-MM-DD HH:mm';
                    if (articleData instanceof Function) {
                        date = articleData('span[class=timestyle127702]').text();
                    }
                    if (!date) {
                        // NOTE(review): this selector carries the c269582 class that
                        // ggtzParse uses for the www site, so it looks copy-pasted and
                        // probably matches nothing here - confirm the list page's real
                        // date element and replace it.
                        date = $(item).find('a[class=c269582_date]').text();
                        format = 'YYYY-MM-DD';
                    }
                    return {
                        title,
                        description,
                        // Parse only real text: an empty string used to be parsed
                        // anyway and produced an invalid date on the item.
                        pubDate: date ? timezone(parseDate(date, format), +8) : undefined,
                        link,
                        author: '供稿单位:招生就业处',
                    };
                });
            })
            .get()
    );
}