Files
RSSHub/lib/v2/sdu/extractor/view.js
Levi Zim 958be6266e feat(route): 山东大学(威海)新闻网 (#9537)
* feat(sduwh): add extractors.

* feat(route): add route for 山东大学(威海)新闻网

* docs: for route sduwh/news

* docs: for route sduwh/news

(cherry picked from commit 831830167a)

* feat(radar): for route 山东大学(威海)新闻网

* refactor: change `got.get` to `got`.

* refactor: prefer `parseDate()` to `new Date()`

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* fix: incomplete URL substring sanitization.

Make CodeQL happy.

* fix(radar): fix target field.

* fix: change route /sduwh to /sdu/wh

* fix: remove superfluous slash character in url.

* feat: look for exact date first.

* feat: extract exact date from news extractor.

* feat: extract exact date from view extractor.

* feat: extractor for www.sdrj.sdu.edu.cn

* refactor: semantic separation of sduwh with sdu

* feat(radar): more accurate name

* docs: update documentation

* refactor: migrate to v2

* refactor: fix deprecated url.resolve

* fix: update docs url

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* fix: sdu not working routes

* fix: accurate `ctx.state.data.url`

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* fix: better error handling for extractors.

* fix: timezone

Co-authored-by: Tony <TonyRL@users.noreply.github.com>

* fix: better error handling.

Co-authored-by: Tony <TonyRL@users.noreply.github.com>
2022-04-17 00:01:39 +08:00

22 lines
1002 B
JavaScript

const got = require('@/utils/got');
const cheerio = require('cheerio');
const { parseDate } = require('@/utils/parse-date');
const timezone = require('@/utils/timezone');
module.exports = async (link, ctx) =>
await ctx.cache.tryGet(link, async () => {
let content, author, exactDate;
try {
const result = await got(link);
const $ = cheerio.load(result.data);
content = $('#vsb_content').html();
author = $("form[name='_newscontent_fromname'] > div > p:last-of-type").text();
const exactDateLine = $('.news_tit > p:last-child').text();
const exactDateText = exactDateLine.match(/^发布日期:(?<date>\d+年\d+月\d+日\s\d{2}:\d{2})/).groups.date;
exactDate = timezone(parseDate(exactDateText, 'YYYY年MM月DD日 HH:mm'), +8);
return { description: content, author, exactDate };
} catch (e) {
return { description: content, author, exactDate };
}
});