fix: fetch content with tryGet

This commit is contained in:
ikvarxt
2021-01-10 20:27:09 +08:00
parent c3dfbea0cc
commit 8202f67e1f

View File

@@ -1,67 +1,44 @@
// 以下代码参考 bupt/yz.js & bupt/utils.js 修改完成,在此感谢。
const got = require('@/utils/got');
const cheerio = require('cheerio');
const url = require('url');
// Route handler: fetches the announcement list page at jwzx.lntu.edu.cn/index/jwgg.htm,
// builds one feed item per list entry, and stores the feed in ctx.state.data.
//
// @param ctx - request context; this block reads ctx.cache.tryGet and writes ctx.state.data.
//
// NOTE(review): this listing looks like a diff rendering whose +/- markers were stripped, so
// removed and added lines are interleaved (`response`, `$` and `res` are each declared twice,
// and both `meta.url` and `link` hold the same list URL). Confirm against the real file
// before relying on statement order here.
module.exports = async(ctx) => {
// Selectors and list URL used by the `meta`-based variant of the code.
const meta = {
selector: {
list: '.tr-ri ul',
item: 'li',
content: '.v_news_content',
},
url: 'http://jwzx.lntu.edu.cn/index/jwgg.htm',
}
// Fetch the list page (variant using `meta.url`).
const response = await got({
method: 'get',
url: `${meta.url}`,
});
const data = response.data;
// Fetch the list page (variant using `link`).
const link = 'http://jwzx.lntu.edu.cn/index/jwgg.htm';
const response = await got.get(link);
// Parse the list page and collect the `li` elements under `.tr-ri ul` as raw nodes.
const $ = cheerio.load(data);
const list = $(meta.selector.list).find(meta.selector.item).get();
const $ = cheerio.load(response.data);
const list = $('.tr-ri ul').find('li').get();
// Build every feed item concurrently; Promise.all rejects if any single item fails.
const res = await Promise.all(
list.map(async(item) => {
// `$` starts out wrapping the list entry and is rebound to the article page further down.
let $ = cheerio.load(item);
// Item address, resolved against the list page URL
const item_link = url.resolve(link, $('a').attr('href'));
// NOTE(review): this ProcessFeed call appears to belong to the `meta`-based variant, not to this callback.
const res = await ProcessFeed(list, meta);
// presumably tryGet returns the cached value for `item_link` or runs the callback and stores
// its result — confirm against the cache helper.
const description = await ctx.cache.tryGet(item_link, async() => {
const result = await got.get(item_link);
$ = cheerio.load(result.data);
// remove style
$('img, div, span, p, table, td, tr').removeAttr('style');
$('style, script').remove();
// Strip every opening and closing <span> tag from the article body HTML, keeping the inner text.
return $('.v_news_content').html().replace(/(<span[^>]*>|<\/span>)/g, "");
});
const rssitem = {
// Keeps only the text before the first '-' of the <title> element.
// NOTE(review): `$` is rebound to the article page only inside the tryGet callback above; if
// that callback is skipped (e.g. a cache hit), `$` still wraps the list entry here — verify
// that the title is still correct in that case.
title: $('title').text().split('-')[0],
description: description,
link: item_link,
author: '辽宁工程技术大学教务处',
};
return Promise.resolve(rssitem);
})
);
// Publish the feed: title, list page link and the collected items.
ctx.state.data = {
title: '辽宁工程技术大学教务公告',
link: `${meta.url}`,
link: link,
item: res,
};
};
// Turns the raw list entries into feed items by fetching and cleaning each linked article page.
//
// @param list - raw list-entry nodes; each one is loaded with cheerio and its first `a[href]` is read.
// @param meta - object providing `url` (base for resolving links) and `selector.content`.
// @returns a promise for an array of { title, description, link, author, guid } objects.
//
// NOTE(review): in this listing these lines look like the removed side of a diff — confirm
// whether ProcessFeed still exists in the current file.
const ProcessFeed = async(list, meta) =>
await Promise.all(
list.map(async(item) => {
let $ = cheerio.load(item);
// Item address, resolved against the list page URL
const $url = url.resolve(meta.url, $('a').attr('href'));
// Load the content page
const response = await got({
method: 'get',
url: $url,
});
const data = response.data;
$ = cheerio.load(data); // load the returned HTML with cheerio
// Strip inline style attributes and drop <style>/<script> elements
$('img, div, span, p, table, td, tr').removeAttr('style');
$('style, script').remove();
// Keep only the text before the first '-' (original note: cuts off the "... Academic Affairs Office" part)
let title = $('title').text().split('-')[0];
// Fields extracted for the feed item
return {
title: title,
// Three passes remove <span> wrappers. Note that `<span*>` matches "<spa" followed by any
// number of "n" and then ">", i.e. only attribute-less tags; the next two replaces strip
// the remaining opening and closing span tags.
description: $(meta.selector.content).html().replace(/<span*>(.*?)<\/span>/g, '$1').replace(/<span[^>]*>/g, '').replace(/<\/span[^>]*>/g, ''),
link: $url,
author: '辽宁工程技术大学教务处',
guid: $url, // unique identifier of the article
};
})
);
// NOTE(review): the closing brace below has no matching opener in the visible lines.
};