feat(route): 北极星光伏 (#9542)

* 新增北极星光伏网路由

* 规范化北极星光伏网路由

* 修改一些文本错误

* refactor: migrate to v2
This commit is contained in:
Sxuet
2022-04-23 19:16:16 +08:00
committed by GitHub
parent ac11c9e2f5
commit c7aa447100
8 changed files with 156 additions and 82 deletions

View File

@@ -469,9 +469,21 @@ Provides all of the articles by the specified Yahoo! author.
## 北极星电力网
### 北极星环保
### 环保要闻
<Route author="zsimple" example="/bjx/huanbao" path="/bjx/huanbao" />
<Route author="zsimple" example="/bjx/huanbao" path="/bjx/huanbao" radar="1" rssbud="1"/>
### 光伏
<Route author="Sxuet" example="/bjx/gf/sc" path="/bjx/gf/:type" :paramsDesc="['分类,即北极星光伏栏目页 URL(https://guangfu.bjx.com.cn/:type/)中的 `type` 字段,可选值见下表']" radar="1" rssbud="1">
`:type` 类型可选如下
| 要闻 | 政策 | 市场行情 | 企业动态 | 独家观点 | 项目工程 | 招标采购 | 财经 | 国际行情 | 价格趋势 | 技术跟踪 |
| -- | -- | ---- | ---- | ---- | ---- | ---- | -- | ---- | ---- | ---- |
| yw | zc | sc | mq | dj | xm | zb | cj | gj | sj | js |
</Route>
## 财新网

View File

@@ -1445,8 +1445,8 @@ router.get('/banyuetan/:name', lazyloadRouteHandler('./routes/banyuetan'));
// router.get('/people/xjpjh/:keyword?/:year?', lazyloadRouteHandler('./routes/people/xjpjh'));
// router.get('/people/cpc/24h', lazyloadRouteHandler('./routes/people/cpc/24h'));
// Polaris Power Network (bjx.com.cn)
router.get('/bjx/huanbao', lazyloadRouteHandler('./routes/bjx/huanbao'));
// Polaris Power Network (bjx.com.cn) - migrated to v2
// router.get('/bjx/huanbao', lazyloadRouteHandler('./routes/bjx/huanbao'));
// gamersky
router.get('/gamersky/news', lazyloadRouteHandler('./routes/gamersky/news'));

View File

@@ -1,78 +0,0 @@
const got = require('@/utils/got');
const date = require('@/utils/date');
const cheerio = require('cheerio');
const url = require('url');
const iconv = require('iconv-lite');
module.exports = async (ctx) => {
const listURL = 'http://huanbao.bjx.com.cn/NewsList';
const response = await got({
method: 'get',
url: listURL,
});
const $ = cheerio.load(response.data);
const list = $('.list_main .list_left_ul a')
.map((_, a) => $(a).attr('href'))
.get();
const out = await Promise.all(
// 服务器禁止单个IP大并发访问只能少返回几条
list.slice(0, 3).map((link) => fetchPage(ctx, link))
);
ctx.state.data = {
title: '北极星环保 - 环保行业垂直门户网站',
link: listURL,
item: out,
};
};
/**
 * Build the feed item for one article, using the cache when possible.
 *
 * A cached entry (stored as JSON under the article link) is returned as is.
 * Otherwise the article page is downloaded and decoded as GBK, every
 * numbered pagination link is followed in order, lazy-loaded images get
 * their real `src`, and the resulting item is cached before being returned.
 *
 * @param {object} ctx - request context providing `ctx.cache.get` / `ctx.cache.set`.
 * @param {string} link - URL of the article.
 * @returns {Promise<object>} item with `title`, `description`, `pubDate`, `link` and `author`.
 */
async function fetchPage(ctx, link) {
    const cached = await ctx.cache.get(link);
    if (cached) {
        return JSON.parse(cached);
    }

    // Download a page as raw bytes and parse it after decoding from GBK.
    const loadGbk = async (target) => {
        const response = await got.get(target, { responseType: 'buffer' });
        return cheerio.load(iconv.decode(response.data, 'gbk'));
    };

    // A long article may be split over several pages.
    const $page = await loadGbk(link);
    const documents = [$page];

    // Pagination links are loaded one after another to keep the page order.
    const anchors = $page('.list_detail div.page a');
    for (let index = 0; index < anchors.length; index += 1) {
        const $anchor = $page(anchors[index]);
        const isPageNumber = /^\d+$/.test($anchor.text().trim());
        if (isPageNumber) {
            const pageUrl = url.resolve(link, $anchor.attr('href'));
            // eslint-disable-next-line no-await-in-loop
            documents.push(await loadGbk(pageUrl));
        }
    }

    // Replace the lazy-loading placeholder of each image with the real image.
    for (const $doc of documents) {
        $doc('.list_detail')
            .find('img[data-echo]')
            .each((_, node) => {
                const $image = $doc(node);
                $image.attr('src', $image.data('echo')).removeAttr('data-echo');
            });
    }

    const title = $page('.list_detail > h1').text();
    let description = '';
    for (const $doc of documents) {
        description += $doc('.list_detail .newsrand').html();
    }
    const pubDate = date($page('.list_detail .list_copy b').last().text());
    const author = $page('.list_detail .list_copy b').first().text();

    const item = { title, description, pubDate, link, author };
    ctx.cache.set(link, JSON.stringify(item));
    return item;
}

78
lib/v2/bjx/huanbao.js Normal file
View File

@@ -0,0 +1,78 @@
const got = require('@/utils/got');
const { parseDate } = require('@/utils/parse-date');
const cheerio = require('cheerio');
const timezone = require('@/utils/timezone');
const asyncPool = require('tiny-async-pool');
/**
 * Drain the async iterable returned by `asyncPool(...args)` into an array.
 *
 * @param {...*} poolArgs - arguments forwarded unchanged to `asyncPool`.
 * @returns {Promise<Array>} every yielded value, in the order it was yielded.
 */
const asyncPoolAll = async (...poolArgs) => {
    const collected = [];
    const pool = asyncPool(...poolArgs);
    for await (const value of pool) {
        collected.push(value);
    }
    return collected;
};
module.exports = async (ctx) => {
const listURL = 'https://huanbao.bjx.com.cn/yw/';
const response = await got(listURL);
const $ = cheerio.load(response.data);
let items = $('.cc-layout-3 .cc-list-content li')
.toArray()
.map((e) => {
e = $(e);
return {
title: e.find('a').attr('title'),
link: e.find('a').attr('href'),
pubDate: parseDate(e.find('span').text()),
};
});
items = await asyncPoolAll(
// 服务器禁止单个IP大并发访问只能少返回几条
3,
items,
(items) => fetchPage(ctx, items.link)
);
ctx.state.data = {
title: '北极星环保 - 环保行业垂直门户网站',
link: listURL,
item: items,
};
};
/**
 * Build the feed item for one article, going through `ctx.cache.tryGet`
 * keyed by the article link.
 *
 * The article page is downloaded, every numbered pagination link is followed
 * in order, and the `.cc-article` HTML of all pages is concatenated into the
 * description.
 *
 * @param {object} ctx - request context providing `ctx.cache.tryGet`.
 * @param {string} link - absolute URL of the article.
 * @returns {Promise<object>} item with `title`, `description`, `pubDate`, `link` and `author`.
 */
const fetchPage = (ctx, link) =>
    ctx.cache.tryGet(link, async () => {
        // A long article may be split over several pages.
        const firstResponse = await got(link);
        const $page = cheerio.load(firstResponse.data);
        const pages = [$page];

        // Pagination links are loaded sequentially to keep the page order.
        const pagelinks = $page('#article_cont .cc-paging a');
        for (let i = 0; i < pagelinks.length; i++) {
            const $a = $page(pagelinks[i]);
            const href = $a.attr('href');
            // Only numbered anchors are pages; an anchor without href would
            // otherwise resolve to a bogus ".../undefined" URL.
            if (!href || !/^\d+$/.test($a.text().trim())) {
                continue;
            }
            const sublink = new URL(href, link).href;
            // eslint-disable-next-line no-await-in-loop
            const pageResponse = await got(sublink);
            pages.push(cheerio.load(pageResponse.data));
        }

        const metaSpans = $page('.cc-headline .box p span');
        return {
            title: $page('title').text(),
            // `.html()` is null when a page has no `.cc-article`; skip such
            // pages instead of concatenating the string "null".
            description: pages.map(($p) => $p('.cc-article').html() || '').join(''),
            pubDate: timezone(parseDate(metaSpans.eq(0).text()), +8),
            link,
            author: metaSpans.eq(1).text(),
        };
    });

4
lib/v2/bjx/maintainer.js Normal file
View File

@@ -0,0 +1,4 @@
// Maintainers of the routes in this namespace, keyed by route path.
const maintainers = {
    '/gf/:type': ['Sxuet'],
    '/huanbao': ['zsimple'],
};

module.exports = maintainers;

21
lib/v2/bjx/radar.js Normal file
View File

@@ -0,0 +1,21 @@
module.exports = {
'bjx.com.cn': {
_name: '北极星电力网',
guangfu: [
{
title: '光伏 - 分类',
docs: 'https://docs.rsshub.app/traditional-media.html#bei-ji-xing-dian-li-wang',
source: ['/:type', '/'],
target: '/bjx/gf/:type?',
},
],
huanbao: [
{
title: '环保要闻',
docs: 'https://docs.rsshub.app/traditional-media.html#bei-ji-xing-dian-li-wang',
source: ['/yw', '/'],
target: '/bjx/huanbao',
},
],
},
};

4
lib/v2/bjx/router.js Normal file
View File

@@ -0,0 +1,4 @@
module.exports = (router) => {
router.get('/gf/:type', require('./types'));
router.get('/huanbao', require('./huanbao'));
};

33
lib/v2/bjx/types.js Normal file
View File

@@ -0,0 +1,33 @@
const got = require('@/utils/got');
const cheerio = require('cheerio');
const { parseDate } = require('@/utils/parse-date');
module.exports = async (ctx) => {
const type = ctx.params.type;
const response = await got({
method: 'get',
url: `https://guangfu.bjx.com.cn/${type}/`,
});
const data = response.data;
const $ = cheerio.load(data);
const typeName = $('div.box2 em:last').text();
const list = $('div.cc-list-content ul li');
ctx.state.data = {
title: `北极星太阳能光大网${typeName}`,
description: $('meta[name="Description"]').attr('content'),
link: `https://guangfu.bjx.com.cn/${type}/`,
item:
list &&
list
.map((index, item) => {
item = $(item);
return {
title: item.find('a').attr('title'),
description: item.html(),
link: item.find('a').attr('href'),
pubDate: parseDate(item.find('span').text()),
};
})
.get(),
};
};