From c7aa447100514f06a43b0d69e001323c66eba37f Mon Sep 17 00:00:00 2001 From: Sxuet <52621721+1065464173@users.noreply.github.com> Date: Sat, 23 Apr 2022 19:16:16 +0800 Subject: [PATCH] =?UTF-8?q?feat(route):=20=E5=8C=97=E6=9E=81=E6=98=9F?= =?UTF-8?q?=E5=85=89=E4=BC=8F=20(#9542)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 新增北极星光伏网路由 * 规范化北极星光伏网路由 * 修改一些文本错误 * refactor: migrate to v2 --- docs/traditional-media.md | 16 +++++++- lib/router.js | 4 +- lib/routes/bjx/huanbao.js | 78 --------------------------------------- lib/v2/bjx/huanbao.js | 78 +++++++++++++++++++++++++++++++++++++++ lib/v2/bjx/maintainer.js | 4 ++ lib/v2/bjx/radar.js | 21 +++++++++++ lib/v2/bjx/router.js | 4 ++ lib/v2/bjx/types.js | 33 +++++++++++++++++ 8 files changed, 156 insertions(+), 82 deletions(-) delete mode 100644 lib/routes/bjx/huanbao.js create mode 100644 lib/v2/bjx/huanbao.js create mode 100644 lib/v2/bjx/maintainer.js create mode 100644 lib/v2/bjx/radar.js create mode 100644 lib/v2/bjx/router.js create mode 100644 lib/v2/bjx/types.js diff --git a/docs/traditional-media.md b/docs/traditional-media.md index 6c7675e49d..8e1dec72cb 100644 --- a/docs/traditional-media.md +++ b/docs/traditional-media.md @@ -469,9 +469,21 @@ Provides all of the articles by the specified Yahoo! author. ## 北极星电力网 -### 北极星环保 +### 环保要闻 - + + +### 光伏 + + + +`:type` 类型可选如下 + +| 要闻 | 政策 | 市场行情 | 企业动态 | 独家观点 | 项目工程 | 招标采购 | 财经 | 国际行情 | 价格趋势 | 技术跟踪 | +| -- | -- | ---- | ---- | ---- | ---- | ---- | -- | ---- | ---- | ---- | +| yw | zc | sc | mq | dj | xm | zb | cj | gj | sj | js | + + ## 财新网 diff --git a/lib/router.js b/lib/router.js index ee92965382..9976295fc9 100644 --- a/lib/router.js +++ b/lib/router.js @@ -1445,8 +1445,8 @@ router.get('/banyuetan/:name', lazyloadRouteHandler('./routes/banyuetan')); // router.get('/people/xjpjh/:keyword?/:year?', lazyloadRouteHandler('./routes/people/xjpjh')); // router.get('/people/cpc/24h', lazyloadRouteHandler('./routes/people/cpc/24h')); -// 北极星电力网 -router.get('/bjx/huanbao', lazyloadRouteHandler('./routes/bjx/huanbao')); +// 北极星电力网 migrated to v2 +// router.get('/bjx/huanbao', lazyloadRouteHandler('./routes/bjx/huanbao')); // gamersky router.get('/gamersky/news', lazyloadRouteHandler('./routes/gamersky/news')); diff --git a/lib/routes/bjx/huanbao.js b/lib/routes/bjx/huanbao.js deleted file mode 100644 index 380fddfd2b..0000000000 --- a/lib/routes/bjx/huanbao.js +++ /dev/null @@ -1,78 +0,0 @@ -const got = require('@/utils/got'); -const date = require('@/utils/date'); -const cheerio = require('cheerio'); -const url = require('url'); -const iconv = require('iconv-lite'); - -module.exports = async (ctx) => { - const listURL = 'http://huanbao.bjx.com.cn/NewsList'; - const response = await got({ - method: 'get', - url: listURL, - }); - - const $ = cheerio.load(response.data); - const list = $('.list_main .list_left_ul a') - .map((_, a) => $(a).attr('href')) - .get(); - - const out = await Promise.all( - // 服务器禁止单个IP大并发访问,只能少返回几条 - list.slice(0, 3).map((link) => fetchPage(ctx, link)) - ); - - ctx.state.data = { - title: '北极星环保 - 环保行业垂直门户网站', - link: listURL, - item: out, - }; -}; - -async function fetchPage(ctx, link) { - const cache = await ctx.cache.get(link); - if (cache) { - return JSON.parse(cache); - } - - // 可能一篇文章过长会分成多页 - const pages = []; - - const result = await got.get(link, { responseType: 'buffer' }); - const $page = cheerio.load(iconv.decode(result.data, 'gbk')); - pages.push($page); - - // 如果是有分页链接,则使用顺序加载以保证顺序 - const pagelinks = $page('.list_detail div.page a'); - if (pagelinks.length > 0) { - for (let i = 0; i < pagelinks.length; i++) { - const $a = $page(pagelinks[i]); - if (!/^\d+$/.test($a.text().trim())) { - continue; - } - const sublink = url.resolve(link, $a.attr('href')); - /* eslint-disable no-await-in-loop */ - const result = await got.get(sublink, { responseType: 'buffer' }); - pages.push(cheerio.load(iconv.decode(result.data, 'gbk'))); - } - } - - // 将懒加载的loading图片转换为真实图片 - pages.forEach(($p) => { - $p('.list_detail') - .find('img[data-echo]') - .each((_, img) => { - const $img = $p(img); - $img.attr('src', $img.data('echo')).removeAttr('data-echo'); - }); - }); - - const item = { - title: $page('.list_detail > h1').text(), - description: pages.reduce((desc, $p) => desc + $p('.list_detail .newsrand').html(), ''), - pubDate: date($page('.list_detail .list_copy b').last().text()), - link, - author: $page('.list_detail .list_copy b').first().text(), - }; - ctx.cache.set(link, JSON.stringify(item)); - return item; -} diff --git a/lib/v2/bjx/huanbao.js b/lib/v2/bjx/huanbao.js new file mode 100644 index 0000000000..b3fc99cdbd --- /dev/null +++ b/lib/v2/bjx/huanbao.js @@ -0,0 +1,78 @@ +const got = require('@/utils/got'); +const { parseDate } = require('@/utils/parse-date'); +const cheerio = require('cheerio'); +const timezone = require('@/utils/timezone'); +const asyncPool = require('tiny-async-pool'); + +const asyncPoolAll = async (...args) => { + const results = []; + for await (const result of asyncPool(...args)) { + results.push(result); + } + return results; +}; + +module.exports = async (ctx) => { + const listURL = 'https://huanbao.bjx.com.cn/yw/'; + const response = await got(listURL); + + const $ = cheerio.load(response.data); + let items = $('.cc-layout-3 .cc-list-content li') + .toArray() + .map((e) => { + e = $(e); + return { + title: e.find('a').attr('title'), + link: e.find('a').attr('href'), + pubDate: parseDate(e.find('span').text()), + }; + }); + + items = await asyncPoolAll( + // 服务器禁止单个IP大并发访问,只能少返回几条 + 3, + items, + (items) => fetchPage(ctx, items.link) + ); + + ctx.state.data = { + title: '北极星环保 - 环保行业垂直门户网站', + link: listURL, + item: items, + }; +}; + +const fetchPage = (ctx, link) => + ctx.cache.tryGet(link, async () => { + // 可能一篇文章过长会分成多页 + const pages = []; + + const result = await got(link); + const $page = cheerio.load(result.data); + pages.push($page); + + // 如果是有分页链接,则使用顺序加载以保证顺序 + const pagelinks = $page('#article_cont .cc-paging a'); + + if (pagelinks.length > 0) { + for (let i = 0; i < pagelinks.length; i++) { + const $a = $page(pagelinks[i]); + if (!/^\d+$/.test($a.text().trim())) { + continue; + } + const sublink = new URL($a.attr('href'), link).href; + /* eslint-disable no-await-in-loop */ + const result = await got(sublink); + pages.push(cheerio.load(result.data)); + } + } + + const item = { + title: $page('title').text(), + description: pages.reduce((desc, $p) => desc + $p('.cc-article').html(), ''), + pubDate: timezone(parseDate($page('.cc-headline .box p span').eq(0).text()), +8), + link, + author: $page('.cc-headline .box p span').eq(1).text(), + }; + return item; + }); diff --git a/lib/v2/bjx/maintainer.js b/lib/v2/bjx/maintainer.js new file mode 100644 index 0000000000..8bbbcfbb51 --- /dev/null +++ b/lib/v2/bjx/maintainer.js @@ -0,0 +1,4 @@ +module.exports = { + '/gf/:type': ['Sxuet'], + '/huanbao': ['zsimple'], +}; diff --git a/lib/v2/bjx/radar.js b/lib/v2/bjx/radar.js new file mode 100644 index 0000000000..f3b50efa99 --- /dev/null +++ b/lib/v2/bjx/radar.js @@ -0,0 +1,21 @@ +module.exports = { + 'bjx.com.cn': { + _name: '北极星电力网', + guangfu: [ + { + title: '光伏 - 分类', + docs: 'https://docs.rsshub.app/traditional-media.html#bei-ji-xing-dian-li-wang', + source: ['/:type', '/'], + target: '/bjx/gf/:type?', + }, + ], + huanbao: [ + { + title: '环保要闻', + docs: 'https://docs.rsshub.app/traditional-media.html#bei-ji-xing-dian-li-wang', + source: ['/yw', '/'], + target: '/bjx/huanbao', + }, + ], + }, +}; diff --git a/lib/v2/bjx/router.js b/lib/v2/bjx/router.js new file mode 100644 index 0000000000..80de3f02bb --- /dev/null +++ b/lib/v2/bjx/router.js @@ -0,0 +1,4 @@ +module.exports = (router) => { + router.get('/gf/:type', require('./types')); + router.get('/huanbao', require('./huanbao')); +}; diff --git a/lib/v2/bjx/types.js b/lib/v2/bjx/types.js new file mode 100644 index 0000000000..32b9ffe638 --- /dev/null +++ b/lib/v2/bjx/types.js @@ -0,0 +1,33 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const { parseDate } = require('@/utils/parse-date'); + +module.exports = async (ctx) => { + const type = ctx.params.type; + const response = await got({ + method: 'get', + url: `https://guangfu.bjx.com.cn/${type}/`, + }); + const data = response.data; + const $ = cheerio.load(data); + const typeName = $('div.box2 em:last').text(); + const list = $('div.cc-list-content ul li'); + ctx.state.data = { + title: `北极星太阳能光大网${typeName}`, + description: $('meta[name="Description"]').attr('content'), + link: `https://guangfu.bjx.com.cn/${type}/`, + item: + list && + list + .map((index, item) => { + item = $(item); + return { + title: item.find('a').attr('title'), + description: item.html(), + link: item.find('a').attr('href'), + pubDate: parseDate(item.find('span').text()), + }; + }) + .get(), + }; +};