From 36b870543c0d69d56c8aca7ad6970e99e49636c3 Mon Sep 17 00:00:00 2001 From: cssxsh <32539286+cssxsh@users.noreply.github.com> Date: Fri, 18 Oct 2019 11:57:52 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20add=20X-mol=20=E5=B9=B3=E5=8F=B0=20(#32?= =?UTF-8?q?79)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/study.md | 10 ++++++ lib/router.js | 4 +++ lib/routes/x-mol/news.js | 58 ++++++++++++++++++++++++++++++ lib/routes/x-mol/paper.js | 76 +++++++++++++++++++++++++++++++++++++++ lib/routes/x-mol/utils.js | 16 +++++++++ 5 files changed, 164 insertions(+) create mode 100644 lib/routes/x-mol/news.js create mode 100644 lib/routes/x-mol/paper.js create mode 100644 lib/routes/x-mol/utils.js diff --git a/docs/study.md b/docs/study.md index 2493d2a3a4..2dedf43c99 100644 --- a/docs/study.md +++ b/docs/study.md @@ -110,3 +110,13 @@ pageClass: routes ### 最新 + +## X-MOL 平台 + +### 新闻 + + + +### 期刊 + + diff --git a/lib/router.js b/lib/router.js index d32c396ff6..44a3e56488 100644 --- a/lib/router.js +++ b/lib/router.js @@ -1833,6 +1833,10 @@ router.get('/kzfeed/topic/:id', require('./routes/kzfeed/topic')); // 腾讯新闻较真查证平台 router.get('/factcheck', require('./routes/tencent/factcheck')); +// X-MOL化学资讯平台 +router.get('/x-mol/news/:tag?', require('./routes/x-mol/news.js')); +router.get('/x-mol/paper/:type/:magazine', require('./routes/x-mol/paper')); + // 電撃Online router.get('/dengekionline/:type?', require('./routes/dengekionline/new')); diff --git a/lib/routes/x-mol/news.js b/lib/routes/x-mol/news.js new file mode 100644 index 0000000000..1a4a1336a5 --- /dev/null +++ b/lib/routes/x-mol/news.js @@ -0,0 +1,58 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const utils = require('./utils'); + +module.exports = async (ctx) => { + const tag = ctx.params.tag; + const path = tag ? `/news/tag/${tag}` : '/news/index'; + const response = await got(path, { + method: 'GET', + baseUrl: utils.host, + }); + const data = response.data; + const $ = cheerio.load(data); + + const title = $('title').text(); + const description = $('meta[name="description"]').attr('content'); + const newsitem = $('.newsitem'); + + const item = newsitem + .map((index, element) => { + const title = $(element) + .find('h3') + .find('a') + .text(); + const a = $(element) + .find('p') + .find('a'); + const link = utils.host + a.attr('href'); + const image = $(element) + .find('img') + .attr('src'); + const description = utils.setDesc(image, a.text()); + const span = $(element).find('.space-right-m30'); + const author = span + .text() + .replace('来源:', '') + .trim(); + const date = utils.getDate(span.next().text()); + const pubDate = utils.transDate(date); + + const single = { + title: title, + link: link, + description: description, + author: author, + pubDate: pubDate, + }; + return single; + }) + .get(); + + ctx.state.data = { + title: title, + link: response.url, + description: description, + item: item, + }; +}; diff --git a/lib/routes/x-mol/paper.js b/lib/routes/x-mol/paper.js new file mode 100644 index 0000000000..0c88ffe27d --- /dev/null +++ b/lib/routes/x-mol/paper.js @@ -0,0 +1,76 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const utils = require('./utils'); + +module.exports = async (ctx) => { + const type = ctx.params.type; + const magazine = ctx.params.magazine; + const path = `/paper/${type}/${magazine}`; + const response = await got(path, { + method: 'GET', + baseUrl: utils.host, + headers: { + Cookie: 'closeFloatWindow=true; journalIndexViewType=list; journalSort=publishDate', + }, + }); + const data = response.data; + const $ = cheerio.load(data); + + const title = $('title').text(); + const description = $('meta[name="description"]').attr('content'); + const newsitem = $('.magazine-text'); + + const item = await Promise.all( + newsitem + .map(async (index, element) => { + const news = $(element); + + const a = news.find('.magazine-text-title').find('a'); + const title = a.text(); + const link = utils.host + a.attr('href'); + + const imageId = news + .find('.magazine-pic') + .attr('id') + .substring(9); + const getLink = utils.host + '/attachment/getImgUrl'; + const noPic = utils.host + '/css/images/nothesispic.jpg'; + const imageUrl = await ctx.cache.tryGet(getLink, async () => { + const result = await got.get(getLink, { + params: { + attachmentId: imageId, + }, + }); + return result.data; + }); + const image = imageUrl || noPic; + const text = $(element) + .find('.magazine-description') + .text(); + const description = utils.setDesc(image, text); + + const span = news.find('.magazine-text-atten'); + const arr = span.map((index, element) => $(element).text()).get(); + const author = arr[1]; + const date = utils.getDate(arr[0]); + const pubDate = utils.transDate(date); + + const single = { + title: title, + link: link, + description: description, + author: author, + pubDate: pubDate, + }; + return Promise.resolve(single); + }) + .get() + ); + + ctx.state.data = { + title: title, + link: response.url, + description: description, + item: item, + }; +}; diff --git a/lib/routes/x-mol/utils.js b/lib/routes/x-mol/utils.js new file mode 100644 index 0000000000..dbe61a473b --- /dev/null +++ b/lib/routes/x-mol/utils.js @@ -0,0 +1,16 @@ +const XmolUtils = { + host: 'https://www.x-mol.com', + transDate: (date) => new Date(`${date} GMT+0800`).toUTCString(), + getDate: (text) => { + const reg = /[1-9]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])/; + if (typeof text === 'string') { + const arr = text.match(reg); + return arr && text.match(reg)[0]; + } else { + return null; + } + }, + setDesc: (image, text) => `

${text}

`, +}; + +module.exports = XmolUtils;