From 00c4c6a37d4a0623c17140fc5312fbe2296596e8 Mon Sep 17 00:00:00 2001
From: cerebrater <49020242+cerebrater@users.noreply.github.com>
Date: Mon, 8 Apr 2019 11:00:48 +0800
Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8F=B4=20Matters.News=20(#1845)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

有關獲取主頁中的最新訊息(`matters/latest`),因網站未能提供相應接口,且需模仿 click button,故只能採用 `puppeteer`。

---
Review notes (text between the `---` line and the diff is ignored by `git am`):
* author/hot/tags/topics: every entry is now fetched and cached inside its own
  map callback, so the JSON stored under a link always belongs to that link.
* latest: the leftover console.log is gone and the feed has a channel `link`.
* The message above mentions `puppeteer`; latest.js in this patch sends an
  axios POST to the GraphQL endpoint instead.
* The docs/social-media.md hunk announces 3 context lines and 10 additions,
  but only empty `+` lines survive in this copy - TODO restore from upstream.

 docs/social-media.md         | 10 ++++++++
 lib/router.js                |  7 ++++++
 lib/routes/matters/author.js | 48 ++++++++++++++++++++++++++++++++++++
 lib/routes/matters/hot.js    | 48 ++++++++++++++++++++++++++++++++++++
 lib/routes/matters/latest.js | 34 +++++++++++++++++++++++++
 lib/routes/matters/tags.js   | 48 ++++++++++++++++++++++++++++++++++++
 lib/routes/matters/topics.js | 48 ++++++++++++++++++++++++++++++++++++
 7 files changed, 243 insertions(+)
 create mode 100644 lib/routes/matters/author.js
 create mode 100644 lib/routes/matters/hot.js
 create mode 100644 lib/routes/matters/latest.js
 create mode 100644 lib/routes/matters/tags.js
 create mode 100644 lib/routes/matters/topics.js

diff --git a/docs/social-media.md b/docs/social-media.md
index d280e40945..354ce44eac 100644
--- a/docs/social-media.md
+++ b/docs/social-media.md
@@ -469,3 +469,13 @@
+
+## Matters
+
+
+
+
+
+
+
+
diff --git a/lib/router.js b/lib/router.js
index 0d4ac3b2ab..1c010bb5db 100755
--- a/lib/router.js
+++ b/lib/router.js
@@ -1221,6 +1221,13 @@ router.get('/checkee/:dispdate', require('./routes/checkee/index'));
 // 艾瑞
 router.get('/iresearch/report', require('./routes/iresearch/report'));
 
+// Matters
+router.get('/matters/topics', require('./routes/matters/topics'));
+router.get('/matters/latest', require('./routes/matters/latest'));
+router.get('/matters/hot', require('./routes/matters/hot'));
+router.get('/matters/tags/:tid', require('./routes/matters/tags'));
+router.get('/matters/author/:uid', require('./routes/matters/author'));
+
 // MobData
 router.get('/mobdata/report', require('./routes/mobdata/report'));
 
diff --git a/lib/routes/matters/author.js b/lib/routes/matters/author.js
new file mode 100644
index 0000000000..d6f93df20d
--- /dev/null
+++ b/lib/routes/matters/author.js
@@ -0,0 +1,48 @@
+const cheerio = require('cheerio');
+const axios = require('../../utils/axios');
+
+/**
+ * Matters author feed: scrapes the article list on the page of the author
+ * given by `ctx.params.uid` and uses each article's body as its description.
+ *
+ * @param {object} ctx - request context; the feed is written to `ctx.state.data`.
+ */
+module.exports = async (ctx) => {
+    const url = `https://matters.news/@${ctx.params.uid}`;
+
+    const res = await axios.get(url);
+    const $ = cheerio.load(res.data);
+    const list = $('section.jsx-1110843272.container').get();
+
+    const out = await Promise.all(
+        list.map(async (item) => {
+            const $item = cheerio.load(item);
+            const postfix = encodeURI($item('a.jsx-1110843272').attr('href'));
+            const address = `https://matters.news${postfix}`;
+
+            // Cached entries already carry their description, so no article request is needed.
+            const cache = await ctx.cache.get(address);
+            if (cache) {
+                return JSON.parse(cache);
+            }
+
+            const detail = await axios.get(address);
+            const single = {
+                title: $item('h2.jsx-71409154.feed').text(),
+                pubDate: new Date($item('time').attr('datetime')).toUTCString(),
+                link: address,
+                guid: address,
+                description: cheerio.load(detail.data)('.jsx-2372748515.u-content').html(),
+            };
+            // Store exactly the entry built for this link so a later cache hit returns matching content.
+            ctx.cache.set(address, JSON.stringify(single), 24 * 60 * 60);
+            return single;
+        })
+    );
+
+    ctx.state.data = {
+        title: $('title').text(),
+        link: url,
+        item: out,
+    };
+};
diff --git a/lib/routes/matters/hot.js b/lib/routes/matters/hot.js
new file mode 100644
index 0000000000..0636e082df
--- /dev/null
+++ b/lib/routes/matters/hot.js
@@ -0,0 +1,48 @@
+const cheerio = require('cheerio');
+const axios = require('../../utils/axios');
+
+/**
+ * Matters home page feed: scrapes the article list rendered on
+ * https://matters.news/ and uses each article's body as its description.
+ *
+ * @param {object} ctx - request context; the feed is written to `ctx.state.data`.
+ */
+module.exports = async (ctx) => {
+    const url = 'https://matters.news/';
+
+    const res = await axios.get(url);
+    const $ = cheerio.load(res.data);
+    const list = $('section.jsx-1110843272.container').get();
+
+    const out = await Promise.all(
+        list.map(async (item) => {
+            const $item = cheerio.load(item);
+            const partial = encodeURI($item('a.jsx-1110843272').attr('href'));
+            const completeUrl = `https://matters.news${partial}`;
+
+            // Cached entries already carry their description, so no article request is needed.
+            const cache = await ctx.cache.get(completeUrl);
+            if (cache) {
+                return JSON.parse(cache);
+            }
+
+            const detail = await axios.get(completeUrl);
+            const single = {
+                title: $item('h2.jsx-71409154.feed').text(),
+                pubDate: new Date($item('time').attr('datetime')).toUTCString(),
+                link: completeUrl,
+                guid: completeUrl,
+                description: cheerio.load(detail.data)('.jsx-2372748515.u-content').html(),
+            };
+            // Store exactly the entry built for this link so a later cache hit returns matching content.
+            ctx.cache.set(completeUrl, JSON.stringify(single), 24 * 60 * 60);
+            return single;
+        })
+    );
+
+    ctx.state.data = {
+        title: $('title').text(),
+        link: url,
+        item: out,
+    };
+};
diff --git a/lib/routes/matters/latest.js b/lib/routes/matters/latest.js
new file mode 100644
index 0000000000..15302006d5
--- /dev/null
+++ b/lib/routes/matters/latest.js
@@ -0,0 +1,34 @@
+const axios = require('../../utils/axios');
+
+/**
+ * Matters "newest articles" feed: runs the NewestFeed GraphQL query and writes the feed to `ctx.state.data`.
+ */
+module.exports = async (ctx) => {
+    const response = await axios({
+        method: 'post',
+        url: 'https://server.matters.news/',
+        data: {
+            operationName: 'NewestFeed',
+            variables: {
+                hasArticleDigestActionAuthor: false,
+                hasArticleDigestActionBookmark: true,
+                hasArticleDigestActionTopicScore: false,
+            },
+            query:
+                'query NewestFeed($cursor: String, $hasArticleDigestActionAuthor: Boolean = false, $hasArticleDigestActionBookmark: Boolean = true, $hasArticleDigestActionTopicScore: Boolean = false) {\n viewer {\n id\n recommendation {\n feed: newest(input: {first: 10, after: $cursor}) {\n ...FeedArticleConnection\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\nfragment FeedArticleConnection on ArticleConnection {\n pageInfo {\n startCursor\n endCursor\n hasNextPage\n __typename\n }\n edges {\n cursor\n node {\n ...FeedDigestArticle\n __typename\n }\n __typename\n }\n __typename\n}\n\nfragment FeedDigestArticle on Article {\n id\n title\n slug\n cover\n summary\n mediaHash\n live\n author {\n id\n userName\n ...UserDigestMiniUser\n __typename\n }\n ...DigestActionsArticle\n ...FingerprintArticle\n __typename\n}\n\nfragment UserDigestMiniUser on User {\n id\n userName\n displayName\n ...AvatarUser\n __typename\n}\n\nfragment AvatarUser on User {\n avatar\n __typename\n}\n\nfragment DigestActionsArticle on Article {\n author {\n ...UserDigestMiniUser @include(if: $hasArticleDigestActionAuthor)\n __typename\n }\n createdAt\n ...MATArticle\n ...CommentCountArticle\n ...BookmarkArticle @include(if: $hasArticleDigestActionBookmark)\n ...TopicScoreArticle @include(if: $hasArticleDigestActionTopicScore)\n ...StateActionsArticle\n __typename\n}\n\nfragment MATArticle on Article {\n MAT\n __typename\n}\n\nfragment CommentCountArticle on Article {\n comments(input: {first: 0}) {\n totalCount\n __typename\n }\n __typename\n}\n\nfragment BookmarkArticle on Article {\n id\n subscribed\n __typename\n}\n\nfragment TopicScoreArticle on Article {\n topicScore\n __typename\n}\n\nfragment StateActionsArticle on Article {\n state\n __typename\n}\n\nfragment FingerprintArticle on Article {\n id\n dataHash\n __typename\n}\n',
+        },
+    });
+
+    const items = response.data.data.viewer.recommendation.feed.edges.map(({ node }) => ({
+        title: node.title,
+        description: node.summary,
+        // Link format: /@<userName>/<slug>-<mediaHash>
+        link: `https://matters.news/@${node.author.userName}/${encodeURIComponent(node.slug)}-${node.mediaHash}`,
+    }));
+
+    ctx.state.data = {
+        title: 'Matters | 最新文章',
+        link: 'https://matters.news/',
+        item: items,
+    };
+};
diff --git a/lib/routes/matters/tags.js b/lib/routes/matters/tags.js
new file mode 100644
index 0000000000..1d466b065e
--- /dev/null
+++ b/lib/routes/matters/tags.js
@@ -0,0 +1,48 @@
+const cheerio = require('cheerio');
+const axios = require('../../utils/axios');
+
+/**
+ * Matters tag feed: scrapes the article list on the page of the tag
+ * given by `ctx.params.tid` and uses each article's body as its description.
+ *
+ * @param {object} ctx - request context; the feed is written to `ctx.state.data`.
+ */
+module.exports = async (ctx) => {
+    const url = `https://matters.news/tags/${ctx.params.tid}`;
+
+    const res = await axios.get(url);
+    const $ = cheerio.load(res.data);
+    const list = $('section.jsx-1110843272.container').get();
+
+    const out = await Promise.all(
+        list.map(async (item) => {
+            const $item = cheerio.load(item);
+            const postfix = encodeURI($item('a.jsx-1110843272').attr('href'));
+            const address = `https://matters.news${postfix}`;
+
+            // Cached entries already carry their description, so no article request is needed.
+            const cache = await ctx.cache.get(address);
+            if (cache) {
+                return JSON.parse(cache);
+            }
+
+            const detail = await axios.get(address);
+            const single = {
+                title: $item('h2.jsx-71409154.feed').text(),
+                pubDate: new Date($item('time').attr('datetime')).toUTCString(),
+                link: address,
+                guid: address,
+                description: cheerio.load(detail.data)('.jsx-2372748515.u-content').html(),
+            };
+            // Store exactly the entry built for this link so a later cache hit returns matching content.
+            ctx.cache.set(address, JSON.stringify(single), 24 * 60 * 60);
+            return single;
+        })
+    );
+
+    ctx.state.data = {
+        title: $('title').text(),
+        link: url,
+        item: out,
+    };
+};
diff --git a/lib/routes/matters/topics.js b/lib/routes/matters/topics.js
new file mode 100644
index 0000000000..8e0b5d0bbe
--- /dev/null
+++ b/lib/routes/matters/topics.js
@@ -0,0 +1,48 @@
+const cheerio = require('cheerio');
+const axios = require('../../utils/axios');
+
+/**
+ * Matters topics feed: scrapes the article list rendered on
+ * https://matters.news/topics and uses each article's body as its description.
+ *
+ * @param {object} ctx - request context; the feed is written to `ctx.state.data`.
+ */
+module.exports = async (ctx) => {
+    const url = 'https://matters.news/topics';
+
+    const res = await axios.get(url);
+    const $ = cheerio.load(res.data);
+    const list = $('section.jsx-1110843272.container').get();
+
+    const out = await Promise.all(
+        list.map(async (item) => {
+            const $item = cheerio.load(item);
+            const postfix = encodeURI($item('a.jsx-1110843272').attr('href'));
+            const address = `https://matters.news${postfix}`;
+
+            // Cached entries already carry their description, so no article request is needed.
+            const cache = await ctx.cache.get(address);
+            if (cache) {
+                return JSON.parse(cache);
+            }
+
+            const detail = await axios.get(address);
+            const single = {
+                title: $item('h2.jsx-71409154.feed').text(),
+                pubDate: new Date($item('time').attr('datetime')).toUTCString(),
+                link: address,
+                guid: address,
+                description: cheerio.load(detail.data)('.jsx-2372748515.u-content').html(),
+            };
+            // Store exactly the entry built for this link so a later cache hit returns matching content.
+            ctx.cache.set(address, JSON.stringify(single), 24 * 60 * 60);
+            return single;
+        })
+    );
+
+    ctx.state.data = {
+        title: $('title').text(),
+        link: url,
+        item: out,
+    };
+};