diff --git a/docs/en/journal.md b/docs/en/journal.md index 108ead85de..9bde2e886c 100644 --- a/docs/en/journal.md +++ b/docs/en/journal.md @@ -8,15 +8,14 @@ pageClass: routes ### Cell Journal - + -> Current Issue (default) +| `:category` | Query Type | Route | +| :---------: | :---------------------: | ---------------------------------------- | +| current | Current Issue (default) | [/cell/cell/current](/cell/cell/current) | +| inpress | Articles in press | [/cell/cell/inpress](/cell/cell/inpress) | -`/cell/cell/current` - -> Articles in press - -`/cell/cell/inpress` + ### eLife - Latest Research-ALL @@ -28,7 +27,46 @@ pageClass: routes ### Nature Journal - Latest Research - + + +| `:journal` | Full Name of the Journal | Route | +| :-----------: | :-------------------------: | ---------------------------------------------------------------- | +| nature | Nature | [/nature/research/nature](/nature/research/nature) | +| nbt | Nature Biotechnology | [/nature/research/nbt](/nature/research/nbt) | +| neuro | Nature Neuroscience | [/nature/research/neuro](/nature/research/neuro) | +| ng | Nature Genetics | [/nature/research/ng](/nature/research/ng) | +| ni | Nature Immunology | [/nature/research/ni](/nature/research/ni) | +| nmeth | Nature Method | [/nature/research/nmeth](/nature/research/nmeth) | +| nchem | Nature Chemistry | [/nature/research/nchem](/nature/research/nchem) | +| nmat | Nature Materials | [/nature/research/nmat](/nature/research/nmat) | +| natmachintell | Nature Machine Intelligence | [/nature/research/natmachintell](/nature/research/natmachintell) | + +- Using router (`/nature/research/` + “short name for a journal”) to query latest research paper for a certain journal of Nature Publishing Group. + If the `:journal` parameter is blank, then latest research of Nature will return. 
+- The journals from NPG are run by different groups of people, and their websites may not be consistent across all the journals +- Only the abstract section is rendered + + + +### Nature Journal - News & Comment + + + +| `:journal` | Full Name of the Journal | Route | +| :-----------: | :-------------------------: | -------------------------------------------------------------------------------- | +| nbt | Nature Biotechnology | [/nature/news-and-comment/nbt](/nature/news-and-comment/nbt) | +| neuro | Nature Neuroscience | [/nature/news-and-comment/neuro](/nature/news-and-comment/neuro) | +| ng | Nature Genetics | [/nature/news-and-comment/ng](/nature/news-and-comment/ng) | +| ni | Nature Immunology | [/nature/news-and-comment/ni](/nature/news-and-comment/ni) | +| nmeth | Nature Method | [/nature/news-and-comment/nmeth](/nature/news-and-comment/nmeth) | +| nchem | Nature Chemistry | [/nature/news-and-comment/nchem](/nature/news-and-comment/nchem) | +| nmat | Nature Materials | [/nature/news-and-comment/nmat](/nature/news-and-comment/nmat) | +| natmachintell | Nature Machine Intelligence | [/nature/news-and-comment/natmachintell](/nature/news-and-comment/natmachintell) | + +- Using router (`/nature/news-and-comment/` + “short name for a journal”) to query the latest news and comment for a certain journal of Nature Publishing Group. 
+- The journals from NPG are run by different groups of people, and their websites may not be consistent across all the journals + + ### Nature Journal - News @@ -38,26 +76,6 @@ pageClass: routes -### Nature Genetics (ng) - Latest Research - - - -### Nature Methods (nmeth) - Latest Research - - - -### Nature Biotechnology (nbt) - Latest Research - - - -### Nature Neuroscience (neuro) - Latest Research - - - -### Nature Machine Intelligence (natmachintell) - Latest Research - - - ### Proceedings of The National Academy of Sciences (PNAS) - Latest Articles - ALL @@ -68,15 +86,29 @@ pageClass: routes ### Science Journal - Current Issue - + + +| `:journal` | Full Name of the Journal | Route | +| :--------: | :----------------------------: | ---------------------------------------------------------------- | +| science | Science | [/sciencemag/current/science](/sciencemag/current/science) | +| advances | Science Advances | [/sciencemag/current/advances](/sciencemag/current/advances) | +| immunology | Science Immunology | [/sciencemag/current/immunology](/sciencemag/current/immunology) | +| robotics | Science Robotics | [/sciencemag/current/robotics](/sciencemag/current/robotics) | +| stke | Science Signaling | [/sciencemag/current/stke](/sciencemag/current/stke) | +| stm | Science Translational Medicine | [/sciencemag/current/stm](/sciencemag/current/stm) | + +- Using router (`/sciencemag/current/` + “short name for a journal”) to query the current issue of a journal from AAAS. + Leave the parameter blank (`/sciencemag/current`) to get updates from Science. 
+ + ### Science Journal - First Release - + -### Science Advances - Current Issue +_only support Science Journal_ - + ## Search Engine @@ -84,10 +116,6 @@ pageClass: routes -### X-MOL Platform - News - - - ### X-MOL Platform - Journal diff --git a/docs/en/study.md b/docs/en/study.md index eb5b433a79..318e69a4ca 100644 --- a/docs/en/study.md +++ b/docs/en/study.md @@ -33,6 +33,12 @@ pageClass: routes | columns | columns=健康 | string / undefined | | columns | columns=virus | string / undefined | +## X-MOL + +### News + + + ## ZhiShiFenZi ### News diff --git a/docs/journal.md b/docs/journal.md index dee2472800..3368d9d347 100644 --- a/docs/journal.md +++ b/docs/journal.md @@ -10,10 +10,6 @@ pageClass: routes -### X-MOL 平台-新闻 - - - ### X-MOL 平台-期刊 @@ -41,13 +37,12 @@ pageClass: routes -> 本期刊物 (默认选项) +| `:category` | 类型说明 | 路由 | +| :---------: | :-----------------: | ---------------------------------------- | +| current | 本期刊物 (默认选项) | [/cell/cell/current](/cell/cell/current) | +| inpress | 在线发表 | [/cell/cell/inpress](/cell/cell/inpress) | -`/cell/cell/current` - -> 在线发表 - -`/cell/cell/inpress` + ### eLife-最新成果-综合 @@ -57,9 +52,49 @@ pageClass: routes -### Nature 主刊-最新成果 +### Nature 系列-最新成果 - + + +| `:journal` | 期刊名 | 路由 | +| :-----------: | :-------------------------: | ---------------------------------------------------------------- | +| nature | Nature | [/nature/research/nature](/nature/research/nature) | +| nbt | Nature Biotechnology | [/nature/research/nbt](/nature/research/nbt) | +| neuro | Nature Neuroscience | [/nature/research/neuro](/nature/research/neuro) | +| ng | Nature Genetics | [/nature/research/ng](/nature/research/ng) | +| ni | Nature Immunology | [/nature/research/ni](/nature/research/ni) | +| nmeth | Nature Method | [/nature/research/nmeth](/nature/research/nmeth) | +| nchem | Nature Chemistry | [/nature/research/nchem](/nature/research/nchem) | +| nmat | Nature Materials | [/nature/research/nmat](/nature/research/nmat) | +| natmachintell | Nature 
Machine Intelligence | [/nature/research/natmachintell](/nature/research/natmachintell) | + +- 通过 `/nature/research/` + “杂志简写”来获取对应杂志的最新文章(Latest Research)。 + 若参数置空(`/nature/research`),则默认获取主刊(Nature)的最新文章。 +- 由于 Nature 系列的刊物是分别由不同的编辑来独立运营,所以页面格式上有些差异。目前**仅**对以下杂志进行了测试。 +- 由于权限的限制,目前仅获取论文的摘要进行展示。 + + + +### Nature-新闻及评论 + + + +| `:journal` | 期刊名 | 路由 | +| :-----------: | :-------------------------: | -------------------------------------------------------------------------------- | +| nbt | Nature Biotechnology | [/nature/news-and-comment/nbt](/nature/news-and-comment/nbt) | +| neuro | Nature Neuroscience | [/nature/news-and-comment/neuro](/nature/news-and-comment/neuro) | +| ng | Nature Genetics | [/nature/news-and-comment/ng](/nature/news-and-comment/ng) | +| ni | Nature Immunology | [/nature/news-and-comment/ni](/nature/news-and-comment/ni) | +| nmeth | Nature Method | [/nature/news-and-comment/nmeth](/nature/news-and-comment/nmeth) | +| nchem | Nature Chemistry | [/nature/news-and-comment/nchem](/nature/news-and-comment/nchem) | +| nmat | Nature Materials | [/nature/news-and-comment/nmat](/nature/news-and-comment/nmat) | +| natmachintell | Nature Machine Intelligence | [/nature/news-and-comment/natmachintell](/nature/news-and-comment/natmachintell) | + +- 通过 `/nature/research/` + “杂志简写”来获取对应杂志的最新文章(Latest Research)。 + 主刊由于格式不同,该 router 并未支持,采用 `/nature/nature/news` 来获取新闻。 +- 由于 Nature 系列的刊物是分别由不同的编辑来独立运营,所以页面格式上有些差异。目前**仅**对以下杂志进行了测试。 + + ### Nature 主刊-新闻动态 @@ -69,26 +104,6 @@ pageClass: routes -### Nature Genetics (ng)-最新成果 - - - -### Nature Methods (nmeth)-最新成果 - - - -### Nature Biotechnology (nbt)-最新成果 - - - -### Nature Neuroscience (neuro)-最新成果 - - - -### Nature Machine Intelligence (natmachintell)-最新成果 - - - ### PNAS-最新文章(全部) @@ -97,14 +112,28 @@ pageClass: routes -### Science 主刊-本期刊物 +### Science 系列-本期刊物 - + + +| `:journal` | 期刊名 | 路由 | +| :--------: | :----------------------------: | ---------------------------------------------------------------- | +| 
science | Science | [/sciencemag/current/science](/sciencemag/current/science) | +| advances | Science Advances | [/sciencemag/current/advances](/sciencemag/current/advances) | +| immunology | Science Immunology | [/sciencemag/current/immunology](/sciencemag/current/immunology) | +| robotics | Science Robotics | [/sciencemag/current/robotics](/sciencemag/current/robotics) | +| stke | Science Signaling | [/sciencemag/current/stke](/sciencemag/current/stke) | +| stm | Science Translational Medicine | [/sciencemag/current/stm](/sciencemag/current/stm) | + +- 通过 `/sciencemag/current/` + “杂志简写”来获取对应杂志最新一期的文章(Current Issue)。 + 若参数置空(`/sciencemag/current`),则默认获取主刊(Science)的最新文章。 + + ### Science 主刊-在线发表 - + -### Science Advances-本期刊物 +_仅支持 Science 主刊_ - + diff --git a/docs/study.md b/docs/study.md index c1b9ff9869..bc567524c0 100644 --- a/docs/study.md +++ b/docs/study.md @@ -14,6 +14,12 @@ pageClass: routes +## X-MOL 平台 + +### 新闻 + + + ## 领研 ### 论文 diff --git a/lib/router.js b/lib/router.js index 446369df16..3abd608bed 100644 --- a/lib/router.js +++ b/lib/router.js @@ -2007,22 +2007,15 @@ router.get('/pnas/:tid', require('./routes/pnas/topic')); // cell [Sci Journal] router.get('/cell/cell/:category', require('./routes/cell/cell/index')); -// nature [Sci Journal] -router.get('/nature/nature/research', require('./routes/nature/nature/research')); -router.get('/nature/nature/news', require('./routes/nature/nature/news')); -router.get('/nature/nature/highlight', require('./routes/nature/nature/highlight')); -// nature 子刊 [Sci Journal] -router.get('/nature/nmeth/research', require('./routes/nature/nmeth/research')); -router.get('/nature/ng/research', require('./routes/nature/ng/research')); -router.get('/nature/nbt/research', require('./routes/nature/nbt/research')); -router.get('/nature/neuro/research', require('./routes/nature/neuro/research')); -router.get('/nature/natmachintell/research', require('./routes/nature/natmachintell/research')); +// nature + nature 子刊 [Sci 
Journal] +router.get('/nature/research/:journal?', require('./routes/nature/research')); +router.get('/nature/news-and-comment/:journal?', require('./routes/nature/news-and-comment')); +router.get('/nature/news', require('./routes/nature/news')); +router.get('/nature/highlight', require('./routes/nature/highlight')); // science [Sci Journal] -router.get('/sciencemag/science/current', require('./routes/sciencemag/science/current')); -router.get('/sciencemag/science/early', require('./routes/sciencemag/science/early')); -// science 子刊 [Sci Journal] -router.get('/sciencemag/advances/current', require('./routes/sciencemag/advances/current')); +router.get('/sciencemag/current/:journal?', require('./routes/sciencemag/current')); +router.get('/sciencemag/early/science', require('./routes/sciencemag/early')); // dlsite router.get('/dlsite/new/:type', require('./routes/dlsite/new')); diff --git a/lib/routes/nature/nature/highlight.js b/lib/routes/nature/highlight.js similarity index 100% rename from lib/routes/nature/nature/highlight.js rename to lib/routes/nature/highlight.js diff --git a/lib/routes/nature/natmachintell/research.js b/lib/routes/nature/natmachintell/research.js deleted file mode 100644 index b09783c1cc..0000000000 --- a/lib/routes/nature/natmachintell/research.js +++ /dev/null @@ -1,44 +0,0 @@ -const cheerio = require('cheerio'); -const got = require('@/utils/got'); -const url = require('url'); - -const host = 'https://www.nature.com'; -const link = 'https://www.nature.com/natmachintell/research'; - -module.exports = async (ctx) => { - const responses = await got.get(link); - const $ = cheerio.load(responses.data); - - const list = $('article').get(); - - const out = await Promise.all( - list.map(async (item) => { - const $ = cheerio.load(item); - const title = $('h3 a').text(); - const itemUrl = url.resolve(host, $('h3 a').attr('href')); - - const cache = await ctx.cache.get(itemUrl); - if (cache) { - return Promise.resolve(JSON.parse(cache)); - } - - 
const responses = await got.get(itemUrl); - const $d = cheerio.load(responses.data); - const description = $d('div.c-article-body').html(); - - const single = { - title, - link: itemUrl, - description, - }; - - ctx.cache.set(itemUrl, JSON.stringify(single)); - return Promise.resolve(single); - }) - ); - ctx.state.data = { - title: 'nature > nature machine intelligence > latest research', - link: link, - item: out, - }; -}; diff --git a/lib/routes/nature/nature/research.js b/lib/routes/nature/nature/research.js deleted file mode 100644 index d50f03904a..0000000000 --- a/lib/routes/nature/nature/research.js +++ /dev/null @@ -1,75 +0,0 @@ -const cheerio = require('cheerio'); -const got = require('@/utils/got'); - -module.exports = async (ctx) => { - const base = `https://www.nature.com`; - const url = `${base}/nature/research`; - - const res = await got.get(url); - const $ = cheerio.load(res.data); - const list = $('.border-bottom-1.pb20').get(); - - const out = await Promise.all( - list.map(async (item) => { - const $ = cheerio.load(item); - const title = $('h3 > a').text(); - const partial = $('h3 > a').attr('href'); - const address = `${base}${partial}`; - const brief = $('.hide-overflow.inline').text(); - const time = $('time').text(); - let author; - if ($('.js-list-authors-3 li').length > 3) { - author = - $('.js-list-authors-3 li') - .slice(0, 1) - .text() + ' et al.'; - } else { - author = $('.js-list-authors-3 li').text(); - } - const cache = await ctx.cache.get(address); - if (cache) { - return Promise.resolve(JSON.parse(cache)); - } - const res = await got.get(address); - const capture = cheerio.load(res.data); - const abs = capture('div#Abs1-content.c-article-section__content > p').html(); - - let briefContents = ''; - if (brief !== '') { - briefContents = ` -
-

Brief

-

${brief}

-
- `; - } - let absContents = ''; - if (abs !== null) { - absContents = ` -
-

Abstract

-

${abs}

-
- `; - } - const contents = briefContents + absContents; - - const single = { - title, - author: author, - description: contents, - link: address, - guid: address, - pubDate: new Date(time).toUTCString(), - }; - ctx.cache.set(address, JSON.stringify(single)); - return Promise.resolve(single); - }) - ); - ctx.state.data = { - title: `Nature | Latest Research`, - description: `Nature, a nature research journal`, - link: url, - item: out, - }; -}; diff --git a/lib/routes/nature/nbt/research.js b/lib/routes/nature/nbt/research.js deleted file mode 100644 index 59cb6398d7..0000000000 --- a/lib/routes/nature/nbt/research.js +++ /dev/null @@ -1,75 +0,0 @@ -const cheerio = require('cheerio'); -const got = require('@/utils/got'); - -module.exports = async (ctx) => { - const base = `https://www.nature.com`; - const url = `${base}/nbt/research`; - - const res = await got.get(url); - const $ = cheerio.load(res.data); - const list = $('.border-bottom-1.pb20').get(); - - const out = await Promise.all( - list.map(async (item) => { - const $ = cheerio.load(item); - const title = $('h3 > a').text(); - const partial = $('h3 > a').attr('href'); - const address = `${base}${partial}`; - const brief = $('.hide-overflow.inline').text(); - const time = $('time').text(); - let author; - if ($('.js-list-authors-3 li').length > 3) { - author = - $('.js-list-authors-3 li') - .slice(0, 1) - .text() + ' et al.'; - } else { - author = $('.js-list-authors-3 li').text(); - } - const cache = await ctx.cache.get(address); - if (cache) { - return Promise.resolve(JSON.parse(cache)); - } - const res = await got.get(address); - const capture = cheerio.load(res.data); - const abs = capture('div#Abs1-content.c-article-section__content > p').html(); - - let briefContents = ''; - if (brief !== '') { - briefContents = ` -
-

Brief

-

${brief}

-
- `; - } - let absContents = ''; - if (abs !== null) { - absContents = ` -
-

Abstract

-

${abs}

-
- `; - } - const contents = briefContents + absContents; - - const single = { - title, - author: author, - description: contents, - link: address, - guid: address, - pubDate: new Date(time).toUTCString(), - }; - ctx.cache.set(address, JSON.stringify(single)); - return Promise.resolve(single); - }) - ); - ctx.state.data = { - title: `Nature Biotechnology | Latest Research`, - description: `Nature Biotechnology, a nature research journal`, - link: url, - item: out, - }; -}; diff --git a/lib/routes/nature/neuro/research.js b/lib/routes/nature/neuro/research.js deleted file mode 100644 index 8daca578e1..0000000000 --- a/lib/routes/nature/neuro/research.js +++ /dev/null @@ -1,75 +0,0 @@ -const cheerio = require('cheerio'); -const got = require('@/utils/got'); - -module.exports = async (ctx) => { - const base = `https://www.nature.com`; - const url = `${base}/neuro/research`; - - const res = await got.get(url); - const $ = cheerio.load(res.data); - const list = $('.border-bottom-1.pb20').get(); - - const out = await Promise.all( - list.slice(0, 3).map(async (item) => { - const $ = cheerio.load(item); - const title = $('h3 > a').text(); - const partial = $('h3 > a').attr('href'); - const address = `${base}${partial}`; - const brief = $('.hide-overflow.inline').text(); - const time = $('time').text(); - let author; - if ($('.js-list-authors-3 li').length > 3) { - author = - $('.js-list-authors-3 li') - .slice(0, 1) - .text() + ' et al.'; - } else { - author = $('.js-list-authors-3 li').text(); - } - const cache = await ctx.cache.get(address); - if (cache) { - return Promise.resolve(JSON.parse(cache)); - } - const res = await got.get(address); - const capture = cheerio.load(res.data); - const abs = capture('div#Abs1-content.c-article-section__content > p').html(); - - let briefContents = ''; - if (brief !== '') { - briefContents = ` -
-

Brief

-

${brief}

-
- `; - } - let absContents = ''; - if (abs !== null) { - absContents = ` -
-

Abstract

-

${abs}

-
- `; - } - const contents = briefContents + absContents; - - const single = { - title, - author: author, - description: contents, - link: address, - guid: address, - pubDate: new Date(time).toUTCString(), - }; - ctx.cache.set(address, JSON.stringify(single)); - return Promise.resolve(single); - }) - ); - ctx.state.data = { - title: `Nature Neuroscience | Latest Research`, - description: `Nature Neuroscience, a nature research journal`, - link: url, - item: out, - }; -}; diff --git a/lib/routes/nature/news-and-comment.js b/lib/routes/nature/news-and-comment.js new file mode 100644 index 0000000000..e08c1eebd2 --- /dev/null +++ b/lib/routes/nature/news-and-comment.js @@ -0,0 +1,80 @@ +// example usage: `/nature/news-and-comment/ng` +// The journals from NPG are run by different group of people, +// and the website of may not be consitent for all the journals +// +// This router has **just** been tested in: +// nbt: Nature Biotechnology +// neuro: Nature Neuroscience +// ng: Nature Genetics +// ni: Nature Immunology +// nmeth: Nature Method +// nchem: Nature Chemistry +// nmat: Nature Materials +// natmachintell: Nature Machine Intelligence + +const cheerio = require('cheerio'); +const got = require('@/utils/got'); + +module.exports = async (ctx) => { + const baseURL = `https://www.nature.com`; + + const journal = ctx.params.journal; + const pageURL = `${baseURL}/${journal}/news-and-comment`; + + const pageResponse = await got.get(pageURL); + const pageCapture = cheerio.load(pageResponse.data); + + const pageDescription = pageCapture('meta[name="description"]').attr('content') || `Nature, a nature research journal`; + const pageTitleName = pageCapture('meta[name="WT.cg_n"]').attr('content') || `Nature (${journal})`; + const pageTitleSub = pageCapture('meta[name="WT.cg_s"]').attr('content') || 'News & Comment'; + + const list = pageCapture('.border-bottom-1.pb20').get(); + + const items = await Promise.all( + list.map(async (el) => { + const $ = cheerio.load(el); + 
const title = $('h3 > a').text(); + const partial = $('h3 > a').attr('href'); + const address = `${baseURL}${partial}`; + const brief = $('.hide-overflow.inline').text(); + const time = $('time').text(); + const author = $('.js-list-authors-3 li').text(); + const articleType = $('p > span').attr('data-class'); + const headerContents = ` +
+

+ ${articleType} + | + ${author} +

+
+ `; + let briefContents = ''; + if (brief !== '') { + briefContents = ` +
+

Brief

+

${brief}

+
+ `; + } + const contents = headerContents + briefContents; + + const item = { + title, + author: author, + description: contents, + link: address, + guid: address, + pubDate: new Date(time).toUTCString(), + }; + return Promise.resolve(item); + }) + ); + ctx.state.data = { + title: `${pageTitleName} | ${pageTitleSub}`, + description: pageDescription, + link: pageURL, + item: items, + }; +}; diff --git a/lib/routes/nature/nature/news.js b/lib/routes/nature/news.js similarity index 100% rename from lib/routes/nature/nature/news.js rename to lib/routes/nature/news.js diff --git a/lib/routes/nature/ng/research.js b/lib/routes/nature/ng/research.js deleted file mode 100644 index c1d843dc8b..0000000000 --- a/lib/routes/nature/ng/research.js +++ /dev/null @@ -1,75 +0,0 @@ -const cheerio = require('cheerio'); -const got = require('@/utils/got'); - -module.exports = async (ctx) => { - const base = `https://www.nature.com`; - const url = `${base}/ng/research`; - - const res = await got.get(url); - const $ = cheerio.load(res.data); - const list = $('.border-bottom-1.pb20').get(); - - const out = await Promise.all( - list.map(async (item) => { - const $ = cheerio.load(item); - const title = $('h3 > a').text(); - const partial = $('h3 > a').attr('href'); - const address = `${base}${partial}`; - const brief = $('.hide-overflow.inline').text(); - const time = $('time').text(); - let author; - if ($('.js-list-authors-3 li').length > 3) { - author = - $('.js-list-authors-3 li') - .slice(0, 1) - .text() + ' et al.'; - } else { - author = $('.js-list-authors-3 li').text(); - } - const cache = await ctx.cache.get(address); - if (cache) { - return Promise.resolve(JSON.parse(cache)); - } - const res = await got.get(address); - const capture = cheerio.load(res.data); - const abs = capture('div#Abs1-content.c-article-section__content > p').html(); - - let briefContents = ''; - if (brief !== '') { - briefContents = ` -
-

Brief

-

${brief}

-
- `; - } - let absContents = ''; - if (abs !== null) { - absContents = ` -
-

Abstract

-

${abs}

-
- `; - } - const contents = briefContents + absContents; - - const single = { - title, - author: author, - description: contents, - link: address, - guid: address, - pubDate: new Date(time).toUTCString(), - }; - ctx.cache.set(address, JSON.stringify(single)); - return Promise.resolve(single); - }) - ); - ctx.state.data = { - title: `Nature Genetics | Latest Research`, - description: `Nature Genetics, a nature research journal`, - link: url, - item: out, - }; -}; diff --git a/lib/routes/nature/nmeth/research.js b/lib/routes/nature/nmeth/research.js deleted file mode 100644 index 8f49a97bbc..0000000000 --- a/lib/routes/nature/nmeth/research.js +++ /dev/null @@ -1,75 +0,0 @@ -const cheerio = require('cheerio'); -const got = require('@/utils/got'); - -module.exports = async (ctx) => { - const base = `https://www.nature.com`; - const url = `${base}/nmeth/research`; - - const res = await got.get(url); - const $ = cheerio.load(res.data); - const list = $('.border-bottom-1.pb20').get(); - - const out = await Promise.all( - list.map(async (item) => { - const $ = cheerio.load(item); - const title = $('h3 > a').text(); - const partial = $('h3 > a').attr('href'); - const address = `${base}${partial}`; - const brief = $('.hide-overflow.inline').text(); - const time = $('time').text(); - let author; - if ($('.js-list-authors-3 li').length > 3) { - author = - $('.js-list-authors-3 li') - .slice(0, 1) - .text() + ' et al.'; - } else { - author = $('.js-list-authors-3 li').text(); - } - const cache = await ctx.cache.get(address); - if (cache) { - return Promise.resolve(JSON.parse(cache)); - } - const res = await got.get(address); - const capture = cheerio.load(res.data); - const abs = capture('div#Abs1-content.c-article-section__content > p').html(); - - let briefContents = ''; - if (brief !== '') { - briefContents = ` -
-

Brief

-

${brief}

-
- `; - } - let absContents = ''; - if (abs !== null) { - absContents = ` -
-

Abstract

-

${abs}

-
- `; - } - const contents = briefContents + absContents; - - const single = { - title, - author: author, - description: contents, - link: address, - guid: address, - pubDate: new Date(time).toUTCString(), - }; - ctx.cache.set(address, JSON.stringify(single)); - return Promise.resolve(single); - }) - ); - ctx.state.data = { - title: `Nature Method | Latest Research`, - description: `Nature Method, a nature research journal`, - link: url, - item: out, - }; -}; diff --git a/lib/routes/nature/research.js b/lib/routes/nature/research.js new file mode 100644 index 0000000000..ba6a816427 --- /dev/null +++ b/lib/routes/nature/research.js @@ -0,0 +1,97 @@ +// example usage: `/nature/research/ng` +// The journals from NPG are run by different group of people, +// and the website of may not be consitent for all the journals +// +// This router has **just** been tested in: +// nature: Nature +// nbt: Nature Biotechnology +// neuro: Nature Neuroscience +// ng: Nature Genetics +// ni: Nature Immunology +// nmeth: Nature Method +// nchem: Nature Chemistry +// nmat: Nature Materials +// natmachintell: Nature Machine Intelligence + +const cheerio = require('cheerio'); +const got = require('@/utils/got'); + +module.exports = async (ctx) => { + const baseURL = `https://www.nature.com`; + + const journal = ctx.params.journal || 'nature'; + const pageURL = `${baseURL}/${journal}/research`; + + const pageResponse = await got.get(pageURL); + const pageCapture = cheerio.load(pageResponse.data); + + const pageDescription = pageCapture('meta[name="description"]').attr('content') || `Nature, a nature research journal`; + const pageTitleName = pageCapture('meta[name="WT.cg_n"]').attr('content') || `Nature (${journal})`; + const pageTitleSub = pageCapture('meta[name="WT.cg_s"]').attr('content') || 'Latest Research'; + + const list = pageCapture('.border-bottom-1.pb20').get(); + + const items = await Promise.all( + list.slice(4, 6).map(async (el) => { + const $ = cheerio.load(el); + const title 
= $('h3 > a').text(); + const partial = $('h3 > a').attr('href'); + const address = `${baseURL}${partial}`; + const brief = $('.hide-overflow.inline').text(); + const time = $('time').text(); + let author; + if ($('.js-list-authors-3 li').length > 3) { + author = + $('.js-list-authors-3 li') + .slice(0, 1) + .text() + ' et al.'; + } else { + author = $('.js-list-authors-3 li').text(); + } + const cache = await ctx.cache.get(address); + if (cache) { + return Promise.resolve(JSON.parse(cache)); + } + const itemResponse = await got.get(address); + const itemCapture = cheerio.load(itemResponse.data); + const abs = itemCapture('div#Abs1-content.c-article-section__content > p').html(); + + let briefContents = ''; + if (brief !== '') { + briefContents = ` +
+

Brief

+

${brief}

+
+ `; + } + let absContents = ''; + if (abs !== null) { + absContents = ` +
+

Abstract

+

${abs}

+
+ `; + } + const contents = briefContents + absContents; + + const item = { + title, + author: author, + description: contents, + link: address, + guid: address, + pubDate: new Date(time).toUTCString(), + }; + ctx.cache.set(address, JSON.stringify(item)); + return Promise.resolve(item); + }) + ); + ctx.state.data = { + title: `${pageTitleName} | ${pageTitleSub}`, + description: pageDescription, + link: pageURL, + item: items, + }; +}; diff --git a/lib/routes/sciencemag/advances/current.js b/lib/routes/sciencemag/advances/current.js deleted file mode 100644 index 8ae093c916..0000000000 --- a/lib/routes/sciencemag/advances/current.js +++ /dev/null @@ -1,98 +0,0 @@ -const cheerio = require('cheerio'); -const got = require('@/utils/got'); - -module.exports = async (ctx) => { - const base = `https://advances.sciencemag.org`; - - const res = await got.get(base); - const pageCapture = cheerio.load(res.data); - - const list = pageCapture('ul > li > div > div > article > div').get(); - - const out = await Promise.all( - list.map(async (item) => { - const $ = cheerio.load(item); - - const title = $('h3').text(); - const partial = $('h3 > a').attr('href'); - // TODO: .full.txt is a way for getting text preview - const address = `${base}${partial}`; - - let author; - const authorList = $('span.highwire-citation-authors > span.highwire-citation-author') - .map(function(i, el) { - return $(el).text(); - }) - .get(); - if (authorList.length > 5) { - author = authorList.slice(0, 5).join(', ') + ' et al.'; - } else { - author = authorList.join(', '); - } - - const time = new Date($('p.highwire-cite-metadata > time').text()).toUTCString(); - - const cache = await ctx.cache.get(address); - if (cache) { - return Promise.resolve(JSON.parse(cache)); - } - - // contents - // brief content - const brief = $('div.highwire-cite-snippet > div > div > p').text(); - let briefContents = ''; - if (brief !== '') { - briefContents = ` -
-

Brief

-

${brief}

-
- `; - } - - const itemPage = await got.get(address); - const itemCapture = cheerio.load(itemPage.data); - // section and subject content - const section = itemCapture('header > div.overline > span.overline__section').text(); - const subject = itemCapture('header > div.overline > span.overline__subject').text(); - let sectionContents = ''; - if (section !== '' || subject !== '') { - sectionContents = ` -
- ${section} - ${subject} -
- `; - } - // abs content - const abs = itemCapture('div.section.abstract > p').text(); - let absContents = ''; - if (abs !== '') { - absContents = ` -
-

Abstract

- ${abs} -
- `; - } - const contents = sectionContents + briefContents + absContents; - - const single = { - title: title, - author: author, - description: contents, - link: address, - guid: address, - pubDate: time, - }; - ctx.cache.set(address, JSON.stringify(single)); - return Promise.resolve(single); - }) - ); - ctx.state.data = { - title: `Science | First Release`, - description: `Science, a research journal. For papers that published online.`, - link: base, - item: out, - }; -}; diff --git a/lib/routes/sciencemag/science/current.js b/lib/routes/sciencemag/current.js similarity index 62% rename from lib/routes/sciencemag/science/current.js rename to lib/routes/sciencemag/current.js index 466be0227c..b617607544 100644 --- a/lib/routes/sciencemag/science/current.js +++ b/lib/routes/sciencemag/current.js @@ -1,23 +1,41 @@ +// journals form AAAS publishing group +// +// science: Science +// advances: Science Advances +// immunology: Science Immunology +// robotics: Science Robotics +// stke: Science Signaling +// stm: Science Translational Medicine + const cheerio = require('cheerio'); const got = require('@/utils/got'); module.exports = async (ctx) => { - const base = `https://science.sciencemag.org`; + const journal = ctx.params.journal || 'science'; + const baseURL = `https://${journal}.sciencemag.org`; - const res = await got.get(base); - const pageCapture = cheerio.load(res.data); + const pageURL = baseURL; + const pageResponse = await got.get(pageURL); + const pageCapture = cheerio.load(pageResponse.data); + + const pageTitleName = pageCapture('head > title').text() || `Science (${journal})`; // just select paper relative sections - const sectionList = ['issue-toc-section-research-articles', 'issue-toc-section-review', 'issue-toc-section-reports']; + const sectionList = ['research-articles', 'review', 'reports']; + const list = [].concat.apply( [], sectionList.map((section) => { - const sec = cheerio.load(pageCapture(`ul > li .issue-toc-section.${section}`).html()); - 
const sectionName = sec('h2').text(); - const sectionList = sec('ul > li > div > div > article > div') - .append(`
${sectionName}
`) - .get(); - return sectionList; + const sectionContent = pageCapture(`ul > li.issue-toc-section.issue-toc-section-${section}`).html(); + if (sectionContent !== null) { + const sec = cheerio.load(sectionContent); + const sectionName = sec('h2').text(); + const sectionList = sec('ul > li > div > div > article > div') + .append(`
${sectionName}
`) + .get(); + return sectionList; + } + return []; }) ); @@ -27,17 +45,15 @@ module.exports = async (ctx) => { const title = $('h3').text(); const partial = $('h3 > a').attr('href'); - const address = `${base}${partial}`; + const address = `${baseURL}${partial}`; const section = $('div .toc-section-type').text(); let author; const authorList = $('span.highwire-citation-authors > span.highwire-citation-author') - .map(function(i, el) { - return $(el).text(); - }) + .map((_, el) => $(el).text()) .get(); if (authorList.length > 5) { - author = authorList.slice(0, 5).join(', ') + ' et al.'; + author = authorList.slice(0, 5).join(', ') + ' et al.'; } else { author = authorList.join(', '); } @@ -71,10 +87,9 @@ module.exports = async (ctx) => { } const itemPage = await got.get(address); const itemCapture = cheerio.load(itemPage.data); - const abs = itemCapture('div > div.abstract-view > div.section') - .map(function(i, el) { - return $(el).html(); - }) + + const abs = itemCapture('div > div.article > div.section') + .map((_, el) => $(el).html()) .get() .join('
'); @@ -102,9 +117,9 @@ module.exports = async (ctx) => { }) ); ctx.state.data = { - title: `Science | Current Table of Contents`, - description: `Science, a research journal`, - link: base, + title: `${pageTitleName} | Current Issue`, + description: `Current Issue of ${pageTitleName}`, + link: baseURL, item: out, }; }; diff --git a/lib/routes/sciencemag/science/early.js b/lib/routes/sciencemag/early.js similarity index 99% rename from lib/routes/sciencemag/science/early.js rename to lib/routes/sciencemag/early.js index dd07c1a681..12524a5c28 100644 --- a/lib/routes/sciencemag/science/early.js +++ b/lib/routes/sciencemag/early.js @@ -1,3 +1,5 @@ +// only support Science journal + const cheerio = require('cheerio'); const got = require('@/utils/got');