diff --git a/docs/en/journal.md b/docs/en/journal.md index 08365b4cbb..f673cca9f1 100644 --- a/docs/en/journal.md +++ b/docs/en/journal.md @@ -6,7 +6,7 @@ pageClass: routes ## Cell Journal - + | `:category` | Query Type | Route | | :---------: | :---------------------: | ---------------------------------------- | @@ -19,7 +19,7 @@ pageClass: routes ### Latest Research - Research by Subject - + ## Google Scholar @@ -51,19 +51,19 @@ The parameter id in the route is the id in the URL of the user ’s Google Schol ### Latest Research - + -| `:journal` | Full Name of the Journal | Route | -| :-----------: | :-------------------------: | ---------------------------------------------------------------- | -| nature | Nature | [/nature/research/nature](/nature/research/nature) | -| nbt | Nature Biotechnology | [/nature/research/nbt](/nature/research/nbt) | -| neuro | Nature Neuroscience | [/nature/research/neuro](/nature/research/neuro) | -| ng | Nature Genetics | [/nature/research/ng](/nature/research/ng) | -| ni | Nature Immunology | [/nature/research/ni](/nature/research/ni) | -| nmeth | Nature Method | [/nature/research/nmeth](/nature/research/nmeth) | -| nchem | Nature Chemistry | [/nature/research/nchem](/nature/research/nchem) | -| nmat | Nature Materials | [/nature/research/nmat](/nature/research/nmat) | -| natmachintell | Nature Machine Intelligence | [/nature/research/natmachintell](/nature/research/natmachintell) | +| `:journal` | Full Name of the Journal | Route | +| :-----------: | :-------------------------: | ---------------------------------------------------------------------------------- | +| nature | Nature | [/nature/research/nature](https://rsshub.app/nature/research/nature) | +| nbt | Nature Biotechnology | [/nature/research/nbt](https://rsshub.app/nature/research/nbt) | +| neuro | Nature Neuroscience | [/nature/research/neuro](https://rsshub.app/nature/research/neuro) | +| ng | Nature Genetics | 
[/nature/research/ng](https://rsshub.app/nature/research/ng) | +| ni | Nature Immunology | [/nature/research/ni](https://rsshub.app/nature/research/ni) | +| nmeth | Nature Method | [/nature/research/nmeth](https://rsshub.app/nature/research/nmeth) | +| nchem | Nature Chemistry | [/nature/research/nchem](https://rsshub.app/nature/research/nchem) | +| nmat | Nature Materials | [/nature/research/nmat](https://rsshub.app/nature/research/nmat) | +| natmachintell | Nature Machine Intelligence | [/nature/research/natmachintell](https://rsshub.app/nature/research/natmachintell) | - Using router (`/nature/research/` + “short name for a journal”) to query latest research paper for a certain journal of Nature Publishing Group. If the `:journal` parameter is blank, then latest research of Nature will return. @@ -74,67 +74,94 @@ The parameter id in the route is the id in the URL of the user ’s Google Schol ### News & Comment - + -| `:journal` | Full Name of the Journal | Route | -| :-----------: | :-------------------------: | -------------------------------------------------------------------------------- | -| nbt | Nature Biotechnology | [/nature/news-and-comment/nbt](/nature/news-and-comment/nbt) | -| neuro | Nature Neuroscience | [/nature/news-and-comment/neuro](/nature/news-and-comment/neuro) | -| ng | Nature Genetics | [/nature/news-and-comment/ng](/nature/news-and-comment/ng) | -| ni | Nature Immunology | [/nature/news-and-comment/ni](/nature/news-and-comment/ni) | -| nmeth | Nature Method | [/nature/news-and-comment/nmeth](/nature/news-and-comment/nmeth) | -| nchem | Nature Chemistry | [/nature/news-and-comment/nchem](/nature/news-and-comment/nchem) | -| nmat | Nature Materials | [/nature/news-and-comment/nmat](/nature/news-and-comment/nmat) | -| natmachintell | Nature Machine Intelligence | [/nature/news-and-comment/natmachintell](/nature/news-and-comment/natmachintell) | +| `:journal` | Full Name of the Journal | Route | +| :-----------: | :-------------------------: 
| --------------------------------------------------------------------------------------------------------------------- | +| nbt | Nature Biotechnology | [/nature/news-and-comment/nbt](https://rsshub.app/nature/news-and-comment/nbt) | +| neuro | Nature Neuroscience | [/nature/news-and-comment/neuro](https://rsshub.app/nature/news-and-comment/neuro) | +| ng | Nature Genetics | [/nature/news-and-comment/ng](https://rsshub.app/nature/news-and-comment/ng) | +| ni | Nature Immunology | [/nature/news-and-comment/ni](https://rsshub.app/nature/news-and-comment/ni) | +| nmeth | Nature Method | [/nature/news-and-comment/nmeth](https://rsshub.app/nature/news-and-comment/nmeth) | +| nchem | Nature Chemistry | [/nature/news-and-comment/nchem](https://rsshub.app/nature/news-and-comment/nchem) | +| nmat | Nature Materials | [/nature/news-and-comment/nmat](https://rsshub.app/nature/news-and-comment/nmat) | +| natmachintell | Nature Machine Intelligence | [/nature/news-and-comment/natmachintell](https://rsshub.app/nature/news-and-comment/natmachintell) | - Using router (`/nature/research/` + “short name for a journal”) to query latest research paper for a certain journal of Nature Publishing Group. - The journals from NPG are run by different group of people, and the website of may not be consitent for all the journals +### Cover Story + + + +Subscribe to the cover images of the Nature journals, and get the latest publication updates in time. + + + ### News - + ### Research Highlight - + ## Proceedings of The National Academy of Sciences (PNAS) ### Latest Articles - Articles by Topic - +### Proceedings of The National Academy of Sciences (PNAS) - Latest Articles + + + +- Using router (`/pnas/` + Topic of Interest) to query latest research paper for a certain topic from PNAS journal. + If the `:topic` parameter is blank, or equal to 'latest', then all the latest papers will return.
+ + + + ## PubMed ### Trending - + ## Science Journal ### Current Issue - + -| `:journal` | Full Name of the Journal | Route | -| :--------: | :----------------------------: | ---------------------------------------------------------------- | -| science | Science | [/sciencemag/current/science](/sciencemag/current/science) | -| advances | Science Advances | [/sciencemag/current/advances](/sciencemag/current/advances) | -| immunology | Science Immunology | [/sciencemag/current/immunology](/sciencemag/current/immunology) | -| robotics | Science Robotics | [/sciencemag/current/robotics](/sciencemag/current/robotics) | -| stke | Science Signaling | [/sciencemag/current/stke](/sciencemag/current/stke) | -| stm | Science Translational Medicine | [/sciencemag/current/stm](/sciencemag/current/stm) | +| `:journal` | Full Name of the Journal | Route | +| :--------: | :----------------------------: | ---------------------------------------------------------------------------------- | +| science | Science | [/sciencemag/current/science](https://rsshub.app/sciencemag/current/science) | +| advances | Science Advances | [/sciencemag/current/advances](https://rsshub.app/sciencemag/current/advances) | +| immunology | Science Immunology | [/sciencemag/current/immunology](https://rsshub.app/sciencemag/current/immunology) | +| robotics | Science Robotics | [/sciencemag/current/robotics](https://rsshub.app/sciencemag/current/robotics) | +| stke | Science Signaling | [/sciencemag/current/stke](https://rsshub.app/sciencemag/current/stke) | +| stm | Science Translational Medicine | [/sciencemag/current/stm](https://rsshub.app/sciencemag/current/stm) | - Using router (`/sciencemag/current/` + “short name for a journal”) to query current issue of a journal form AAAS. leave the parameter blank(`/sciencemag/current`)to get update from Science. +### Cover Story + + + +Subscribe to the cover images of the Science journals, and get the latest publication updates in time. 
+ +Including ‘Science’, 'Science Advances', 'Science Immunology', 'Science Robotics', 'Science Signaling' and 'Science Translational Medicine'. + + + ### First Release - + _only support Science Journal_ @@ -144,4 +171,4 @@ _only support Science Journal_ ### Journal - + diff --git a/docs/journal.md b/docs/journal.md index 2fbd75666b..017eea673a 100644 --- a/docs/journal.md +++ b/docs/journal.md @@ -21,25 +21,25 @@ pageClass: routes ### 最新成果 - + ## Nature 系列 ### 最新成果 - + -| `:journal` | 期刊名 | 路由 | -| :-----------: | :-------------------------: | ---------------------------------------------------------------- | -| nature | Nature | [/nature/research/nature](/nature/research/nature) | -| nbt | Nature Biotechnology | [/nature/research/nbt](/nature/research/nbt) | -| neuro | Nature Neuroscience | [/nature/research/neuro](/nature/research/neuro) | -| ng | Nature Genetics | [/nature/research/ng](/nature/research/ng) | -| ni | Nature Immunology | [/nature/research/ni](/nature/research/ni) | -| nmeth | Nature Method | [/nature/research/nmeth](/nature/research/nmeth) | -| nchem | Nature Chemistry | [/nature/research/nchem](/nature/research/nchem) | -| nmat | Nature Materials | [/nature/research/nmat](/nature/research/nmat) | -| natmachintell | Nature Machine Intelligence | [/nature/research/natmachintell](/nature/research/natmachintell) | +| `:journal` | 期刊名 | 路由 | +| :-----------: | :-------------------------: | ---------------------------------------------------------------------------------- | +| nature | Nature | [/nature/research/nature](https://rsshub.app/nature/research/nature) | +| nbt | Nature Biotechnology | [/nature/research/nbt](https://rsshub.app/nature/research/nbt) | +| neuro | Nature Neuroscience | [/nature/research/neuro](https://rsshub.app/nature/research/neuro) | +| ng | Nature Genetics | [/nature/research/ng](https://rsshub.app/nature/research/ng) | +| ni | Nature Immunology | [/nature/research/ni](https://rsshub.app/nature/research/ni) | +| nmeth | 
Nature Method | [/nature/research/nmeth](https://rsshub.app/nature/research/nmeth) | +| nchem | Nature Chemistry | [/nature/research/nchem](https://rsshub.app/nature/research/nchem) | +| nmat | Nature Materials | [/nature/research/nmat](https://rsshub.app/nature/research/nmat) | +| natmachintell | Nature Machine Intelligence | [/nature/research/natmachintell](https://rsshub.app/nature/research/natmachintell) | - 通过 `/nature/research/` + “杂志简写”来获取对应杂志的最新文章(Latest Research)。 若参数置空(`/nature/research`),则默认获取主刊(Nature)的最新文章。 @@ -50,18 +50,18 @@ pageClass: routes ### 新闻及评论 - + -| `:journal` | 期刊名 | 路由 | -| :-----------: | :-------------------------: | -------------------------------------------------------------------------------- | -| nbt | Nature Biotechnology | [/nature/news-and-comment/nbt](/nature/news-and-comment/nbt) | -| neuro | Nature Neuroscience | [/nature/news-and-comment/neuro](/nature/news-and-comment/neuro) | -| ng | Nature Genetics | [/nature/news-and-comment/ng](/nature/news-and-comment/ng) | -| ni | Nature Immunology | [/nature/news-and-comment/ni](/nature/news-and-comment/ni) | -| nmeth | Nature Method | [/nature/news-and-comment/nmeth](/nature/news-and-comment/nmeth) | -| nchem | Nature Chemistry | [/nature/news-and-comment/nchem](/nature/news-and-comment/nchem) | -| nmat | Nature Materials | [/nature/news-and-comment/nmat](/nature/news-and-comment/nmat) | -| natmachintell | Nature Machine Intelligence | [/nature/news-and-comment/natmachintell](/nature/news-and-comment/natmachintell) | +| `:journal` | 期刊名 | 路由 | +| :-----------: | :-------------------------: | -------------------------------------------------------------------------------------------------- | +| nbt | Nature Biotechnology | [/nature/news-and-comment/nbt](https://rsshub.app/nature/news-and-comment/nbt) | +| neuro | Nature Neuroscience | [/nature/news-and-comment/neuro](https://rsshub.app/nature/news-and-comment/neuro) | +| ng | Nature Genetics | 
[/nature/news-and-comment/ng](https://rsshub.app/nature/news-and-comment/ng) | +| ni | Nature Immunology | [/nature/news-and-comment/ni](https://rsshub.app/nature/news-and-comment/ni) | +| nmeth | Nature Method | [/nature/news-and-comment/nmeth](https://rsshub.app/nature/news-and-comment/nmeth) | +| nchem | Nature Chemistry | [/nature/news-and-comment/nchem](https://rsshub.app/nature/news-and-comment/nchem) | +| nmat | Nature Materials | [/nature/news-and-comment/nmat](https://rsshub.app/nature/news-and-comment/nmat) | +| natmachintell | Nature Machine Intelligence | [/nature/news-and-comment/natmachintell](https://rsshub.app/nature/news-and-comment/natmachintell) | - 通过 `/nature/research/` + “杂志简写”来获取对应杂志的最新文章(Latest Research)。 主刊由于格式不同,该 router 并未支持,采用 `/nature/news` 来获取新闻。 @@ -69,49 +69,72 @@ pageClass: routes +### 封面故事 + + + +订阅 Nature 系列杂志的封面图片,并及时获取刊物更新状态。 + + + ### 主刊 - 新闻动态 - + ### 主刊 - 精彩研究 - + ## PNAS -### 最新文章(根据领域分类) +### 最新文章(可筛选领域) - + + +- 通过 `/pnas/` + “领域名称”来获取对应“领域”的最新文章(Latest Research)。 + 若参数置空(`/pnas`)或为 latest(`/pnas/latest`),则默认获取全部文章。 + + ## PubMed ### 热门文章 - + ## Science 系列 ### 本期刊物 - + -| `:journal` | 期刊名 | 路由 | -| :--------: | :----------------------------: | ---------------------------------------------------------------- | -| science | Science | [/sciencemag/current/science](/sciencemag/current/science) | -| advances | Science Advances | [/sciencemag/current/advances](/sciencemag/current/advances) | -| immunology | Science Immunology | [/sciencemag/current/immunology](/sciencemag/current/immunology) | -| robotics | Science Robotics | [/sciencemag/current/robotics](/sciencemag/current/robotics) | -| stke | Science Signaling | [/sciencemag/current/stke](/sciencemag/current/stke) | -| stm | Science Translational Medicine | [/sciencemag/current/stm](/sciencemag/current/stm) | +| `:journal` | 期刊名 | 路由 | +| :--------: | :----------------------------: | ---------------------------------------------------------------------------------- | +| 
science | Science | [/sciencemag/current/science](https://rsshub.app/sciencemag/current/science) | +| advances | Science Advances | [/sciencemag/current/advances](https://rsshub.app/sciencemag/current/advances) | +| immunology | Science Immunology | [/sciencemag/current/immunology](https://rsshub.app/sciencemag/current/immunology) | +| robotics | Science Robotics | [/sciencemag/current/robotics](https://rsshub.app/sciencemag/current/robotics) | +| stke | Science Signaling | [/sciencemag/current/stke](https://rsshub.app/sciencemag/current/stke) | +| stm | Science Translational Medicine | [/sciencemag/current/stm](https://rsshub.app/sciencemag/current/stm) | - 通过 `/sciencemag/current/` + “杂志简写”来获取对应杂志最新一期的文章(Current Issue)。 若参数置空(`/sciencemag/current`),则默认获取主刊(Science)的最新文章。 +### 封面故事 + + + +订阅 Science 系列杂志的封面图片,并及时获取刊物更新状态。 + +包含了: ‘Science’, 'Science Advances', 'Science Immunology', 'Science Robotics', 'Science Signaling' 和 'Science Translational Medicine'。 + + + ### 主刊-在线发表 - + _仅支持 Science 主刊_ @@ -121,7 +144,7 @@ _仅支持 Science 主刊_ ### 平台-期刊 - + ## 谷歌学术 diff --git a/lib/router.js b/lib/router.js index b3589994be..221da5b38b 100644 --- a/lib/router.js +++ b/lib/router.js @@ -1998,20 +1998,22 @@ router.get('/linkresearcher/:params', require('./routes/linkresearcher/index')); router.get('/elife/:tid', require('./routes/journals/elife/index')); // PNAS [Sci Journal] -router.get('/pnas/:tid', require('./routes/journals/pnas/index')); +router.get('/pnas/:topic?', require('./routes/pnas/index')); // cell [Sci Journal] router.get('/cell/cell/:category', require('./routes/journals/cell/cell/index')); // nature + nature 子刊 [Sci Journal] -router.get('/nature/research/:journal?', require('./routes/journals/nature/research')); -router.get('/nature/news-and-comment/:journal?', require('./routes/journals/nature/news-and-comment')); -router.get('/nature/news', require('./routes/journals/nature/news')); -router.get('/nature/highlight', 
require('./routes/journals/nature/highlight')); +router.get('/nature/research/:journal?', require('./routes/nature/research')); +router.get('/nature/news-and-comment/:journal?', require('./routes/nature/news-and-comment')); +router.get('/nature/cover', require('./routes/nature/cover')); +router.get('/nature/news', require('./routes/nature/news')); +router.get('/nature/highlight', require('./routes/nature/highlight')); // science [Sci Journal] -router.get('/sciencemag/current/:journal?', require('./routes/journals/sciencemag/current')); -router.get('/sciencemag/early/science', require('./routes/journals/sciencemag/early')); +router.get('/sciencemag/current/:journal?', require('./routes/sciencemag/current')); +router.get('/sciencemag/cover', require('./routes/sciencemag/cover')); +router.get('/sciencemag/early/science', require('./routes/sciencemag/early')); // dlsite router.get('/dlsite/new/:type', require('./routes/dlsite/new')); diff --git a/lib/routes/cell/cell/index.js b/lib/routes/cell/cell/index.js new file mode 100644 index 0000000000..4b4c7042ca --- /dev/null +++ b/lib/routes/cell/cell/index.js @@ -0,0 +1,123 @@ +// cell.com is extremely slow, and redirect too many times. +// Thus, the content page are replaced by www.sciencedirect.com. 
+ +const cheerio = require('cheerio'); +const got = require('@/utils/got'); + +module.exports = async (ctx) => { + const baseURL = 'https://www.cell.com'; + const category = ctx.params.category; + let pageURL = `${baseURL}/cell/current.rss`; + let categoryTitle = 'Latest issue'; + if (category === 'inpress') { + pageURL = `${baseURL}/cell/inpress.rss`; + categoryTitle = 'Articles in press'; + } + + const alternativeURL = 'https://www.sciencedirect.com/science/article/pii/'; + const pageResponse = await got.get(pageURL); + const pageCapture = cheerio.load(pageResponse.data); + + const list = pageCapture('item') + .get() + .filter(function(item) { + const $ = cheerio.load(item); + const section = $('prism\\:section').text(); + + return ['Article', 'Resource'].includes(section); + }); + + const out = await Promise.all( + list.map(async (item) => { + const $ = cheerio.load(item); + const section = $('prism\\:section').text(); + const address = + alternativeURL + + $('item') + .attr('rdf:about') + .replace('?rss=yes', '') + .split('/') + .pop(); + const title = $('dc\\:title').text(); + const author = $('dc\\:creator').text(); + const pubDate = new Date($('dc\\:date').text()).toUTCString(); + const cache = await ctx.cache.get(address); + if (cache) { + return Promise.resolve(JSON.parse(cache)); + } + const itemPage = await got.get(address); + const itemCapture = cheerio.load(itemPage.data); + // section + keywords content + const keywords = itemCapture('div.keywords-section > div.keyword') + .map(function(i, el) { + return $(el).text(); + }) + .get() + .join('; '); + const sectionContents = ` +
+ ${section} +
+

[${keywords}]

+
+ `; + // graphical content + const brief = $('description').text(); + const graphical = itemCapture('div.abstract.graphical') + .find('img') + .attr('src'); + let graphicalContents = ''; + if (graphical !== '') { + graphicalContents = ` +
+

Graphical Abstract

+
+
+ +
+
+ ${brief} +
`; + } + // highlight content + const highlight = itemCapture('div.abstract.author-highlights dl').html(); + let highlightContents = ''; + if (highlight !== '') { + highlightContents = ` +
+

Highlights

+ ${highlight} +
`; + } + // summary content + const summary = itemCapture('div.abstract.author p').html(); + let summaryContents = ''; + if (summary !== '') { + summaryContents = ` +
+

Summary

+ ${summary} +
`; + } + const contents = sectionContents + graphicalContents + highlightContents + summaryContents; + + const single = { + title: title, + author: author, + description: contents, + link: address, + guid: address, + doi: $('dc\\:identifier').text(), + pubDate: pubDate, + }; + ctx.cache.set(address, JSON.stringify(single)); + return Promise.resolve(single); + }) + ); + ctx.state.data = { + title: `Cell | ${categoryTitle}`, + description: `Cell, a research journal`, + link: baseURL, + item: out, + }; +}; diff --git a/lib/routes/elife/index.js b/lib/routes/elife/index.js new file mode 100644 index 0000000000..f5b79e6489 --- /dev/null +++ b/lib/routes/elife/index.js @@ -0,0 +1,69 @@ +const cheerio = require('cheerio'); +const got = require('@/utils/got'); + +module.exports = async (ctx) => { + const baseUrl = `https://elifesciences.org`; + const tid = ctx.params.tid; + + let url = baseUrl; + if (tid !== 'latest') { + url = `${baseUrl}/subjects/${ctx.params.tid}`; + } + + const res = await got.get(url); + const $ = cheerio.load(res.data); + $('.grid-secondary-column').remove(); + + let topic = 'latest'; + if (tid !== 'latest') { + topic = $('h1.content-header__title.content-header__title--xx-short').text(); + } + + const list = $('li.listing-list__item').get(); + + const out = await Promise.all( + list.map(async (item) => { + const $ = cheerio.load(item); + const title = $('a.teaser__header_text_link').text(); + const partial = $('a.teaser__header_text_link').attr('href'); + const address = `${baseUrl}${partial}`; + const time = $('time').text(); + const description = $('div.teaser__body').text(); + const author = $('div.teaser__secondary_info') + .text() + .trim(); + + const cache = await ctx.cache.get(address); + if (cache) { + return Promise.resolve(JSON.parse(cache)); + } + const res = await got.get(address); + const capture = cheerio.load(res.data); + + const abstract = capture('#abstract .article-section__body').html(); + let contents; + if (description !== '') { 
+ contents = `${description}

Abstract

${abstract}`; + } else { + contents = `
Abstract
${abstract}`; + } + + const single = { + title, + author: author, + description: contents, + link: address, + guid: address, + doi: capture('meta[name="dc.identifier"]')[0].attribs.content, + pubDate: new Date(time).toUTCString(), + }; + ctx.cache.set(address, JSON.stringify(single)); + return Promise.resolve(single); + }) + ); + ctx.state.data = { + title: `eLife | ${topic}`, + link: url, + item: out, + }; +}; diff --git a/lib/routes/journals/nature/news.js b/lib/routes/journals/nature/news.js index ab81becbc5..69ece554f0 100644 --- a/lib/routes/journals/nature/news.js +++ b/lib/routes/journals/nature/news.js @@ -10,7 +10,7 @@ module.exports = async (ctx) => { const list = $('.border-bottom-1.pb20').get(); const out = await Promise.all( - list.slice(0, 2).map(async (item) => { + list.map(async (item) => { const $ = cheerio.load(item); const title = $('h3 > a').text(); const partial = $('h3 > a').attr('href'); diff --git a/lib/routes/journals/nature/research.js b/lib/routes/journals/nature/research.js index ba6a816427..e8595e84e8 100644 --- a/lib/routes/journals/nature/research.js +++ b/lib/routes/journals/nature/research.js @@ -32,7 +32,7 @@ module.exports = async (ctx) => { const list = pageCapture('.border-bottom-1.pb20').get(); const items = await Promise.all( - list.slice(4, 6).map(async (el) => { + list.map(async (el) => { const $ = cheerio.load(el); const title = $('h3 > a').text(); const partial = $('h3 > a').attr('href'); @@ -54,27 +54,117 @@ module.exports = async (ctx) => { } const itemResponse = await got.get(address); const itemCapture = cheerio.load(itemResponse.data); - const abs = itemCapture('div#Abs1-content.c-article-section__content > p').html(); + // Brief [obtain from entry of each item] let briefContents = ''; if (brief !== '') { briefContents = ` -
-

Brief

-

${brief}

+
+

Brief

+

${brief}

`; } + // Abstract [obtained from each article page] + const abs = itemCapture('div#Abs1-content.c-article-section__content > p').html(); let absContents = ''; if (abs !== null) { absContents = ` -
-

Abstract

-

${abs}

+
+

Abstract

+

${abs}

`; } - const contents = briefContents + absContents; + // Info [obtain form address of each page] + const subject = itemCapture('li.c-article-subject-list__subject > a') + .map(function() { + const link = $(this).attr('href'); + const name = $(this).text(); + if (name !== '') { + return `
  • ${name}
  • `; + } else { + return ''; + } + }) + .get() + .join(''); + const subjectContents = subject !== '' ? `
      ${subject}
    ` : ''; + const citation = itemCapture('p.c-bibliographic-information__download-citation > a').attr('href'); + const citationContents = citation !== undefined ? `Download citation` : ''; + const doi = itemCapture('meta[name="DOI"]').attr('content'); + const doiContents = doi !== undefined ? `DOI: ${doi}` : ''; + const pdf = itemCapture('meta[name="citation_pdf_url"]').attr('content'); + const pdfContents = pdf !== undefined ? `Offical PDF` : ''; + const linkContents = '
      ' + [citationContents, doiContents, pdfContents].filter((x) => x !== '').map((x) => `
    • ${x}
    • `) + '
    '; + const infoContents = ` +
    +

    About this article

    +

    Subjects:

    + ${subjectContents} +
    +

    Links:

    + ${linkContents} +
+ `; + // Add style + const contentStyle = ` + + `; + const contents = briefContents + absContents + infoContents + contentStyle; const item = { title, diff --git a/lib/routes/nature/cover.js b/lib/routes/nature/cover.js new file mode 100644 index 0000000000..c3f6aed7c1 --- /dev/null +++ b/lib/routes/nature/cover.js @@ -0,0 +1,90 @@ +// The content is generated by an undocumented API of the Nature journals +// This router has **just** been tested in: +// nature: Nature +// nbt: Nature Biotechnology +// neuro: Nature Neuroscience +// ng: Nature Genetics +// ni: Nature Immunology +// nmeth: Nature Methods +// nchem: Nature Chemistry +// nmat: Nature Materials +// natmachintell: Nature Machine Intelligence +// ncb: Nature Cell Biology +// nplants: Nature Plants +// natastron: Nature Astronomy +// nphys: Nature Physics + +const got = require('@/utils/got'); + +module.exports = async (ctx) => { + const baseURL = 'https://www.nature.com'; + const journalMap = new Map([ + ['nature', { id: '41586', name: 'Nature' }], + ['nbt', { id: '41587', name: 'Nature Biotechnology' }], + ['neuro', { id: '41593', name: 'Nature Neuroscience' }], + ['ng', { id: '41588', name: 'Nature Genetics' }], + ['ni', { id: '41590', name: 'Nature Immunology' }], + ['nmeth', { id: '41592', name: 'Nature Methods' }], + ['nchem', { id: '41590', name: 'Nature Chemistry' }], + ['nmat', { id: '41563', name: 'Nature Material' }], + ['natmachintell', { id: '42256', name: 'Nature Machine Intelligence' }], + ['ncb', { id: '41556', name: 'Nature Cell Biology' }], + ['nplants', { id: '41477', name: 'Nature Plants' }], + ['natastron', { id: '41550', name: 'Nature Astronomy' }], + ['nphys', { id: '41567', name: 'Nature Physics' }], + ]); + const journals = [...journalMap.keys()]; + const out = await Promise.all( + journals.map(async (journal) => { + // get the latest volume and issue id + const pageURL = `${baseURL}/${journal}/current-issue`; + const cookieData = await got + .extend({ + prefixUrl: 
'https://idp.nature.com/authorize', + followRedirect: false, + }) + .get(`?response_type=cookie&client_id=grover&redirect_uri=${encodeURI(pageURL)}`) + .then((response) => response.headers['set-cookie'].join(' ')); + const issueURL = await got + .extend({ + prefixUrl: pageURL, + headers: { + cookie: cookieData, + }, + followRedirect: false, + }) + .get('') + .then((response) => response.headers.location); + const capturingRegex = /volumes\/(?\d+)\/issues\/(?\d+)/; + const { volumes, issues } = issueURL.match(capturingRegex).groups; + + const address = `${baseURL}${issueURL}`; + const cache = await ctx.cache.get(address); + if (cache) { + return Promise.resolve(JSON.parse(cache)); + } + const imgSize = 600; + + const id = journalMap.get(journal).id; + const imageURL = `https://media.springernature.com/w${imgSize}/springer-static/cover-hires/journal/${id}/${volumes}/${issues}?as=webp`; + const contents = `
    Volume ${volumes} Issue ${issues}
    `; + + const single = { + title: `${journalMap.get(journal).name} | Volume ${volumes} Issue ${issues}`, + author: '@yech1990', + description: contents, + link: address, + guid: address, + pubDate: new Date().toUTCString(), + }; + ctx.cache.set(address, JSON.stringify(single)); + return Promise.resolve(single); + }) + ); + ctx.state.data = { + title: 'Nature Covers Story', + description: 'Find out the cover story of some Nature journals.', + link: baseURL, + item: out, + }; +}; diff --git a/lib/routes/nature/highlight.js b/lib/routes/nature/highlight.js new file mode 100644 index 0000000000..35c5335aa3 --- /dev/null +++ b/lib/routes/nature/highlight.js @@ -0,0 +1,60 @@ +const cheerio = require('cheerio'); +const got = require('@/utils/got'); + +module.exports = async (ctx) => { + const base = `https://www.nature.com`; + const url = `${base}/nature/articles?type=research-highlight`; + + const res = await got.get(url); + const $ = cheerio.load(res.data); + const list = $('.border-bottom-1.pb20').get(); + + const out = await Promise.all( + list.slice(0, 2).map(async (item) => { + const $ = cheerio.load(item); + const title = $('h3 > a').text(); + const partial = $('h3 > a').attr('href'); + const address = `${base}${partial}`; + const time = $('time').text(); + let author; + if ($('.js-list-authors-3 li').length > 3) { + author = + $('.js-list-authors-3 li') + .slice(0, 1) + .text() + ' et al.'; + } else { + author = $('.js-list-authors-3 li').text(); + } + const cache = await ctx.cache.get(address); + if (cache) { + return Promise.resolve(JSON.parse(cache)); + } + const res = await got.get(address); + const capture = cheerio.load(res.data); + let figure = capture('figure .figure--bleed').html(); + if (figure === null) { + figure = ''; + } + let contents = capture('.article-item--open .article-item__body').html(); + if (contents === null) { + contents = ''; + } + const single = { + title, + author: author, + description: figure + contents, + link: address, + guid: 
address, + pubDate: new Date(time).toUTCString(), + }; + ctx.cache.set(address, JSON.stringify(single)); + return Promise.resolve(single); + }) + ); + ctx.state.data = { + title: `Nature | Research Highlight`, + description: `Nature, a nature research journal`, + link: url, + item: out, + }; +}; diff --git a/lib/routes/nature/news-and-comment.js b/lib/routes/nature/news-and-comment.js new file mode 100644 index 0000000000..e08c1eebd2 --- /dev/null +++ b/lib/routes/nature/news-and-comment.js @@ -0,0 +1,80 @@ +// example usage: `/nature/news-and-comment/ng` +// The journals from NPG are run by different groups of people, +// and the websites may not be consistent across all the journals +// +// This router has **just** been tested in: +// nbt: Nature Biotechnology +// neuro: Nature Neuroscience +// ng: Nature Genetics +// ni: Nature Immunology +// nmeth: Nature Methods +// nchem: Nature Chemistry +// nmat: Nature Materials +// natmachintell: Nature Machine Intelligence + +const cheerio = require('cheerio'); +const got = require('@/utils/got'); + +module.exports = async (ctx) => { + const baseURL = `https://www.nature.com`; + + const journal = ctx.params.journal; + const pageURL = `${baseURL}/${journal}/news-and-comment`; + + const pageResponse = await got.get(pageURL); + const pageCapture = cheerio.load(pageResponse.data); + + const pageDescription = pageCapture('meta[name="description"]').attr('content') || `Nature, a nature research journal`; + const pageTitleName = pageCapture('meta[name="WT.cg_n"]').attr('content') || `Nature (${journal})`; + const pageTitleSub = pageCapture('meta[name="WT.cg_s"]').attr('content') || 'News & Comment'; + + const list = pageCapture('.border-bottom-1.pb20').get(); + + const items = await Promise.all( + list.map(async (el) => { + const $ = cheerio.load(el); + const title = $('h3 > a').text(); + const partial = $('h3 > a').attr('href'); + const address = `${baseURL}${partial}`; + const brief = $('.hide-overflow.inline').text(); + const 
time = $('time').text(); + const author = $('.js-list-authors-3 li').text(); + const articleType = $('p > span').attr('data-class'); + const headerContents = ` +
    +

    + ${articleType} + | + ${author} +

    +
    + `; + let briefContents = ''; + if (brief !== '') { + briefContents = ` +
    +

    Brief

    +

    ${brief}

    +
    + `; + } + const contents = headerContents + briefContents; + + const item = { + title, + author: author, + description: contents, + link: address, + guid: address, + pubDate: new Date(time).toUTCString(), + }; + return Promise.resolve(item); + }) + ); + ctx.state.data = { + title: `${pageTitleName} | ${pageTitleSub}`, + description: pageDescription, + link: pageURL, + item: items, + }; +}; diff --git a/lib/routes/nature/news.js b/lib/routes/nature/news.js new file mode 100644 index 0000000000..69ece554f0 --- /dev/null +++ b/lib/routes/nature/news.js @@ -0,0 +1,54 @@ +const cheerio = require('cheerio'); +const got = require('@/utils/got'); + +module.exports = async (ctx) => { + const base = `https://www.nature.com`; + const url = `${base}/nature/articles?type=news`; + + const res = await got.get(url); + const $ = cheerio.load(res.data); + const list = $('.border-bottom-1.pb20').get(); + + const out = await Promise.all( + list.map(async (item) => { + const $ = cheerio.load(item); + const title = $('h3 > a').text(); + const partial = $('h3 > a').attr('href'); + const address = `${base}${partial}`; + const time = $('time').text(); + let author; + if ($('.js-list-authors-3 li').length > 3) { + author = + $('.js-list-authors-3 li') + .slice(0, 1) + .text() + ' et al.'; + } else { + author = $('.js-list-authors-3 li').text(); + } + const cache = await ctx.cache.get(address); + if (cache) { + return Promise.resolve(JSON.parse(cache)); + } + const res = await got.get(address); + const capture = cheerio.load(res.data); + const contents = capture('.content .article__body').html(); + + const single = { + title, + author: author, + description: contents, + link: address, + guid: address, + pubDate: new Date(time).toUTCString(), + }; + ctx.cache.set(address, JSON.stringify(single)); + return Promise.resolve(single); + }) + ); + ctx.state.data = { + title: `Nature | Latest News`, + description: `Nature, a nature research journal`, + link: url, + item: out, + }; +}; diff 
--git a/lib/routes/nature/research.js b/lib/routes/nature/research.js new file mode 100644 index 0000000000..e8595e84e8 --- /dev/null +++ b/lib/routes/nature/research.js @@ -0,0 +1,187 @@ +// example usage: `/nature/research/ng` +// The journals from NPG are run by different group of people, +// and the website of may not be consitent for all the journals +// +// This router has **just** been tested in: +// nature: Nature +// nbt: Nature Biotechnology +// neuro: Nature Neuroscience +// ng: Nature Genetics +// ni: Nature Immunology +// nmeth: Nature Method +// nchem: Nature Chemistry +// nmat: Nature Materials +// natmachintell: Nature Machine Intelligence + +const cheerio = require('cheerio'); +const got = require('@/utils/got'); + +module.exports = async (ctx) => { + const baseURL = `https://www.nature.com`; + + const journal = ctx.params.journal || 'nature'; + const pageURL = `${baseURL}/${journal}/research`; + + const pageResponse = await got.get(pageURL); + const pageCapture = cheerio.load(pageResponse.data); + + const pageDescription = pageCapture('meta[name="description"]').attr('content') || `Nature, a nature research journal`; + const pageTitleName = pageCapture('meta[name="WT.cg_n"]').attr('content') || `Nature (${journal})`; + const pageTitleSub = pageCapture('meta[name="WT.cg_s"]').attr('content') || 'Latest Research'; + + const list = pageCapture('.border-bottom-1.pb20').get(); + + const items = await Promise.all( + list.map(async (el) => { + const $ = cheerio.load(el); + const title = $('h3 > a').text(); + const partial = $('h3 > a').attr('href'); + const address = `${baseURL}${partial}`; + const brief = $('.hide-overflow.inline').text(); + const time = $('time').text(); + let author; + if ($('.js-list-authors-3 li').length > 3) { + author = + $('.js-list-authors-3 li') + .slice(0, 1) + .text() + ' et al.'; + } else { + author = $('.js-list-authors-3 li').text(); + } + const cache = await ctx.cache.get(address); + if (cache) { + return 
Promise.resolve(JSON.parse(cache)); + } + const itemResponse = await got.get(address); + const itemCapture = cheerio.load(itemResponse.data); + + // Brief [obtain from entry of each item] + let briefContents = ''; + if (brief !== '') { + briefContents = ` +
    +

    Brief

    +

    ${brief}

    +
    + `; + } + // Abstract [obtain form address of each page] + const abs = itemCapture('div#Abs1-content.c-article-section__content > p').html(); + let absContents = ''; + if (abs !== null) { + absContents = ` +
    +

    Abstract

    +

    ${abs}

    +
    + `; + } + // Info [obtain form address of each page] + const subject = itemCapture('li.c-article-subject-list__subject > a') + .map(function() { + const link = $(this).attr('href'); + const name = $(this).text(); + if (name !== '') { + return `
  • ${name}
  • `; + } else { + return ''; + } + }) + .get() + .join(''); + const subjectContents = subject !== '' ? `
      ${subject}
    ` : ''; + const citation = itemCapture('p.c-bibliographic-information__download-citation > a').attr('href'); + const citationContents = citation !== undefined ? `Download citation` : ''; + const doi = itemCapture('meta[name="DOI"]').attr('content'); + const doiContents = doi !== undefined ? `DOI: ${doi}` : ''; + const pdf = itemCapture('meta[name="citation_pdf_url"]').attr('content'); + const pdfContents = pdf !== undefined ? `Offical PDF` : ''; + const linkContents = '
      ' + [citationContents, doiContents, pdfContents].filter((x) => x !== '').map((x) => `
    • ${x}
    • `) + '
    '; + const infoContents = ` +
    +

    About this article

    +

    Subjects:

    + ${subjectContents} +
    +

    Links:

    + ${linkContents} +
    + `; + // Add style + const contentStyle = ` + + `; + const contents = briefContents + absContents + infoContents + contentStyle; + + const item = { + title, + author: author, + description: contents, + link: address, + guid: address, + pubDate: new Date(time).toUTCString(), + }; + ctx.cache.set(address, JSON.stringify(item)); + return Promise.resolve(item); + }) + ); + ctx.state.data = { + title: `${pageTitleName} | ${pageTitleSub}`, + description: pageDescription, + link: pageURL, + item: items, + }; +}; diff --git a/lib/routes/pnas/index.js b/lib/routes/pnas/index.js new file mode 100644 index 0000000000..cddb3ae781 --- /dev/null +++ b/lib/routes/pnas/index.js @@ -0,0 +1,66 @@ +const cheerio = require('cheerio'); +const got = require('@/utils/got'); + +module.exports = async (ctx) => { + const baseUrl = `https://www.pnas.org`; + + const tid = ctx.params.tid; + + let url = `${baseUrl}/content/early/recent`; + if (tid !== 'latest') { + url = `${baseUrl}/content/by/section/${ctx.params.tid}`; + } else { + ctx.params.tid = 'Latest Research'; + } + + const res = await got.get(url); + const $ = cheerio.load(res.data); + const list = $('.highwire-citation-pnas-list-complete').get(); + + const out = await Promise.all( + list.map(async (item) => { + const $ = cheerio.load(item); + const title = $('.highwire-cite-title').text(); + const partial = $('.highwire-cite-linked-title').attr('href'); + const address = `${baseUrl}${partial}`; + let author; + if ($('.highwire-citation-authors span').length > 3) { + author = $('.highwire-citation-author.first').text() + ' et al.'; + } else { + author = $('.highwire-citation-authors span').text(); + } + const cache = await ctx.cache.get(address); + if (cache) { + return Promise.resolve(JSON.parse(cache)); + } + const res = await got.get(address); + const capture = cheerio.load(res.data); + + const significance = capture('.executive-summary').html(); + const abstract = capture('.section.abstract').html(); + let contents; + if 
(abstract !== null) { + contents = significance + abstract; + } else { + contents = significance; + } + + const single = { + title, + author: author, + description: contents, + link: address, + guid: address, + doi: capture('meta[name="DC.Identifier"]')[0].attribs.content, + pubDate: new Date(capture('meta[name="DC.Date"]')[0].attribs.content).toUTCString(), + }; + ctx.cache.set(address, JSON.stringify(single)); + return Promise.resolve(single); + }) + ); + ctx.state.data = { + title: `PNAS | ${ctx.params.tid}`, + link: url, + item: out, + }; +}; diff --git a/lib/routes/pubmed/trending.js b/lib/routes/pubmed/trending.js new file mode 100644 index 0000000000..e777af3165 --- /dev/null +++ b/lib/routes/pubmed/trending.js @@ -0,0 +1,73 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const url = require('url'); +const date = require('@/utils/date'); + +const base = 'https://www.ncbi.nlm.nih.gov'; + +module.exports = async (ctx) => { + const link = `${base}/pubmed/trending/`; + const response = await got.get(encodeURI(link)); + const pageCapture = cheerio.load(response.data); + + const list = pageCapture('.content div.rprt > div.rslt').get(); + const out = await Promise.all( + list.map(async (elem) => { + const $ = cheerio.load(elem); + const title = $('p > a').text(); + const partial = $('p > a').attr('href'); + const address = url.resolve(base, partial); + const author = $('div.supp > p.desc').text(); + const pubDate = date( + $('div.supp > p.details') + .text() + .split('. ')[1] + ); + + const item = { + title, + author, + pubDate, + link: encodeURI(address), + }; + + const value = await ctx.cache.get(address); + if (value) { + item.description = value; + } else { + const detail = await got.get(item.link); + const detailCapture = cheerio.load(detail.data); + + let authorContents = ''; + if (author !== '') { + authorContents = ` +
    + ${author} +
    + `; + } + const abs = detailCapture('div.abstr > div').html(); + let absContents = ''; + if (abs !== null) { + absContents = ` +
    +

    Abstract

    +

    ${abs}

    +
    + `; + } + item.description = authorContents + absContents; + ctx.cache.set(address, item.description); + } + + return Promise.resolve(item); + }) + ); + + ctx.state.data = { + title: 'PubMed | Trending Articles', + description: 'Trending Articles from PubMed Website', + link: link, + item: out, + }; +}; diff --git a/lib/routes/sciencemag/cover.js b/lib/routes/sciencemag/cover.js new file mode 100644 index 0000000000..f598a6400f --- /dev/null +++ b/lib/routes/sciencemag/cover.js @@ -0,0 +1,58 @@ +// journals form AAAS publishing group +// +// science: Science +// advances: Science Advances +// immunology: Science Immunology +// robotics: Science Robotics +// stke: Science Signaling +// stm: Science Translational Medicine + +const got = require('@/utils/got'); +const cheerio = require('cheerio'); + +module.exports = async (ctx) => { + const pageURL = 'https://www.sciencemag.org/journals'; + + const pageResponse = await got.get(pageURL); + const $ = cheerio.load(pageResponse.data); + + const items = $('section.journal-landing') + .map((_, elem) => { + // eg, www.sciencemag.org/sites/default/files/styles/240x300__4_3_/public/highwire/covers/scitransmed/12/532/-F1.medium.gif?itok=aOwRMF-8 + const { id, vol, no } = $('div.media__icon > a > img', elem) + .attr('src') + .match(/\/covers\/(?\w+)\/(?\d+)\/(?\d+)\//).groups; + const url = $('div.media__body > ul > li > a', elem) + .filter(function(_, el) { + return $(el).text() === 'Current Issue'; + }) + .attr('href') + .replace(/^http:/, 'https:') + .replace(/\/$/, ''); + const name = $('div.media__body > h2', elem).text(); + const date = $('div.media__body > p > span', elem).text(); + + // eg, https://advances.sciencemag.org/content/6/8 + const address = `${url}/content/${vol}/${no}`; + // eg, https://immunology.sciencemag.org/content/immunology/5/44/F1.medium.gif + const imageURL = `${url}/content/${id}/${vol}/${no}/F1.medium.gif`; + const contents = `
    Vol. ${vol} No. ${no}
    `; + + return { + title: `${name} | Vol. ${vol} No. ${no}`, + author: '@yech1990', + description: contents, + link: address, + guid: address, + pubDate: new Date(date).toUTCString(), + }; + }) + .get(); + + ctx.state.data = { + title: 'Science Covers Story', + description: 'Find out the cover story of some Science journals.', + link: pageURL, + item: items, + }; +}; diff --git a/lib/routes/sciencemag/current.js b/lib/routes/sciencemag/current.js new file mode 100644 index 0000000000..b617607544 --- /dev/null +++ b/lib/routes/sciencemag/current.js @@ -0,0 +1,125 @@ +// journals form AAAS publishing group +// +// science: Science +// advances: Science Advances +// immunology: Science Immunology +// robotics: Science Robotics +// stke: Science Signaling +// stm: Science Translational Medicine + +const cheerio = require('cheerio'); +const got = require('@/utils/got'); + +module.exports = async (ctx) => { + const journal = ctx.params.journal || 'science'; + const baseURL = `https://${journal}.sciencemag.org`; + + const pageURL = baseURL; + const pageResponse = await got.get(pageURL); + const pageCapture = cheerio.load(pageResponse.data); + + const pageTitleName = pageCapture('head > title').text() || `Science (${journal})`; + + // just select paper relative sections + const sectionList = ['research-articles', 'review', 'reports']; + + const list = [].concat.apply( + [], + sectionList.map((section) => { + const sectionContent = pageCapture(`ul > li.issue-toc-section.issue-toc-section-${section}`).html(); + if (sectionContent !== null) { + const sec = cheerio.load(sectionContent); + const sectionName = sec('h2').text(); + const sectionList = sec('ul > li > div > div > article > div') + .append(`
    ${sectionName}
    `) + .get(); + return sectionList; + } + return []; + }) + ); + + const out = await Promise.all( + list.map(async (item) => { + const $ = cheerio.load(item); + + const title = $('h3').text(); + const partial = $('h3 > a').attr('href'); + const address = `${baseURL}${partial}`; + const section = $('div .toc-section-type').text(); + + let author; + const authorList = $('span.highwire-citation-authors > span.highwire-citation-author') + .map((_, el) => $(el).text()) + .get(); + if (authorList.length > 5) { + author = authorList.slice(0, 5).join(', ') + ' et al.'; + } else { + author = authorList.join(', '); + } + + const time = new Date($('p.highwire-cite-metadata > time').text()).toUTCString(); + + const cache = await ctx.cache.get(address); + if (cache) { + return Promise.resolve(JSON.parse(cache)); + } + + // contents + // section content + let sectionContents = ''; + if (section !== '') { + sectionContents = ` +
    + ${section} +
    + `; + } + // brief content + const brief = $('div.highwire-cite-snippet > div > div > p').text(); + let briefContents = ''; + if (brief !== '') { + briefContents = ` +
    +

    ${brief}

    +
    + `; + } + const itemPage = await got.get(address); + const itemCapture = cheerio.load(itemPage.data); + + const abs = itemCapture('div > div.article > div.section') + .map((_, el) => $(el).html()) + .get() + .join('
    '); + + // abs content + let absContents = ''; + if (abs !== null) { + absContents = ` +
    + ${abs} +
    + `; + } + const contents = sectionContents + briefContents + absContents; + + const single = { + title: title, + author: author, + description: contents, + link: address, + guid: address, + pubDate: time, + }; + ctx.cache.set(address, JSON.stringify(single)); + return Promise.resolve(single); + }) + ); + ctx.state.data = { + title: `${pageTitleName} | Current Issue`, + description: `Current Issue of ${pageTitleName}`, + link: baseURL, + item: out, + }; +}; diff --git a/lib/routes/sciencemag/early.js b/lib/routes/sciencemag/early.js new file mode 100644 index 0000000000..12524a5c28 --- /dev/null +++ b/lib/routes/sciencemag/early.js @@ -0,0 +1,101 @@ +// only support Science journal + +const cheerio = require('cheerio'); +const got = require('@/utils/got'); + +module.exports = async (ctx) => { + const base = `https://science.sciencemag.org/content/early/recent`; + + const res = await got.get(base); + const pageCapture = cheerio.load(res.data); + + const list = pageCapture('ul > li > div > article > div').get(); + + const out = await Promise.all( + list.map(async (item) => { + const $ = cheerio.load(item); + + const title = $('h3').text(); + const partial = $('h3 > a').attr('href'); + const address = `${base}${partial}`; + + let author; + const authorList = $('span.highwire-citation-authors > span.highwire-citation-author') + .map(function(i, el) { + return $(el).text(); + }) + .get(); + if (authorList.length > 5) { + author = authorList.slice(0, 5).join(', ') + ' et al.'; + } else { + author = authorList.join(', '); + } + + const time = new Date($('p.highwire-cite-metadata > time').text()).toUTCString(); + + const cache = await ctx.cache.get(address); + if (cache) { + return Promise.resolve(JSON.parse(cache)); + } + + // contents + // brief content + const brief = $('div.highwire-cite-snippet > div > div > p').text(); + let briefContents = ''; + if (brief !== '') { + briefContents = ` +
    +

    ${brief}

    +
    + `; + } + + const itemPage = await got.get(address); + const itemCapture = cheerio.load(itemPage.data); + // section content + const section = itemCapture('header > div.overline').text(); + let sectionContents = ''; + if (section !== '') { + sectionContents = ` +
    + ${section} + [Published Online] +
    + `; + } + // abs content + const abs = itemCapture('div > div.abstract-view > div.section') + .map(function(i, el) { + return $(el).html(); + }) + .get() + .join('
    '); + let absContents = ''; + if (abs !== null) { + absContents = ` +
    + ${abs} +
    + `; + } + const contents = sectionContents + briefContents + absContents; + + const single = { + title: title, + author: author, + description: contents, + link: address, + guid: address, + pubDate: time, + }; + ctx.cache.set(address, JSON.stringify(single)); + return Promise.resolve(single); + }) + ); + ctx.state.data = { + title: `Science | First Release`, + description: `Science, a research journal. For papers that published online.`, + link: base, + item: out, + }; +}; diff --git a/lib/routes/x-mol/news.js b/lib/routes/x-mol/news.js new file mode 100644 index 0000000000..8e3b1799f4 --- /dev/null +++ b/lib/routes/x-mol/news.js @@ -0,0 +1,58 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const utils = require('./utils'); + +module.exports = async (ctx) => { + const tag = ctx.params.tag; + const path = tag ? `news/tag/${tag}` : 'news/index'; + const response = await got(path, { + method: 'GET', + prefixUrl: utils.host, + }); + const data = response.data; + const $ = cheerio.load(data); + + const title = $('title').text(); + const description = $('meta[name="description"]').attr('content'); + const newsitem = $('.newsitem'); + + const item = newsitem + .map((index, element) => { + const title = $(element) + .find('h3') + .find('a') + .text(); + const a = $(element) + .find('p') + .find('a'); + const link = utils.host + a.attr('href'); + const image = $(element) + .find('img') + .attr('src'); + const description = utils.setDesc(image, a.text()); + const span = $(element).find('.space-right-m30'); + const author = span + .text() + .replace('来源:', '') + .trim(); + const date = utils.getDate(span.next().text()); + const pubDate = utils.transDate(date); + + const single = { + title: title, + link: link, + description: description, + author: author, + pubDate: pubDate, + }; + return single; + }) + .get(); + + ctx.state.data = { + title: title, + link: response.url, + description: description, + item: item, + }; +}; diff --git 
a/lib/routes/x-mol/paper.js b/lib/routes/x-mol/paper.js new file mode 100644 index 0000000000..cf4f25570a --- /dev/null +++ b/lib/routes/x-mol/paper.js @@ -0,0 +1,79 @@ +const got = require('@/utils/got'); +const cheerio = require('cheerio'); +const utils = require('./utils'); +const queryString = require('query-string'); + +module.exports = async (ctx) => { + const type = ctx.params.type; + const magazine = ctx.params.magazine; + const path = `paper/${type}/${magazine}`; + const response = await got(path, { + method: 'GET', + prefixUrl: utils.host, + headers: { + Cookie: 'closeFloatWindow=true; journalIndexViewType=list; journalSort=publishDate', + }, + }); + const data = response.data; + const $ = cheerio.load(data); + + const title = $('title').text(); + const description = $('meta[name="description"]').attr('content'); + const newsitem = $('.magazine-text'); + + const item = await Promise.all( + newsitem + .map(async (index, element) => { + const news = $(element); + + const a = news.find('.magazine-text-title').find('a'); + const title = a.text(); + const link = utils.host + a.attr('href'); + + const picId = news.find('.magazine-pic').attr('id'); + const noPic = utils.host + '/css/images/nothesispic.jpg'; + let imageUrl = noPic; + if (picId) { + const imageId = picId.substring(9); + const getLink = utils.host + '/attachment/getImgUrl'; + imageUrl = + (await ctx.cache.tryGet(getLink, async () => { + const result = await got.get(getLink, { + searchParams: queryString.stringify({ + attachmentId: imageId, + }), + }); + return result.data; + })) || noPic; + } + const image = imageUrl; + const text = $(element) + .find('.magazine-description') + .text(); + const description = utils.setDesc(image, text); + + const span = news.find('.magazine-text-atten'); + const arr = span.map((index, element) => $(element).text()).get(); + const author = arr[1]; + const date = utils.getDate(arr[0]); + const pubDate = utils.transDate(date); + + const single = { + title: title, + 
link: link, + description: description, + author: author, + pubDate: pubDate, + }; + return Promise.resolve(single); + }) + .get() + ); + + ctx.state.data = { + title: title, + link: response.url, + description: description, + item: item, + }; +}; diff --git a/lib/routes/x-mol/utils.js b/lib/routes/x-mol/utils.js new file mode 100644 index 0000000000..dbe61a473b --- /dev/null +++ b/lib/routes/x-mol/utils.js @@ -0,0 +1,16 @@ +const XmolUtils = { + host: 'https://www.x-mol.com', + transDate: (date) => new Date(`${date} GMT+0800`).toUTCString(), + getDate: (text) => { + const reg = /[1-9]\d{3}-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1])/; + if (typeof text === 'string') { + const arr = text.match(reg); + return arr && text.match(reg)[0]; + } else { + return null; + } + }, + setDesc: (image, text) => `

    ${text}

    `, +}; + +module.exports = XmolUtils; diff --git a/lib/routes/yahoo-news/index.js b/lib/routes/yahoo-news/index.js old mode 100755 new mode 100644