feat: Update and Combine Router for Journal Section 📚 (#4089)

This commit is contained in:
Chang Y
2020-02-27 00:40:08 +08:00
committed by GitHub
parent 9fffa7fc1b
commit 1305d7b196
18 changed files with 365 additions and 626 deletions

View File

@@ -8,15 +8,14 @@ pageClass: routes
### Cell Journal
<Route author="yech1990" example="/cell/cell/current" path="/cell/cell/:category" />
<RouteEn author="yech1990" example="/cell/cell/current" path="/cell/cell/:category" />
> Current Issue (default)
| `:category` | Query Type | Route |
| :---------: | :---------------------: | ---------------------------------------- |
| current | Current Issue (default) | [/cell/cell/current](/cell/cell/current) |
| inpress | Articles in press | [/cell/cell/inpress](/cell/cell/inpress) |
`/cell/cell/current`
> Articles in press
`/cell/cell/inpress`
</RouteEn>
### eLife - Latest Research-ALL
@@ -28,7 +27,46 @@ pageClass: routes
### Nature Journal - Latest Research
<RouteEn author="yech1990" example="/nature/nature/research" path="/nature/nature/research" />
<RouteEn author="yech1990" example="/nature/research/ng" path="/nature/research/:journal" :paramsDesc="['short name for a journal']" />
| `:journal` | Full Name of the Journal | Route |
| :-----------: | :-------------------------: | ---------------------------------------------------------------- |
| nature | Nature | [/nature/research/nature](/nature/research/nature) |
| nbt | Nature Biotechnology | [/nature/research/nbt](/nature/research/nbt) |
| neuro | Nature Neuroscience | [/nature/research/neuro](/nature/research/neuro) |
| ng | Nature Genetics | [/nature/research/ng](/nature/research/ng) |
| ni | Nature Immunology | [/nature/research/ni](/nature/research/ni) |
| nmeth | Nature Methods | [/nature/research/nmeth](/nature/research/nmeth) |
| nchem | Nature Chemistry | [/nature/research/nchem](/nature/research/nchem) |
| nmat | Nature Materials | [/nature/research/nmat](/nature/research/nmat) |
| natmachintell | Nature Machine Intelligence | [/nature/research/natmachintell](/nature/research/natmachintell) |
- Use the route (`/nature/research/` + “short name for a journal”) to query the latest research papers of a given journal from Nature Publishing Group.
If the `:journal` parameter is left blank, the latest research from Nature is returned.
- The journals from NPG are run by different groups of people, and their websites may not be consistent across all the journals.
- Only the abstract section is rendered
</RouteEn>
### Nature Journal - News & Comment
<RouteEn author="yech1990" example="/nature/news-and-comment/ng" path="/nature/news-and-comment/:journal" :paramsDesc="['short name for a journal']" />
| `:journal` | Full Name of the Journal | Route |
| :-----------: | :-------------------------: | -------------------------------------------------------------------------------- |
| nbt | Nature Biotechnology | [/nature/news-and-comment/nbt](/nature/news-and-comment/nbt) |
| neuro | Nature Neuroscience | [/nature/news-and-comment/neuro](/nature/news-and-comment/neuro) |
| ng | Nature Genetics | [/nature/news-and-comment/ng](/nature/news-and-comment/ng) |
| ni | Nature Immunology | [/nature/news-and-comment/ni](/nature/news-and-comment/ni) |
| nmeth | Nature Methods | [/nature/news-and-comment/nmeth](/nature/news-and-comment/nmeth) |
| nchem | Nature Chemistry | [/nature/news-and-comment/nchem](/nature/news-and-comment/nchem) |
| nmat | Nature Materials | [/nature/news-and-comment/nmat](/nature/news-and-comment/nmat) |
| natmachintell | Nature Machine Intelligence | [/nature/news-and-comment/natmachintell](/nature/news-and-comment/natmachintell) |
- Use the route (`/nature/news-and-comment/` + “short name for a journal”) to query the latest news and comment of a given journal from Nature Publishing Group.
- The journals from NPG are run by different groups of people, and their websites may not be consistent across all the journals.
</RouteEn>
### Nature Journal - News
@@ -38,26 +76,6 @@ pageClass: routes
<RouteEn author="yech1990" example="/nature/nature/highlight" path="/nature/nature/highlight" />
### Nature Genetics (ng) - Latest Research
<RouteEn author="yech1990" example="/nature/ng/research" path="/nature/ng/research" />
### Nature Methods (nmeth) - Latest Research
<RouteEn author="yech1990" example="/nature/nmeth/research" path="/nature/nmeth/research" />
### Nature Biotechnology (nbt) - Latest Research
<RouteEn author="yech1990" example="/nature/nbt/research" path="/nature/nbt/research" />
### Nature Neuroscience (neuro) - Latest Research
<RouteEn author="yech1990" example="/nature/neuro/research" path="/nature/neuro/research" />
### Nature Machine Intelligence (natmachintell) - Latest Research
<RouteEn author="LogicJake" example="/nature/natmachintell/research" path="/nature/natmachintell/research" />
### Proceedings of The National Academy of Sciences (PNAS) - Latest Articles - ALL
<RouteEn author="emdoe" example="/pnas/latest" path="/pnas/latest" />
@@ -68,15 +86,29 @@ pageClass: routes
### Science Journal - Current Issue
<RouteEn author="yech1990" example="/sciencemag/science/current" path="/sciencemag/science/current" />
<RouteEn author="yech1990" example="/sciencemag/current/science" path="/sciencemag/current/:journal?" :paramsDesc="['short name for a journal']" />
| `:journal` | Full Name of the Journal | Route |
| :--------: | :----------------------------: | ---------------------------------------------------------------- |
| science | Science | [/sciencemag/current/science](/sciencemag/current/science) |
| advances | Science Advances | [/sciencemag/current/advances](/sciencemag/current/advances) |
| immunology | Science Immunology | [/sciencemag/current/immunology](/sciencemag/current/immunology) |
| robotics | Science Robotics | [/sciencemag/current/robotics](/sciencemag/current/robotics) |
| stke | Science Signaling | [/sciencemag/current/stke](/sciencemag/current/stke) |
| stm | Science Translational Medicine | [/sciencemag/current/stm](/sciencemag/current/stm) |
- Use the route (`/sciencemag/current/` + “short name for a journal”) to query the current issue of a journal from AAAS.
Leave the parameter blank (`/sciencemag/current`) to get updates from Science.
</RouteEn>
### Science Journal - First Release
<RouteEn author="yech1990" example="/sciencemag/science/early" path="/sciencemag/science/early" />
<RouteEn author="yech1990" example="/sciencemag/early/science" path="/sciencemag/early/science" />
### Science Advances - Current Issue
_only support Science Journal_
<RouteEn author="yech1990" example="/sciencemag/advances/current" path="/sciencemag/advances/current" />
</RouteEn>
## Search Engine
@@ -84,10 +116,6 @@ pageClass: routes
<RouteEn author="yech1990" example="/pubmed/trending" path="/pubmed/trending" />
### X-MOL Platform - News
<RouteEn author="cssxsh" example="/x-mol/news/3" path="/x-mol/news/:tag?" :paramsDesc="['数字编号可从新闻列表URL得到。为空时从新闻主页获取新闻。']" />
### X-MOL Platform - Journal
<RouteEn author="cssxsh" example="/x-mol/paper/0/9" path="/x-mol/paper/:type/:magazine" :paramsDesc="['类别','机构两个参数都可从期刊URL获取。']" />

View File

@@ -33,6 +33,12 @@ pageClass: routes
| columns | columns=健康 | string / undefined |
| columns | columns=virus | string / undefined |
## X-MOL
### News
<RouteEn author="cssxsh" example="/x-mol/news/3" path="/x-mol/news/:tag?" :paramsDesc="['The numeric tag can be found in the URL of the news list. If omitted, news is fetched from the news homepage.']" />
## ZhiShiFenZi
### News

View File

@@ -10,10 +10,6 @@ pageClass: routes
<Route author="yech1990" example="/pubmed/trending" path="/pubmed/trending" />
### X-MOL 平台-新闻
<Route author="cssxsh" example="/x-mol/news/3" path="/x-mol/news/:tag?" :paramsDesc="['数字编号可从新闻列表URL得到。为空时从新闻主页获取新闻。']" />
### X-MOL 平台-期刊
<Route author="cssxsh" example="/x-mol/paper/0/9" path="/x-mol/paper/:type/:magazine" :paramsDesc="['类别','机构两个参数都可从期刊URL获取。']" />
@@ -41,13 +37,12 @@ pageClass: routes
<Route author="yech1990" example="/cell/cell/current" path="/cell/cell/:category" />
> 本期刊物 (默认选项)
| `:category` | 类型说明 | 路由 |
| :---------: | :-----------------: | ---------------------------------------- |
| current | 本期刊物 (默认选项) | [/cell/cell/current](/cell/cell/current) |
| inpress | 在线发表 | [/cell/cell/inpress](/cell/cell/inpress) |
`/cell/cell/current`
> 在线发表
`/cell/cell/inpress`
</Route>
### eLife-最新成果-综合
@@ -57,9 +52,49 @@ pageClass: routes
<Route author="emdoe" example="/elife/cell-biology" path="/elife/:subject" :paramsDesc="['方向名称', '请在主页获取']" />
### Nature 主刊-最新成果
### Nature 系列-最新成果
<Route author="yech1990" example="/nature/nature/research" path="/nature/nature/research" />
<Route author="yech1990" example="/nature/research/ng" path="/nature/research/:journal" :paramsDesc="['期刊名简写']" />
| `:journal` | 期刊名 | 路由 |
| :-----------: | :-------------------------: | ---------------------------------------------------------------- |
| nature | Nature | [/nature/research/nature](/nature/research/nature) |
| nbt | Nature Biotechnology | [/nature/research/nbt](/nature/research/nbt) |
| neuro | Nature Neuroscience | [/nature/research/neuro](/nature/research/neuro) |
| ng | Nature Genetics | [/nature/research/ng](/nature/research/ng) |
| ni | Nature Immunology | [/nature/research/ni](/nature/research/ni) |
| nmeth | Nature Methods | [/nature/research/nmeth](/nature/research/nmeth) |
| nchem | Nature Chemistry | [/nature/research/nchem](/nature/research/nchem) |
| nmat | Nature Materials | [/nature/research/nmat](/nature/research/nmat) |
| natmachintell | Nature Machine Intelligence | [/nature/research/natmachintell](/nature/research/natmachintell) |
- 通过 `/nature/research/` + “杂志简写”来获取对应杂志的最新文章(Latest Research)。
若参数置空(`/nature/research`),则默认获取主刊(Nature)的最新文章。
- 由于 Nature 系列的刊物是分别由不同的编辑来独立运营,所以页面格式上有些差异。目前**仅**对以下杂志进行了测试。
- 由于权限的限制,目前仅获取论文的摘要进行展示。
</Route>
### Nature-新闻及评论
<Route author="yech1990" example="/nature/news-and-comment/ng" path="/nature/news-and-comment/:journal" :paramsDesc="['期刊名简写']" />
| `:journal` | 期刊名 | 路由 |
| :-----------: | :-------------------------: | -------------------------------------------------------------------------------- |
| nbt | Nature Biotechnology | [/nature/news-and-comment/nbt](/nature/news-and-comment/nbt) |
| neuro | Nature Neuroscience | [/nature/news-and-comment/neuro](/nature/news-and-comment/neuro) |
| ng | Nature Genetics | [/nature/news-and-comment/ng](/nature/news-and-comment/ng) |
| ni | Nature Immunology | [/nature/news-and-comment/ni](/nature/news-and-comment/ni) |
| nmeth | Nature Methods | [/nature/news-and-comment/nmeth](/nature/news-and-comment/nmeth) |
| nchem | Nature Chemistry | [/nature/news-and-comment/nchem](/nature/news-and-comment/nchem) |
| nmat | Nature Materials | [/nature/news-and-comment/nmat](/nature/news-and-comment/nmat) |
| natmachintell | Nature Machine Intelligence | [/nature/news-and-comment/natmachintell](/nature/news-and-comment/natmachintell) |
- 通过 `/nature/news-and-comment/` + “杂志简写”来获取对应杂志的新闻及评论(News & Comment)。
主刊由于格式不同,该 router 并未支持,采用 `/nature/nature/news` 来获取新闻。
- 由于 Nature 系列的刊物是分别由不同的编辑来独立运营,所以页面格式上有些差异。目前**仅**对以下杂志进行了测试。
</Route>
### Nature 主刊-新闻动态
@@ -69,26 +104,6 @@ pageClass: routes
<Route author="yech1990" example="/nature/nature/highlight" path="/nature/nature/highlight" />
### Nature Genetics (ng)-最新成果
<Route author="yech1990" example="/nature/ng/research" path="/nature/ng/research" />
### Nature Methods (nmeth)-最新成果
<Route author="yech1990" example="/nature/nmeth/research" path="/nature/nmeth/research" />
### Nature Biotechnology (nbt)-最新成果
<Route author="yech1990" example="/nature/nbt/research" path="/nature/nbt/research" />
### Nature Neuroscience (neuro)-最新成果
<Route author="yech1990" example="/nature/neuro/research" path="/nature/neuro/research" />
### Nature Machine Intelligence (natmachintell)-最新成果
<Route author="LogicJake" example="/nature/natmachintell/research" path="/nature/natmachintell/research" />
### PNAS-最新文章(全部)
<Route author="emdoe" example="/pnas/latest" path="/pnas/latest" />
@@ -97,14 +112,28 @@ pageClass: routes
<Route author="emdoe" example="/pnas/Applied Mathematics" path="/pnas/:topic" :paramsDesc="['领域名称','可从 pnas.org 获得']" />
### Science 主刊-本期刊物
### Science 系列-本期刊物
<Route author="yech1990" example="/sciencemag/science/current" path="/sciencemag/science/current" />
<Route author="yech1990" example="/sciencemag/current/science" path="/sciencemag/current/:journal?" :paramsDesc="['期刊名简写']" />
| `:journal` | 期刊名 | 路由 |
| :--------: | :----------------------------: | ---------------------------------------------------------------- |
| science | Science | [/sciencemag/current/science](/sciencemag/current/science) |
| advances | Science Advances | [/sciencemag/current/advances](/sciencemag/current/advances) |
| immunology | Science Immunology | [/sciencemag/current/immunology](/sciencemag/current/immunology) |
| robotics | Science Robotics | [/sciencemag/current/robotics](/sciencemag/current/robotics) |
| stke | Science Signaling | [/sciencemag/current/stke](/sciencemag/current/stke) |
| stm | Science Translational Medicine | [/sciencemag/current/stm](/sciencemag/current/stm) |
- 通过 `/sciencemag/current/` + “杂志简写”来获取对应杂志最新一期的文章(Current Issue)。
若参数置空(`/sciencemag/current`),则默认获取主刊(Science)的最新文章。
</Route>
### Science 主刊-在线发表
<Route author="yech1990" example="/sciencemag/science/early" path="/sciencemag/science/early" />
<Route author="yech1990" example="/sciencemag/early/science" path="/sciencemag/early/science" />
### Science Advances-本期刊物
_仅支持 Science 主刊_
<Route author="yech1990" example="/sciencemag/advances/current" path="/sciencemag/advances/current" />
</Route>

View File

@@ -14,6 +14,12 @@ pageClass: routes
<Route author="liecn" example="/gradcafe/result/computer" path="/gradcafe/result/:type" :paramsDesc="['按关键词进行搜索,如 computer']"/>
## X-MOL 平台
### 新闻
<Route author="cssxsh" example="/x-mol/news/3" path="/x-mol/news/:tag?" :paramsDesc="['数字编号可从新闻列表URL得到。为空时从新闻主页获取新闻。']" />
## 领研
### 论文

View File

@@ -2007,22 +2007,15 @@ router.get('/pnas/:tid', require('./routes/pnas/topic'));
// cell [Sci Journal]
router.get('/cell/cell/:category', require('./routes/cell/cell/index'));
// nature [Sci Journal]
router.get('/nature/nature/research', require('./routes/nature/nature/research'));
router.get('/nature/nature/news', require('./routes/nature/nature/news'));
router.get('/nature/nature/highlight', require('./routes/nature/nature/highlight'));
// nature sub-journals [Sci Journal]
router.get('/nature/nmeth/research', require('./routes/nature/nmeth/research'));
router.get('/nature/ng/research', require('./routes/nature/ng/research'));
router.get('/nature/nbt/research', require('./routes/nature/nbt/research'));
router.get('/nature/neuro/research', require('./routes/nature/neuro/research'));
router.get('/nature/natmachintell/research', require('./routes/nature/natmachintell/research'));
// nature + nature sub-journals [Sci Journal]
router.get('/nature/research/:journal?', require('./routes/nature/research'));
router.get('/nature/news-and-comment/:journal?', require('./routes/nature/news-and-comment'));
router.get('/nature/news', require('./routes/nature/news'));
router.get('/nature/highlight', require('./routes/nature/highlight'));
// science [Sci Journal]
router.get('/sciencemag/science/current', require('./routes/sciencemag/science/current'));
router.get('/sciencemag/science/early', require('./routes/sciencemag/science/early'));
// science sub-journals [Sci Journal]
router.get('/sciencemag/advances/current', require('./routes/sciencemag/advances/current'));
router.get('/sciencemag/current/:journal?', require('./routes/sciencemag/current'));
router.get('/sciencemag/early/science', require('./routes/sciencemag/early'));
// dlsite
router.get('/dlsite/new/:type', require('./routes/dlsite/new'));

View File

@@ -1,44 +0,0 @@
const cheerio = require('cheerio');
const got = require('@/utils/got');
const url = require('url');
const host = 'https://www.nature.com';
const link = 'https://www.nature.com/natmachintell/research';
module.exports = async (ctx) => {
const responses = await got.get(link);
const $ = cheerio.load(responses.data);
const list = $('article').get();
const out = await Promise.all(
list.map(async (item) => {
const $ = cheerio.load(item);
const title = $('h3 a').text();
const itemUrl = url.resolve(host, $('h3 a').attr('href'));
const cache = await ctx.cache.get(itemUrl);
if (cache) {
return Promise.resolve(JSON.parse(cache));
}
const responses = await got.get(itemUrl);
const $d = cheerio.load(responses.data);
const description = $d('div.c-article-body').html();
const single = {
title,
link: itemUrl,
description,
};
ctx.cache.set(itemUrl, JSON.stringify(single));
return Promise.resolve(single);
})
);
ctx.state.data = {
title: 'nature > nature machine intelligence > latest research',
link: link,
item: out,
};
};

View File

@@ -1,75 +0,0 @@
const cheerio = require('cheerio');
const got = require('@/utils/got');
module.exports = async (ctx) => {
const base = `https://www.nature.com`;
const url = `${base}/nature/research`;
const res = await got.get(url);
const $ = cheerio.load(res.data);
const list = $('.border-bottom-1.pb20').get();
const out = await Promise.all(
list.map(async (item) => {
const $ = cheerio.load(item);
const title = $('h3 > a').text();
const partial = $('h3 > a').attr('href');
const address = `${base}${partial}`;
const brief = $('.hide-overflow.inline').text();
const time = $('time').text();
let author;
if ($('.js-list-authors-3 li').length > 3) {
author =
$('.js-list-authors-3 li')
.slice(0, 1)
.text() + ' et al.';
} else {
author = $('.js-list-authors-3 li').text();
}
const cache = await ctx.cache.get(address);
if (cache) {
return Promise.resolve(JSON.parse(cache));
}
const res = await got.get(address);
const capture = cheerio.load(res.data);
const abs = capture('div#Abs1-content.c-article-section__content > p').html();
let briefContents = '';
if (brief !== '') {
briefContents = `
<div>
<h2 align="left">Brief</h2>
<p>${brief}</p>
</div>
`;
}
let absContents = '';
if (abs !== null) {
absContents = `
<div>
<h2 align="left">Abstract</h2>
<p>${abs}</p>
</div>
`;
}
const contents = briefContents + absContents;
const single = {
title,
author: author,
description: contents,
link: address,
guid: address,
pubDate: new Date(time).toUTCString(),
};
ctx.cache.set(address, JSON.stringify(single));
return Promise.resolve(single);
})
);
ctx.state.data = {
title: `Nature | Latest Research`,
description: `Nature, a nature research journal`,
link: url,
item: out,
};
};

View File

@@ -1,75 +0,0 @@
const cheerio = require('cheerio');
const got = require('@/utils/got');
module.exports = async (ctx) => {
const base = `https://www.nature.com`;
const url = `${base}/nbt/research`;
const res = await got.get(url);
const $ = cheerio.load(res.data);
const list = $('.border-bottom-1.pb20').get();
const out = await Promise.all(
list.map(async (item) => {
const $ = cheerio.load(item);
const title = $('h3 > a').text();
const partial = $('h3 > a').attr('href');
const address = `${base}${partial}`;
const brief = $('.hide-overflow.inline').text();
const time = $('time').text();
let author;
if ($('.js-list-authors-3 li').length > 3) {
author =
$('.js-list-authors-3 li')
.slice(0, 1)
.text() + ' et al.';
} else {
author = $('.js-list-authors-3 li').text();
}
const cache = await ctx.cache.get(address);
if (cache) {
return Promise.resolve(JSON.parse(cache));
}
const res = await got.get(address);
const capture = cheerio.load(res.data);
const abs = capture('div#Abs1-content.c-article-section__content > p').html();
let briefContents = '';
if (brief !== '') {
briefContents = `
<div>
<h2 align="left">Brief</h2>
<p>${brief}</p>
</div>
`;
}
let absContents = '';
if (abs !== null) {
absContents = `
<div>
<h2 align="left">Abstract</h2>
<p>${abs}</p>
</div>
`;
}
const contents = briefContents + absContents;
const single = {
title,
author: author,
description: contents,
link: address,
guid: address,
pubDate: new Date(time).toUTCString(),
};
ctx.cache.set(address, JSON.stringify(single));
return Promise.resolve(single);
})
);
ctx.state.data = {
title: `Nature Biotechnology | Latest Research`,
description: `Nature Biotechnology, a nature research journal`,
link: url,
item: out,
};
};

View File

@@ -1,75 +0,0 @@
const cheerio = require('cheerio');
const got = require('@/utils/got');
module.exports = async (ctx) => {
const base = `https://www.nature.com`;
const url = `${base}/neuro/research`;
const res = await got.get(url);
const $ = cheerio.load(res.data);
const list = $('.border-bottom-1.pb20').get();
const out = await Promise.all(
list.slice(0, 3).map(async (item) => {
const $ = cheerio.load(item);
const title = $('h3 > a').text();
const partial = $('h3 > a').attr('href');
const address = `${base}${partial}`;
const brief = $('.hide-overflow.inline').text();
const time = $('time').text();
let author;
if ($('.js-list-authors-3 li').length > 3) {
author =
$('.js-list-authors-3 li')
.slice(0, 1)
.text() + ' et al.';
} else {
author = $('.js-list-authors-3 li').text();
}
const cache = await ctx.cache.get(address);
if (cache) {
return Promise.resolve(JSON.parse(cache));
}
const res = await got.get(address);
const capture = cheerio.load(res.data);
const abs = capture('div#Abs1-content.c-article-section__content > p').html();
let briefContents = '';
if (brief !== '') {
briefContents = `
<div>
<h2 align="left">Brief</h2>
<p>${brief}</p>
</div>
`;
}
let absContents = '';
if (abs !== null) {
absContents = `
<div>
<h2 align="left">Abstract</h2>
<p>${abs}</p>
</div>
`;
}
const contents = briefContents + absContents;
const single = {
title,
author: author,
description: contents,
link: address,
guid: address,
pubDate: new Date(time).toUTCString(),
};
ctx.cache.set(address, JSON.stringify(single));
return Promise.resolve(single);
})
);
ctx.state.data = {
title: `Nature Neuroscience | Latest Research`,
description: `Nature Neuroscience, a nature research journal`,
link: url,
item: out,
};
};

View File

@@ -0,0 +1,80 @@
// example usage: `/nature/news-and-comment/ng`
// The journals from NPG are run by different groups of people,
// and their websites may not be consistent across all the journals
//
// This router has **just** been tested in:
// nbt: Nature Biotechnology
// neuro: Nature Neuroscience
// ng: Nature Genetics
// ni: Nature Immunology
// nmeth: Nature Method
// nchem: Nature Chemistry
// nmat: Nature Materials
// natmachintell: Nature Machine Intelligence
const cheerio = require('cheerio');
const got = require('@/utils/got');
module.exports = async (ctx) => {
const baseURL = `https://www.nature.com`;
const journal = ctx.params.journal;
const pageURL = `${baseURL}/${journal}/news-and-comment`;
const pageResponse = await got.get(pageURL);
const pageCapture = cheerio.load(pageResponse.data);
const pageDescription = pageCapture('meta[name="description"]').attr('content') || `Nature, a nature research journal`;
const pageTitleName = pageCapture('meta[name="WT.cg_n"]').attr('content') || `Nature (${journal})`;
const pageTitleSub = pageCapture('meta[name="WT.cg_s"]').attr('content') || 'News & Comment';
const list = pageCapture('.border-bottom-1.pb20').get();
const items = await Promise.all(
list.map(async (el) => {
const $ = cheerio.load(el);
const title = $('h3 > a').text();
const partial = $('h3 > a').attr('href');
const address = `${baseURL}${partial}`;
const brief = $('.hide-overflow.inline').text();
const time = $('time').text();
const author = $('.js-list-authors-3 li').text();
const articleType = $('p > span').attr('data-class');
const headerContents = `
<div>
<p style="color: #666">
<span>${articleType}</span>
<span>| </span>
<span>${author}</span>
</p>
</div>
`;
let briefContents = '';
if (brief !== '') {
briefContents = `
<div>
<h2 align="left">Brief</h2>
<p>${brief}</p>
</div>
`;
}
const contents = headerContents + briefContents;
const item = {
title,
author: author,
description: contents,
link: address,
guid: address,
pubDate: new Date(time).toUTCString(),
};
return Promise.resolve(item);
})
);
ctx.state.data = {
title: `${pageTitleName} | ${pageTitleSub}`,
description: pageDescription,
link: pageURL,
item: items,
};
};

View File

@@ -1,75 +0,0 @@
const cheerio = require('cheerio');
const got = require('@/utils/got');
module.exports = async (ctx) => {
const base = `https://www.nature.com`;
const url = `${base}/ng/research`;
const res = await got.get(url);
const $ = cheerio.load(res.data);
const list = $('.border-bottom-1.pb20').get();
const out = await Promise.all(
list.map(async (item) => {
const $ = cheerio.load(item);
const title = $('h3 > a').text();
const partial = $('h3 > a').attr('href');
const address = `${base}${partial}`;
const brief = $('.hide-overflow.inline').text();
const time = $('time').text();
let author;
if ($('.js-list-authors-3 li').length > 3) {
author =
$('.js-list-authors-3 li')
.slice(0, 1)
.text() + ' et al.';
} else {
author = $('.js-list-authors-3 li').text();
}
const cache = await ctx.cache.get(address);
if (cache) {
return Promise.resolve(JSON.parse(cache));
}
const res = await got.get(address);
const capture = cheerio.load(res.data);
const abs = capture('div#Abs1-content.c-article-section__content > p').html();
let briefContents = '';
if (brief !== '') {
briefContents = `
<div>
<h2 align="left">Brief</h2>
<p>${brief}</p>
</div>
`;
}
let absContents = '';
if (abs !== null) {
absContents = `
<div>
<h2 align="left">Abstract</h2>
<p>${abs}</p>
</div>
`;
}
const contents = briefContents + absContents;
const single = {
title,
author: author,
description: contents,
link: address,
guid: address,
pubDate: new Date(time).toUTCString(),
};
ctx.cache.set(address, JSON.stringify(single));
return Promise.resolve(single);
})
);
ctx.state.data = {
title: `Nature Genetics | Latest Research`,
description: `Nature Genetics, a nature research journal`,
link: url,
item: out,
};
};

View File

@@ -1,75 +0,0 @@
const cheerio = require('cheerio');
const got = require('@/utils/got');
module.exports = async (ctx) => {
const base = `https://www.nature.com`;
const url = `${base}/nmeth/research`;
const res = await got.get(url);
const $ = cheerio.load(res.data);
const list = $('.border-bottom-1.pb20').get();
const out = await Promise.all(
list.map(async (item) => {
const $ = cheerio.load(item);
const title = $('h3 > a').text();
const partial = $('h3 > a').attr('href');
const address = `${base}${partial}`;
const brief = $('.hide-overflow.inline').text();
const time = $('time').text();
let author;
if ($('.js-list-authors-3 li').length > 3) {
author =
$('.js-list-authors-3 li')
.slice(0, 1)
.text() + ' et al.';
} else {
author = $('.js-list-authors-3 li').text();
}
const cache = await ctx.cache.get(address);
if (cache) {
return Promise.resolve(JSON.parse(cache));
}
const res = await got.get(address);
const capture = cheerio.load(res.data);
const abs = capture('div#Abs1-content.c-article-section__content > p').html();
let briefContents = '';
if (brief !== '') {
briefContents = `
<div>
<h2 align="left">Brief</h2>
<p>${brief}</p>
</div>
`;
}
let absContents = '';
if (abs !== null) {
absContents = `
<div>
<h2 align="left">Abstract</h2>
<p>${abs}</p>
</div>
`;
}
const contents = briefContents + absContents;
const single = {
title,
author: author,
description: contents,
link: address,
guid: address,
pubDate: new Date(time).toUTCString(),
};
ctx.cache.set(address, JSON.stringify(single));
return Promise.resolve(single);
})
);
ctx.state.data = {
title: `Nature Method | Latest Research`,
description: `Nature Method, a nature research journal`,
link: url,
item: out,
};
};

View File

@@ -0,0 +1,97 @@
// example usage: `/nature/research/ng`
// The journals from NPG are run by different groups of people,
// and their websites may not be consistent across all the journals
//
// This router has **just** been tested in:
// nature: Nature
// nbt: Nature Biotechnology
// neuro: Nature Neuroscience
// ng: Nature Genetics
// ni: Nature Immunology
// nmeth: Nature Method
// nchem: Nature Chemistry
// nmat: Nature Materials
// natmachintell: Nature Machine Intelligence
const cheerio = require('cheerio');
const got = require('@/utils/got');
module.exports = async (ctx) => {
const baseURL = `https://www.nature.com`;
const journal = ctx.params.journal || 'nature';
const pageURL = `${baseURL}/${journal}/research`;
const pageResponse = await got.get(pageURL);
const pageCapture = cheerio.load(pageResponse.data);
const pageDescription = pageCapture('meta[name="description"]').attr('content') || `Nature, a nature research journal`;
const pageTitleName = pageCapture('meta[name="WT.cg_n"]').attr('content') || `Nature (${journal})`;
const pageTitleSub = pageCapture('meta[name="WT.cg_s"]').attr('content') || 'Latest Research';
const list = pageCapture('.border-bottom-1.pb20').get();
const items = await Promise.all(
list.slice(4, 6).map(async (el) => {
const $ = cheerio.load(el);
const title = $('h3 > a').text();
const partial = $('h3 > a').attr('href');
const address = `${baseURL}${partial}`;
const brief = $('.hide-overflow.inline').text();
const time = $('time').text();
let author;
if ($('.js-list-authors-3 li').length > 3) {
author =
$('.js-list-authors-3 li')
.slice(0, 1)
.text() + ' et al.';
} else {
author = $('.js-list-authors-3 li').text();
}
const cache = await ctx.cache.get(address);
if (cache) {
return Promise.resolve(JSON.parse(cache));
}
const itemResponse = await got.get(address);
const itemCapture = cheerio.load(itemResponse.data);
const abs = itemCapture('div#Abs1-content.c-article-section__content > p').html();
let briefContents = '';
if (brief !== '') {
briefContents = `
<div>
<h2 align="left">Brief</h2>
<p>${brief}</p>
</div>
`;
}
let absContents = '';
if (abs !== null) {
absContents = `
<div>
<h2 align="left">Abstract</h2>
<p>${abs}</p>
</div>
`;
}
const contents = briefContents + absContents;
const item = {
title,
author: author,
description: contents,
link: address,
guid: address,
pubDate: new Date(time).toUTCString(),
};
ctx.cache.set(address, JSON.stringify(item));
return Promise.resolve(item);
})
);
ctx.state.data = {
title: `${pageTitleName} | ${pageTitleSub}`,
description: pageDescription,
link: pageURL,
item: items,
};
};

View File

@@ -1,98 +0,0 @@
const cheerio = require('cheerio');
const got = require('@/utils/got');
module.exports = async (ctx) => {
const base = `https://advances.sciencemag.org`;
const res = await got.get(base);
const pageCapture = cheerio.load(res.data);
const list = pageCapture('ul > li > div > div > article > div').get();
const out = await Promise.all(
list.map(async (item) => {
const $ = cheerio.load(item);
const title = $('h3').text();
const partial = $('h3 > a').attr('href');
// TODO: .full.txt is a way for getting text preview
const address = `${base}${partial}`;
let author;
const authorList = $('span.highwire-citation-authors > span.highwire-citation-author')
.map(function(i, el) {
return $(el).text();
})
.get();
if (authorList.length > 5) {
author = authorList.slice(0, 5).join(', ') + ' <i>et al.</i>';
} else {
author = authorList.join(', ');
}
const time = new Date($('p.highwire-cite-metadata > time').text()).toUTCString();
const cache = await ctx.cache.get(address);
if (cache) {
return Promise.resolve(JSON.parse(cache));
}
// contents
// brief content
const brief = $('div.highwire-cite-snippet > div > div > p').text();
let briefContents = '';
if (brief !== '') {
briefContents = `
<div id="content-brief">
<h2>Brief</h2>
<p>${brief}</p>
</div>
`;
}
const itemPage = await got.get(address);
const itemCapture = cheerio.load(itemPage.data);
// section and subject content
const section = itemCapture('header > div.overline > span.overline__section').text();
const subject = itemCapture('header > div.overline > span.overline__subject').text();
let sectionContents = '';
if (section !== '' || subject !== '') {
sectionContents = `
<div id="content-section">
<span style="color: #d40016; text-transform: uppercase; font-weight: 700;">${section}</span>
<span style="color: #666; text-transform: uppercase; font-weight: 400; border-left: 1px solid #e6e6e6;">${subject}</span>
</div>
`;
}
// abs content
const abs = itemCapture('div.section.abstract > p').text();
let absContents = '';
if (abs !== '') {
absContents = `
<div id="content-abs">
<h2>Abstract</h2>
${abs}
</div>
`;
}
const contents = sectionContents + briefContents + absContents;
const single = {
title: title,
author: author,
description: contents,
link: address,
guid: address,
pubDate: time,
};
ctx.cache.set(address, JSON.stringify(single));
return Promise.resolve(single);
})
);
ctx.state.data = {
title: `Science | First Release`,
description: `Science, a research journal. For papers that published online.`,
link: base,
item: out,
};
};

View File

@@ -1,23 +1,41 @@
// journals from the AAAS publishing group
//
// science: Science
// advances: Science Advances
// immunology: Science Immunology
// robotics: Science Robotics
// stke: Science Signaling
// stm: Science Translational Medicine
const cheerio = require('cheerio');
const got = require('@/utils/got');
module.exports = async (ctx) => {
const base = `https://science.sciencemag.org`;
const journal = ctx.params.journal || 'science';
const baseURL = `https://${journal}.sciencemag.org`;
const res = await got.get(base);
const pageCapture = cheerio.load(res.data);
const pageURL = baseURL;
const pageResponse = await got.get(pageURL);
const pageCapture = cheerio.load(pageResponse.data);
const pageTitleName = pageCapture('head > title').text() || `Science (${journal})`;
// just select paper relative sections
const sectionList = ['issue-toc-section-research-articles', 'issue-toc-section-review', 'issue-toc-section-reports'];
const sectionList = ['research-articles', 'review', 'reports'];
const list = [].concat.apply(
[],
sectionList.map((section) => {
const sec = cheerio.load(pageCapture(`ul > li .issue-toc-section.${section}`).html());
const sectionName = sec('h2').text();
const sectionList = sec('ul > li > div > div > article > div')
.append(`<div class="toc-section-type">${sectionName}</div>`)
.get();
return sectionList;
const sectionContent = pageCapture(`ul > li.issue-toc-section.issue-toc-section-${section}`).html();
if (sectionContent !== null) {
const sec = cheerio.load(sectionContent);
const sectionName = sec('h2').text();
const sectionList = sec('ul > li > div > div > article > div')
.append(`<div class="toc-section-type">${sectionName}</div>`)
.get();
return sectionList;
}
return [];
})
);
@@ -27,17 +45,15 @@ module.exports = async (ctx) => {
const title = $('h3').text();
const partial = $('h3 > a').attr('href');
const address = `${base}${partial}`;
const address = `${baseURL}${partial}`;
const section = $('div .toc-section-type').text();
let author;
const authorList = $('span.highwire-citation-authors > span.highwire-citation-author')
.map(function(i, el) {
return $(el).text();
})
.map((_, el) => $(el).text())
.get();
if (authorList.length > 5) {
author = authorList.slice(0, 5).join(', ') + ' <i>et al.</i>';
author = authorList.slice(0, 5).join(', ') + ' et al.';
} else {
author = authorList.join(', ');
}
@@ -71,10 +87,9 @@ module.exports = async (ctx) => {
}
const itemPage = await got.get(address);
const itemCapture = cheerio.load(itemPage.data);
const abs = itemCapture('div > div.abstract-view > div.section')
.map(function(i, el) {
return $(el).html();
})
const abs = itemCapture('div > div.article > div.section')
.map((_, el) => $(el).html())
.get()
.join('<br>');
@@ -102,9 +117,9 @@ module.exports = async (ctx) => {
})
);
ctx.state.data = {
title: `Science | Current Table of Contents`,
description: `Science, a research journal`,
link: base,
title: `${pageTitleName} | Current Issue`,
description: `Current Issue of ${pageTitleName}`,
link: baseURL,
item: out,
};
};

View File

@@ -1,3 +1,5 @@
// only support Science journal
const cheerio = require('cheerio');
const got = require('@/utils/got');