perf(route): block unnecessary requests in all puppeteer routes (#10010)

* fix: block unnecessary request in all puppeteer routes

* fix(route): prestige-av migrate to v2

* fix(route): ncwu migrate to v2

* fix(route): nju/rczp without puppeteer
This commit is contained in:
Tony
2022-06-22 22:54:29 +08:00
committed by GitHub
parent f099a33f95
commit ae2464de6e
30 changed files with 278 additions and 179 deletions

View File

@@ -305,6 +305,18 @@ Refer to [Pornhub F.A.Qs](https://help.pornhub.com/hc/en-us/articles/36004432703
- `jp` (Japanese), for Pornhub in Japan <https://jp.pornhub.com/> etc. - `jp` (Japanese), for Pornhub in Japan <https://jp.pornhub.com/> etc.
## PRESTIGE(プレステージ)
### シリーズ
<RouteEn author="minimalistrojan" example="/prestige-av/series/847" path="/prestige-av/series/:mid/:sort?" :paramsDesc="['Series number', 'Sorting, `near` by default']" puppeteer="1">
| 人気順 | 新着順 | 発売日順 | タイトル順 | 価格の安い順 | 価格の高い順 |
| ------- | ------ | ---- | ----- | ---- | ---- |
| popular | near | date | title | low | high |
</RouteEn>
## s-hentai ## s-hentai
### Category ### Category

View File

@@ -1312,7 +1312,7 @@ jsjxy.hbut.edu.cn 证书链不全,自建 RSSHub 可设置环境变量 NODE_TLS
### 学校通知 ### 学校通知
<Route author="vuhe" example="/ncwu/notice" path="/ncwu/notice" puppeteer="1"/> <Route author="vuhe" example="/ncwu/notice" path="/ncwu/notice" radar="1" rssbud="1"/>
## 华东理工大学 ## 华东理工大学
@@ -1655,7 +1655,7 @@ jsjxy.hbut.edu.cn 证书链不全,自建 RSSHub 可设置环境变量 NODE_TLS
### 人才招聘网 ### 人才招聘网
<Route author="ret-1" example="/nju/rczp/xxfb" path="/nju/rczp/:type" :paramsDesc="['分类名']" puppeteer="1"> <Route author="ret-1" example="/nju/rczp/xxfb" path="/nju/rczp/:type" :paramsDesc="['分类名']">
| 信息发布 | 教研类岗位 | 管理岗位及其他 | | 信息发布 | 教研类岗位 | 管理岗位及其他 |
| ---- | ----- | ------- | | ---- | ----- | ------- |

View File

@@ -228,8 +228,8 @@ router.get('/pornhub/:language?/users/:username', lazyloadRouteHandler('./routes
router.get('/pornhub/:language?/model/:username/:sort?', lazyloadRouteHandler('./routes/pornhub/model')); router.get('/pornhub/:language?/model/:username/:sort?', lazyloadRouteHandler('./routes/pornhub/model'));
router.get('/pornhub/:language?/pornstar/:username/:sort?', lazyloadRouteHandler('./routes/pornhub/pornstar')); router.get('/pornhub/:language?/pornstar/:username/:sort?', lazyloadRouteHandler('./routes/pornhub/pornstar'));
// Prestige // Prestige migrated to v2
router.get('/prestige-av/series/:mid/:sort?', lazyloadRouteHandler('./routes/prestige-av/series')); // router.get('/prestige-av/series/:mid/:sort?', lazyloadRouteHandler('./routes/prestige-av/series'));
// yande.re // yande.re
router.get('/yande.re/post/popular_recent', lazyloadRouteHandler('./routes/yande.re/post_popular_recent')); router.get('/yande.re/post/popular_recent', lazyloadRouteHandler('./routes/yande.re/post_popular_recent'));
@@ -759,7 +759,7 @@ router.get('/upc/main/:type?', lazyloadRouteHandler('./routes/universities/upc/m
router.get('/upc/jsj/:type?', lazyloadRouteHandler('./routes/universities/upc/jsj')); router.get('/upc/jsj/:type?', lazyloadRouteHandler('./routes/universities/upc/jsj'));
// 华北水利水电大学 // 华北水利水电大学
router.get('/ncwu/notice', lazyloadRouteHandler('./routes/universities/ncwu/notice')); // router.get('/ncwu/notice', lazyloadRouteHandler('./routes/universities/ncwu/notice'));
// 太原师范学院 // 太原师范学院
router.get('/tynu', lazyloadRouteHandler('./routes/universities/tynu/tynu')); router.get('/tynu', lazyloadRouteHandler('./routes/universities/tynu/tynu'));

View File

@@ -1,42 +0,0 @@
const date = require('@/utils/date');
const cheerio = require('cheerio');
const baseUrl = 'https://www5.ncwu.edu.cn/channels/5.html';
module.exports = async (ctx) => {
const htmlCache = await ctx.cache.tryGet(
baseUrl,
async () => {
// 由于学校系统升级,更换为使用 puppeteer 渲染页面获取
const browser = await require('@/utils/puppeteer')();
const page = await browser.newPage();
await page.goto(baseUrl);
const html = await page.evaluate(
() =>
// eslint-disable-next-line no-undef
document.querySelector('div.news-list').innerHTML
);
browser.close();
return html;
},
60 * 60 * 12
); // 防止访问频率过高
const $ = cheerio.load(htmlCache);
const list = $('div.news-item');
ctx.state.data = {
title: '华水学校通知',
link: baseUrl,
item:
list &&
list
.map((index, item) => ({
title: `` + $(item).find('a.dw').text() + `` + $(item).find('a.dw').next().text(),
description: $(item).find('div.detail').text(),
pubDate: date($(item).find('div.month').text() + '-' + $(item).find('div.day').text()),
link: $(item).find('a.dw').next().attr('href'),
}))
.get(),
};
};

View File

@@ -1,40 +1,18 @@
const cheerio = require('cheerio'); const cheerio = require('cheerio');
const { parseDate } = require('@/utils/parse-date');
const dayjs = require('dayjs');
module.exports = async (ctx) => { module.exports = async (ctx) => {
const baseIndexUrl = 'https://www.sice.uestc.edu.cn/index.htm'; const baseIndexUrl = 'https://www.sice.uestc.edu.cn/index.htm';
const host = 'https://www.sice.uestc.edu.cn/'; const host = 'https://www.sice.uestc.edu.cn/';
const browser = await require('@/utils/puppeteer')(); const browser = await require('@/utils/puppeteer')({ stealth: true });
const page = await browser.newPage(); const page = await browser.newPage();
await page.setRequestInterception(true);
// 浏览器伪装 page.on('request', (request) => {
await page.evaluateOnNewDocument(() => { request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
// 在每个新页面打开前执行以下脚本
const newProto = navigator.__proto__;
delete newProto.webdriver; // 删除navigator.webdriver字段
navigator.__proto__ = newProto;
window.chrome = {}; // 添加window.chrome字段为增加真实性还需向内部填充一些值
window.chrome.app = { InstallState: 'hehe', RunningState: 'haha', getDetails: 'xixi', getIsInstalled: 'ohno' };
window.chrome.csi = function () {};
window.chrome.loadTimes = function () {};
window.chrome.runtime = function () {};
Object.defineProperty(navigator, 'userAgent', {
// userAgent在无头模式下有headless字样所以需覆写
get: () => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36',
});
Object.defineProperty(navigator, 'plugins', {
// 伪装真实的插件信息
get: () => [{ description: 'Portable Document Format', filename: 'internal-pdf-viewer', length: 1, name: 'Chrome PDF Plugin' }],
});
Object.defineProperty(navigator, 'languages', {
// 添加语言
get: () => ['zh-CN', 'zh', 'en'],
});
const originalQuery = window.navigator.permissions.query; // notification伪装
window.navigator.permissions.query = (parameters) => (parameters.name === 'notifications' ? Promise.resolve({ state: Notification.permission }) : originalQuery(parameters));
}); });
await page.goto(baseIndexUrl, { await page.goto(baseIndexUrl, {
waitUntil: 'networkidle0', waitUntil: 'networkidle2',
}); });
const content = await page.content(); const content = await page.content();
await browser.close(); await browser.close();
@@ -43,14 +21,15 @@ module.exports = async (ctx) => {
const out = $('.notice p') const out = $('.notice p')
.map((index, item) => { .map((index, item) => {
item = $(item); item = $(item);
let date = new Date(new Date().getFullYear() + '-' + item.find('a.date').text()); const now = dayjs();
if (new Date() < date) { let date = dayjs(now.year() + '-' + item.find('a.date').text());
date = new Date(new Date().getFullYear() - 1 + '-' + item.find('a.date').text()); if (now < date) {
date = dayjs(now.year() - 1 + '-' + item.find('a.date').text());
} }
return { return {
title: item.find('a[href]').text(), title: item.find('a[href]').text(),
link: host + item.find('a[href]').attr('href'), link: host + item.find('a[href]').attr('href'),
pubDate: date, pubDate: parseDate(date),
}; };
}) })
.get(); .get();

View File

@@ -10,18 +10,26 @@ module.exports = async (ctx) => {
const BASE = utils.langBase(lang); const BASE = utils.langBase(lang);
const page = await browser.newPage(); const page = await browser.newPage();
await page.goto(BASE); await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
});
await page.goto(BASE, {
waitUntil: 'domcontentloaded',
});
const articles = await page.evaluate(() => window.articles); const articles = await page.evaluate(() => window.articles);
await browser.close();
const list = utils const list = utils
.typeFilter(articles, type) .typeFilter(articles, type)
.slice(0, 11) .slice(0, ctx.query.limit ? Number(ctx.query.limit) : 30)
.map((item) => ({ .map((item) => ({
title: item.name, title: item.name,
category: item.tags.map((tag) => tag.name).join(','), category: item.tags.map((tag) => tag.name),
link: utils.BASE_URL + item.url, link: utils.BASE_URL + item.url,
pubDate: parseDate(item.time, 'YYYY-MM-DD'), pubDate: parseDate(item.time, 'YYYY-MM-DD'),
})); }));
const items = await Promise.all( const items = await Promise.all(
list.map((item) => list.map((item) =>
ctx.cache.tryGet(item.link, async () => { ctx.cache.tryGet(item.link, async () => {
@@ -37,6 +45,7 @@ module.exports = async (ctx) => {
}) })
) )
); );
ctx.state.data = { ctx.state.data = {
title: `CCAC ${type}`, title: `CCAC ${type}`,
link: BASE, link: BASE,

View File

@@ -64,6 +64,10 @@ module.exports = async (ctx) => {
async () => { async () => {
const browser = await require('@/utils/puppeteer')(); const browser = await require('@/utils/puppeteer')();
const page = await browser.newPage(); const page = await browser.newPage();
await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' ? request.continue() : request.abort();
});
await page.goto(tagUrl, { await page.goto(tagUrl, {
waitUntil: 'domcontentloaded', waitUntil: 'domcontentloaded',
}); });

View File

@@ -23,33 +23,34 @@ module.exports = async (ctx) => {
link = `${host}/customs/302249/zfxxgk/2799825/2799883/index.html`; link = `${host}/customs/302249/zfxxgk/2799825/2799883/index.html`;
break; break;
} }
const response = await ctx.cache.tryGet(
const browser = await require('@/utils/puppeteer')();
const list = await ctx.cache.tryGet(
link, link,
async () => { async () => {
const response = await puppeteerGet(link); const response = await puppeteerGet(link, browser);
return response; const $ = cheerio.load(response);
const list = $('[class^="conList_ul"] li')
.toArray()
.map((item) => {
item = $(item);
return {
title: item.find('a').attr('title'),
link: new URL(item.find('a').attr('href'), host).href,
date: parseDate(item.find('span').text()),
};
});
return list;
}, },
config.cache.routeExpire, config.cache.routeExpire,
false false
); );
const $ = cheerio.load(response);
const list = $('[class^="conList_ul"] li')
.toArray()
.map((item) => {
item = $(item);
return {
title: item.find('a').attr('title'),
link: new URL(item.find('a').attr('href'), host).href,
date: parseDate(item.find('span').text()),
};
});
const out = await Promise.all( const out = await Promise.all(
list.map((info) => list.map((info) =>
ctx.cache.tryGet(info.link, async () => { ctx.cache.tryGet(info.link, async () => {
const response = await puppeteerGet(info.link); const response = await puppeteerGet(info.link, browser);
const $ = cheerio.load(response); const $ = cheerio.load(response);
let date; let date;
@@ -68,6 +69,9 @@ module.exports = async (ctx) => {
}) })
) )
); );
browser.close();
ctx.state.data = { ctx.state.data = {
title: `中国海关-${channelName}`, title: `中国海关-${channelName}`,
link, link,

View File

@@ -1,13 +1,17 @@
const host = 'http://www.customs.gov.cn'; const host = 'http://www.customs.gov.cn';
const puppeteerGet = async (url) => { const puppeteerGet = async (url, browser) => {
const browser = await require('@/utils/puppeteer')({ stealth: true });
const page = await browser.newPage(); const page = await browser.newPage();
page.setExtraHTTPHeaders({ referer: host }); await page.setExtraHTTPHeaders({ referer: host });
await page.goto(url); await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
});
await page.goto(url, {
waitUntil: 'domcontentloaded',
});
await page.waitForSelector('.pubCon'); await page.waitForSelector('.pubCon');
const html = await page.evaluate(() => document.documentElement.innerHTML); const html = await page.evaluate(() => document.documentElement.innerHTML);
browser.close();
return html; return html;
}; };

View File

@@ -5,22 +5,25 @@ const timezone = require('@/utils/timezone');
module.exports = async (ctx) => { module.exports = async (ctx) => {
const link = 'http://www.pbc.gov.cn/goutongjiaoliu/113456/113469/index.html'; const link = 'http://www.pbc.gov.cn/goutongjiaoliu/113456/113469/index.html';
const browser = await require('@/utils/puppeteer')(); const browser = await require('@/utils/puppeteer')({ stealth: true });
const page = await browser.newPage(); const page = await browser.newPage();
await page.goto(link); await page.setRequestInterception(true);
const html = await page.evaluate( page.on('request', (request) => {
() => request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
// eslint-disable-next-line });
document.querySelector('body').innerHTML await page.goto(link, {
); waitUntil: 'domcontentloaded',
});
const html = await page.evaluate(() => document.documentElement.innerHTML);
const $ = cheerio.load(html); const $ = cheerio.load(html);
const list = $('font.newslist_style') const list = $('font.newslist_style')
.map((_, item) => { .map((_, item) => {
item = $(item); item = $(item);
const a = item.find('a[title]'); const a = item.find('a[title]');
return { return {
title: a.text(), title: a.attr('title'),
link: new URL(a.attr('href'), 'http://www.pbc.gov.cn'), link: new URL(a.attr('href'), 'http://www.pbc.gov.cn').href,
}; };
}) })
.get(); .get();
@@ -29,12 +32,14 @@ module.exports = async (ctx) => {
list.map((item) => list.map((item) =>
ctx.cache.tryGet(item.link, async () => { ctx.cache.tryGet(item.link, async () => {
const detailPage = await browser.newPage(); const detailPage = await browser.newPage();
await detailPage.goto(item.link); await detailPage.setRequestInterception(true);
const detailHtml = await detailPage.evaluate( detailPage.on('request', (request) => {
() => request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
// eslint-disable-next-line });
document.querySelector('body').innerHTML await detailPage.goto(item.link, {
); waitUntil: 'domcontentloaded',
});
const detailHtml = await detailPage.evaluate(() => document.documentElement.innerHTML);
const content = cheerio.load(detailHtml); const content = cheerio.load(detailHtml);
item.description = content('#zoom').html(); item.description = content('#zoom').html();
item.pubDate = timezone(parseDate(content('.hui12').eq(5).text()), +8); item.pubDate = timezone(parseDate(content('.hui12').eq(5).text()), +8);

View File

@@ -7,20 +7,22 @@ module.exports = async (ctx) => {
const browser = await require('@/utils/puppeteer')(); const browser = await require('@/utils/puppeteer')();
const page = await browser.newPage(); const page = await browser.newPage();
await page.goto(link); await page.setRequestInterception(true);
const html = await page.evaluate( page.on('request', (request) => {
() => request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
// eslint-disable-next-line });
document.querySelector('body').innerHTML await page.goto(link, {
); waitUntil: 'domcontentloaded',
});
const html = await page.evaluate(() => document.documentElement.innerHTML);
const $ = cheerio.load(html); const $ = cheerio.load(html);
const list = $('font.newslist_style') const list = $('font.newslist_style')
.map((_, item) => { .map((_, item) => {
item = $(item); item = $(item);
const a = item.find('a[title]'); const a = item.find('a[title]');
return { return {
title: a.text(), title: a.attr('title'),
link: new URL(a.attr('href'), 'http://www.pbc.gov.cn'), link: new URL(a.attr('href'), 'http://www.pbc.gov.cn').href,
}; };
}) })
.get(); .get();
@@ -29,12 +31,14 @@ module.exports = async (ctx) => {
list.map((item) => list.map((item) =>
ctx.cache.tryGet(item.link, async () => { ctx.cache.tryGet(item.link, async () => {
const detailPage = await browser.newPage(); const detailPage = await browser.newPage();
await detailPage.goto(item.link); await detailPage.setRequestInterception(true);
const detailHtml = await detailPage.evaluate( detailPage.on('request', (request) => {
() => request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
// eslint-disable-next-line });
document.querySelector('body').innerHTML await detailPage.goto(item.link, {
); waitUntil: 'domcontentloaded',
});
const detailHtml = await detailPage.evaluate(() => document.documentElement.innerHTML);
const content = cheerio.load(detailHtml); const content = cheerio.load(detailHtml);
item.description = content('#zoom').html(); item.description = content('#zoom').html();
item.pubDate = timezone(parseDate(content('#shijian').text()), +8); item.pubDate = timezone(parseDate(content('#shijian').text()), +8);

View File

@@ -11,8 +11,13 @@ module.exports = async (ctx) => {
async () => { async () => {
const browser = await require('@/utils/puppeteer')({ stealth: true }); const browser = await require('@/utils/puppeteer')({ stealth: true });
const page = await browser.newPage(); const page = await browser.newPage();
await page.setRequestInterception(true);
await page.goto(targetUrl); page.on('request', (request) => {
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
});
await page.goto(targetUrl, {
waitUntil: 'domcontentloaded',
});
await page.waitForSelector('div.container'); await page.waitForSelector('div.container');
const html = await page.evaluate(() => document.documentElement.innerHTML); const html = await page.evaluate(() => document.documentElement.innerHTML);

View File

@@ -6,13 +6,13 @@ module.exports = {
title: '分类', title: '分类',
docs: 'https://docs.rsshub.app/finance.html#mei-jing-wang', docs: 'https://docs.rsshub.app/finance.html#mei-jing-wang',
source: '/', source: '/',
targer: '/nbd', target: '/nbd',
}, },
{ {
title: '重磅原创', title: '重磅原创',
docs: 'https://docs.rsshub.app/finance.html#mei-jing-wang', docs: 'https://docs.rsshub.app/finance.html#mei-jing-wang',
source: '/', source: '/',
targer: '/nbd/daily', target: '/nbd/daily',
}, },
], ],
}, },

View File

@@ -0,0 +1,3 @@
module.exports = {
'/note': ['vuhe'],
};

27
lib/v2/ncwu/notice.js Normal file
View File

@@ -0,0 +1,27 @@
const got = require('@/utils/got');
const { parseDate } = require('@/utils/parse-date');
const cheerio = require('cheerio');
const baseUrl = 'https://www.ncwu.edu.cn/xxtz.htm';
module.exports = async (ctx) => {
const response = await got(baseUrl);
const $ = cheerio.load(response.data);
const list = $('div.news-item')
.toArray()
.map((item) => {
item = $(item);
return {
title: `` + item.find('a.dw').text() + `` + item.find('a.dw').next().text(),
description: item.find('div.detail').text(),
pubDate: parseDate(item.find('div.month').text() + '-' + item.find('div.day').text(), 'YYYY-MM-DD'),
link: item.find('a.dw').next().attr('href'),
};
});
ctx.state.data = {
title: $('title').text(),
link: baseUrl,
item: list,
};
};

13
lib/v2/ncwu/radar.js Normal file
View File

@@ -0,0 +1,13 @@
module.exports = {
'ncwu.edu.cn': {
_name: '华北水利水电大学',
'.': [
{
title: '学校通知',
docs: 'https://docs.rsshub.app/university.html#hua-bei-shui-li-shui-dian-da-xue',
source: '/xxtz.htm',
target: '/ncwu/notice',
},
],
},
};

3
lib/v2/ncwu/router.js Normal file
View File

@@ -0,0 +1,3 @@
module.exports = (router) => {
router.get('/notice', require('./notice'));
};

View File

@@ -1,38 +1,48 @@
const cheerio = require('cheerio'); const got = require('@/utils/got');
const { parseDate } = require('@/utils/parse-date'); const { parseDate } = require('@/utils/parse-date');
const timezone = require('@/utils/timezone'); const config = require('@/config').value;
module.exports = async (ctx) => { module.exports = async (ctx) => {
const type = ctx.params.type; const type = ctx.params.type;
const title_dict = { const title_dict = {
xxfb: '信息发布', xxfb: { title: '信息发布', channelid: '9531,9532,9533,9534,9535,9419' },
jylgw: '教研类岗位', jylgw: { title: '教研类岗位', channelid: '9420,9421,9422,9423' },
gllgw: '管理岗位及其他', gllgw: { title: '管理岗位及其他', channelid: '9424,9425,9426' },
}; };
const browser = await require('@/utils/puppeteer')();
const page = await browser.newPage();
const link = `https://rczp.nju.edu.cn/sylm/${type}/index.html`; const link = `https://rczp.nju.edu.cn/sylm/${type}/index.html`;
await page.goto(link);
const html = await page.evaluate(() => document.querySelector('div.u-list').innerHTML); const data = await ctx.cache.tryGet(
browser.close(); `nju:rczp:${type}`,
async () => {
const { data } = await got.post('https://rczp.nju.edu.cn/njdx/openapi/t/info/list.do', {
headers: {
referer: link,
'x-requested-with': 'XMLHttpRequest',
},
form: {
channelid: Buffer.from(title_dict[type].channelid).toString('base64'),
pagesize: Buffer.from('15').toString('base64'),
pageno: Buffer.from('1').toString('base64'),
hasPage: Buffer.from('true').toString('base64'),
},
});
return data;
},
config.cache.routeExpire,
false
);
const $ = cheerio.load(html); const items = data.infolist.map((item) => ({
const list = $('div.item'); title: item.title,
description: item.summary,
link: item.url,
pubDate: parseDate(item.releasetime, 'x'),
author: item.username,
}));
ctx.state.data = { ctx.state.data = {
title: `人才招聘-${title_dict[type]}`, title: `人才招聘-${title_dict[type].title}`,
link, link,
item: list item: items,
.map((index, item) => {
item = $(item);
return {
title: item.find('a').attr('title'),
link: item.find('a').attr('href'),
pubDate: timezone(parseDate(item.find('span').first().text(), 'YYYY-MM-DD'), +8),
};
})
.get(),
}; };
}; };

View File

@@ -10,6 +10,10 @@ async function getContent(url, pptr = false) {
// get: () => undefined, // get: () => undefined,
// }); // });
// }); // });
await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
});
await page.goto(url, { await page.goto(url, {
waitUntil: 'networkidle0', waitUntil: 'networkidle0',
}); });

View File

@@ -5,12 +5,11 @@ const host = 'http://aao.nuaa.edu.cn/';
* @desc 返回一个可用的cookie使用 `got` 发起请求的时候,传入到`options.headers.cookie`即可 * @desc 返回一个可用的cookie使用 `got` 发起请求的时候,传入到`options.headers.cookie`即可
*/ */
module.exports = async function getCookie() { module.exports = async function getCookie() {
const browser = await require('@/utils/puppeteer')(); const browser = await require('@/utils/puppeteer')({ stealth: true });
const page = await browser.newPage(); const page = await browser.newPage();
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'); await page.setRequestInterception(true);
await page.evaluateOnNewDocument(() => { page.on('request', (request) => {
// eslint-disable-next-line request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
Object.defineProperty(navigator, 'webdriver', { get: () => false });
}); });
await page.goto(host, { await page.goto(host, {

View File

@@ -38,7 +38,7 @@ module.exports = async (ctx) => {
try { try {
response = await ctx.cache.tryGet(`nyt: ${link}`, async () => { response = await ctx.cache.tryGet(`nyt: ${link}`, async () => {
const response = await got.get(link); const response = await got(link);
return response.data; return response.data;
}); });
@@ -46,14 +46,14 @@ module.exports = async (ctx) => {
dual = true; dual = true;
} catch (error) { } catch (error) {
response = await ctx.cache.tryGet(`nyt: ${item.link}`, async () => { response = await ctx.cache.tryGet(`nyt: ${item.link}`, async () => {
const response = await got.get(item.link); const response = await got(item.link);
return response.data; return response.data;
}); });
} }
} else { } else {
response = await ctx.cache.tryGet(`nyt: ${item.link}`, async () => { response = await ctx.cache.tryGet(`nyt: ${item.link}`, async () => {
const response = await got.get(item.link); const response = await got(item.link);
return response.data; return response.data;
}); });

View File

@@ -16,7 +16,13 @@ const ProcessImage = ($, e) => {
const PuppeterGetter = async (ctx, browser, link) => { const PuppeterGetter = async (ctx, browser, link) => {
const result = await ctx.cache.tryGet(`nyt: ${link}`, async () => { const result = await ctx.cache.tryGet(`nyt: ${link}`, async () => {
const page = await browser.newPage(); const page = await browser.newPage();
await page.goto(link); await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' ? request.continue() : request.abort();
});
await page.goto(link, {
waitUntil: 'domcontentloaded',
});
const response = await page.evaluate(() => document.querySelector('body').innerHTML); const response = await page.evaluate(() => document.querySelector('body').innerHTML);
return response; return response;
}); });

View File

@@ -4,7 +4,13 @@ const puppeteerGet = (url, cache) =>
cache.tryGet(url, async () => { cache.tryGet(url, async () => {
const browser = await require('@/utils/puppeteer')(); const browser = await require('@/utils/puppeteer')();
const page = await browser.newPage(); const page = await browser.newPage();
await page.goto(url); await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' ? request.continue() : request.abort();
});
await page.goto(url, {
waitUntil: 'domcontentloaded',
});
const html = await page.evaluate(() => document.documentElement.innerHTML); const html = await page.evaluate(() => document.documentElement.innerHTML);
browser.close(); browser.close();
return html; return html;

View File

@@ -0,0 +1,3 @@
module.exports = {
'/series/:mid/:sort?': ['minimalistrojan'],
};

View File

@@ -0,0 +1,18 @@
module.exports = {
'prestige-av.com': {
_name: 'Prestige 蚊香社',
'.': [
{
title: '系列作品',
docs: 'https://docs.rsshub.app/multimedia.html#prestige-wen-xiang-she',
source: ['/goods/goods_list.php'],
target: (_params, url) => {
const link = new URL(url);
if (link.searchParams.get('mode') === 'series') {
return link.searchParams.has('sort') ? `/prestige-av/series/${link.searchParams.get('mid')}/${link.searchParams.get('sort')}` : `/prestige-av/series/${link.searchParams.get('mid')}`;
}
},
},
],
},
};

View File

@@ -0,0 +1,3 @@
module.exports = (router) => {
router.get('/series/:mid/:sort?', require('./series'));
};

View File

@@ -7,19 +7,24 @@ module.exports = async (ctx) => {
const sort = ctx.params.sort ?? 'near'; const sort = ctx.params.sort ?? 'near';
const page = await browser.newPage(); const page = await browser.newPage();
const link = `https://www.prestige-av.com/goods/goods_list.php?mode=series&mid=${id}&sort=${sort}`; const link = `https://www.prestige-av.com/goods/goods_list.php?mode=series&mid=${id}&count=100&sort=${sort}`;
await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
});
await page.goto(link); await page.goto(link);
await page.waitForSelector('.buttons'); await page.waitForSelector('.buttons');
await page.click('#AC'); await page.click('#AC');
await page.waitForSelector('#body_goods'); await page.waitForSelector('#body_goods');
const bodyHandle = await page.$('body'); const html = await page.evaluate(() => document.documentElement.innerHTML);
const html = await page.evaluate((body) => body.innerHTML, bodyHandle); browser.close();
const $ = cheerio.load(html); const $ = cheerio.load(html);
const list = $('div#body_goods li'); const list = $('div#body_goods li');
ctx.state.data = { ctx.state.data = {
title: `【Prestige】${$('div[class=search_title_layout_01]').children('h1').first().text().replace('シリーズ ▶ ', '').replace(/\s*/g, '')}`, title: `【Prestige】${$('div[class=search_title_layout_01]').children('h1').first().text().replace('シリーズ ▶ ', '').replace(/\s*/g, '')}`,
description: $('meta[name=Description]').attr('content'),
link: `https://www.prestige-av.com/goods/goods_list.php?mode=series&mid=${id}&sort=${sort}`, link: `https://www.prestige-av.com/goods/goods_list.php?mode=series&mid=${id}&sort=${sort}`,
item: item:
list && list &&
@@ -34,6 +39,4 @@ module.exports = async (ctx) => {
}) })
.get(), .get(),
}; };
browser.close();
}; };

View File

@@ -11,8 +11,14 @@ module.exports = async (ctx) => {
async () => { async () => {
const browser = await require('@/utils/puppeteer')(); const browser = await require('@/utils/puppeteer')();
const page = await browser.newPage(); const page = await browser.newPage();
await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'fetch' ? request.continue() : request.abort();
});
await page.goto(link); await page.goto(link, {
waitUntil: 'domcontentloaded',
});
await page.waitForSelector('#pickup03 .grid-cell'); await page.waitForSelector('#pickup03 .grid-cell');
await page.waitForSelector('#pickup04 .grid-cell'); await page.waitForSelector('#pickup04 .grid-cell');
await page.waitForSelector('#main-block .grid-cell'); await page.waitForSelector('#main-block .grid-cell');

View File

@@ -8,7 +8,14 @@ module.exports = async (ctx) => {
const browser = await require('@/utils/puppeteer')(); const browser = await require('@/utils/puppeteer')();
const page = await browser.newPage(); const page = await browser.newPage();
await page.goto(link); await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'fetch' ? request.continue() : request.abort();
});
await page.goto(link, {
waitUntil: 'domcontentloaded',
});
await page.waitForSelector('#pickup03 .grid-cell'); await page.waitForSelector('#pickup03 .grid-cell');
await page.waitForSelector('#pickup04 .grid-cell'); await page.waitForSelector('#pickup04 .grid-cell');

View File

@@ -7,6 +7,10 @@ const getContent = (url, cache) =>
const browser = await require('@/utils/puppeteer')(); const browser = await require('@/utils/puppeteer')();
try { try {
const page = await browser.newPage(); const page = await browser.newPage();
await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' || request.resourceType() === 'script' || request.resourceType() === 'xhr' ? request.continue() : request.abort();
});
await page.goto(url); await page.goto(url);
await page.waitForSelector('.pc-container'); await page.waitForSelector('.pc-container');
let content = await page.content(); let content = await page.content();