feat: add cookie support for discuz (#4033)

This commit is contained in:
junfengP
2020-02-19 23:04:16 +08:00
committed by GitHub
parent 7a68497814
commit da288c5f98
7 changed files with 90 additions and 10 deletions

View File

@@ -30,6 +30,16 @@ pageClass: routes
</Route> </Route>
### 通用子版块-支持 Cookie
<Route author="junfengP" example="/discuz/x/00/https%3a%2f%2fbbs.zdfx.net%2fforum-2-1.html" path="/discuz/:ver/:cid/:link" :paramsDesc="['discuz 版本类型,见下表', 'Cookie id,需自建并配置环境变量,详情见部署页面的配置模块', '子版块链接,需要手动 Url 编码']" >
| Discuz X 系列 | Discuz 7.x 系列 |
| ------------- | --------------- |
| x | 7 |
</Route>
## MCBBS ## MCBBS
### 版块 ### 版块

View File

@@ -19,3 +19,13 @@ pageClass: routes
| x | 7 | | x | 7 |
</Route> </Route>
### General Subforum - Support cookie
<Route author="junfengP" example="/discuz/x/00/https%3a%2f%2fbbs.zdfx.net%2fforum-2-1.html" path="/discuz/:ver/:cid/:link" :paramsDesc="['discuz version, see the table below', 'Cookie id; requires a self-hosted instance with the matching environment variable set, see the Deploy - Configuration page for details', 'link of the subforum, must be URL-encoded']" >
| Discuz X Series | Discuz 7.x Series |
| --------------- | ----------------- |
| x | 7 |
</Route>

View File

@@ -372,3 +372,7 @@ Access control includes a whitelist and a blacklist, support IP and route, use `
- `NHENTAI_USERNAME`: nhentai username or email - `NHENTAI_USERNAME`: nhentai username or email
- `NHENTAI_PASSWORD`: nhentai password - `NHENTAI_PASSWORD`: nhentai password
- discuz cookies
- `DISCUZ_COOKIE_{cid}`: Cookie of a forum powered by Discuz. `cid` can be any two-digit value from 00 to 99. When visiting the discuz route, pass the same `cid` to select this cookie.

View File

@@ -409,3 +409,7 @@ RSSHub 支持 `memory` 和 `redis` 两种缓存方式
- `NHENTAI_USERNAME`: nhentai 用户名或邮箱 - `NHENTAI_USERNAME`: nhentai 用户名或邮箱
- `NHENTAI_PASSWORD`: nhentai 密码 - `NHENTAI_PASSWORD`: nhentai 密码
- discuz cookies 设定
- `DISCUZ_COOKIE_{cid}`: 某 Discuz 驱动的论坛,用户注册后的 Cookie 值。cid 可自由设定,取值范围 [00, 99]。使用 discuz 通用路由时,通过指定 cid 来调用该 cookie

View File

@@ -6,6 +6,7 @@ const calculateValue = () => {
const bilibili_cookies = {}; const bilibili_cookies = {};
const twitter_tokens = {}; const twitter_tokens = {};
const email_config = {}; const email_config = {};
const discuz_cookies = {};
for (const name in envs) { for (const name in envs) {
if (name.startsWith('BILIBILI_COOKIE_')) { if (name.startsWith('BILIBILI_COOKIE_')) {
@@ -17,6 +18,9 @@ const calculateValue = () => {
} else if (name.startsWith('EMAIL_CONFIG_')) { } else if (name.startsWith('EMAIL_CONFIG_')) {
const id = name.slice(13); const id = name.slice(13);
email_config[id] = envs[name]; email_config[id] = envs[name];
} else if (name.startsWith('DISCUZ_COOKIE_')) {
const cid = name.slice(14);
discuz_cookies[cid] = envs[name];
} }
} }
@@ -117,6 +121,9 @@ const calculateValue = () => {
username: envs.NHENTAI_USERNAME, username: envs.NHENTAI_USERNAME,
password: envs.NHENTAI_PASSWORD, password: envs.NHENTAI_PASSWORD,
}, },
discuz: {
cookies: discuz_cookies,
},
}; };
}; };
calculateValue(); calculateValue();

View File

@@ -2166,6 +2166,7 @@ router.get('/gbcc/trust', require('./routes/gbcc/trust'));
router.get('/apnews/topics/:topic', require('./routes/apnews/topics')); router.get('/apnews/topics/:topic', require('./routes/apnews/topics'));
// discuz // discuz
router.get('/discuz/:ver([7|x])/:cid([0-9]{2})/:link(.*)', require('./routes/discuz/discuz'));
router.get('/discuz/:ver([7|x])/:link(.*)', require('./routes/discuz/discuz')); router.get('/discuz/:ver([7|x])/:link(.*)', require('./routes/discuz/discuz'));
router.get('/discuz/:link(.*)', require('./routes/discuz/discuz')); router.get('/discuz/:link(.*)', require('./routes/discuz/discuz'));

View File

@@ -2,9 +2,10 @@ const got = require('@/utils/got');
const cheerio = require('cheerio'); const cheerio = require('cheerio');
const iconv = require('iconv-lite'); const iconv = require('iconv-lite');
const dateUtil = require('@/utils/date'); const dateUtil = require('@/utils/date');
const config = require('@/config').value;
// discuz 7.x 与 discuz x系列 通用文章内容抓取 // discuz 7.x 与 discuz x系列 通用文章内容抓取
async function load(baseUrl, itemLink, ctx, charset) { async function load(baseUrl, itemLink, ctx, charset, header) {
// 处理相对链接 // 处理相对链接
if (itemLink) { if (itemLink) {
if (baseUrl && !baseUrl.match(/^https?:\/\//)) { if (baseUrl && !baseUrl.match(/^https?:\/\//)) {
@@ -25,9 +26,25 @@ async function load(baseUrl, itemLink, ctx, charset) {
// 处理编码问题 // 处理编码问题
let responseData; let responseData;
if (charset === 'utf-8') { if (charset === 'utf-8') {
responseData = (await got.get(itemLink)).data; responseData = (
await got({
method: 'get',
url: itemLink,
headers: header,
})
).data;
} else { } else {
responseData = iconv.decode((await got.get({ url: itemLink, responseType: 'buffer' })).data, charset); responseData = iconv.decode(
(
await got({
method: 'get',
url: itemLink,
responseType: 'buffer',
headers: header,
})
).data,
charset
);
} }
if (!responseData) { if (!responseData) {
const description = '获取详细内容失败'; const description = '获取详细内容失败';
@@ -45,9 +62,21 @@ async function load(baseUrl, itemLink, ctx, charset) {
module.exports = async (ctx) => { module.exports = async (ctx) => {
let link = ctx.params.link; let link = ctx.params.link;
const ver = ctx.params.ver ? ctx.params.ver.toUpperCase() : undefined; const ver = ctx.params.ver ? ctx.params.ver.toUpperCase() : undefined;
const cid = ctx.params.cid;
link = link.replace(/:\/\//, ':/').replace(/:\//, '://'); link = link.replace(/:\/\//, ':/').replace(/:\//, '://');
const cookie = cid === undefined ? '' : config.discuz.cookies[cid];
const response = await got.get(link); if (cookie === undefined) {
throw Error('缺少对应论坛的cookie.');
}
const header = {
Cookie: cookie,
Referer: link,
};
const response = await got({
method: 'get',
url: link,
headers: header,
});
const contentType = response.headers['content-type'] || ''; const contentType = response.headers['content-type'] || '';
// 若没有指定编码则默认utf-8 // 若没有指定编码则默认utf-8
let charset = 'utf-8'; let charset = 'utf-8';
@@ -59,7 +88,22 @@ module.exports = async (ctx) => {
.toLowerCase(); .toLowerCase();
} }
} }
const responseData = charset === 'utf-8' ? response.data : iconv.decode((await got.get({ url: link, responseType: 'buffer' })).data, charset); const responseData =
charset === 'utf-8'
? response.data
: iconv.decode(
(
await got({
method: 'get',
url: link,
responseType: 'buffer',
headers: {
Cookie: cookie,
},
})
).data,
charset
);
const $ = cheerio.load(responseData); const $ = cheerio.load(responseData);
const title = $('head > title').text(); const title = $('head > title').text();
const version = ver ? 'DISCUZ! ' + ver : $('head > meta[name=generator]').attr('content'); const version = ver ? 'DISCUZ! ' + ver : $('head > meta[name=generator]').attr('content');
@@ -67,7 +111,7 @@ module.exports = async (ctx) => {
if (version.toUpperCase().startsWith('DISCUZ! 7')) { if (version.toUpperCase().startsWith('DISCUZ! 7')) {
// discuz 7.x 系列 // discuz 7.x 系列
// 支持全文抓取限制抓取页面5个 // 支持全文抓取限制抓取页面5个
const list = $('tbody[id^="normalthread"] tr') const list = $('tbody[id^="normalthread"] > tr')
.slice(0, 5) .slice(0, 5)
.get(); .get();
process = await Promise.all( process = await Promise.all(
@@ -79,14 +123,14 @@ module.exports = async (ctx) => {
link: itemLink, link: itemLink,
pubDate: dateUtil(item.find('td.author em').text()), pubDate: dateUtil(item.find('td.author em').text()),
}; };
const detail = await load(link, itemLink, ctx, charset); const detail = await load(link, itemLink, ctx, charset, header);
return Promise.resolve(Object.assign({}, single, detail)); return Promise.resolve(Object.assign({}, single, detail));
}) })
); );
} else if (version.toUpperCase().startsWith('DISCUZ! X')) { } else if (version.toUpperCase().startsWith('DISCUZ! X')) {
// discuz X 系列 // discuz X 系列
// 支持全文抓取限制抓取页面5个 // 支持全文抓取限制抓取页面5个
const list = $('tbody[id^="normalthread"] tr') const list = $('tbody[id^="normalthread"] > tr')
.slice(0, 5) .slice(0, 5)
.get(); .get();
process = await Promise.all( process = await Promise.all(
@@ -103,7 +147,7 @@ module.exports = async (ctx) => {
.text() .text()
), ),
}; };
const detail = await load(link, itemLink, ctx, charset); const detail = await load(link, itemLink, ctx, charset, header);
return Promise.resolve(Object.assign({}, single, detail)); return Promise.resolve(Object.assign({}, single, detail));
}) })
); );