feat: add cookie support for discuz (#4033)

This commit is contained in:
junfengP
2020-02-19 23:04:16 +08:00
committed by GitHub
parent 7a68497814
commit da288c5f98
7 changed files with 90 additions and 10 deletions

View File

@@ -30,6 +30,16 @@ pageClass: routes
</Route>
### 通用子版块-支持 Cookie
<Route author="junfengP" example="/discuz/x/00/https%3a%2f%2fbbs.zdfx.net%2fforum-2-1.html" path="/discuz/:ver/:cid/:link" :paramsDesc="['discuz 版本类型, 见下表', 'Cookie id, 需自建并配置环境变量, 详情见部署页面的配置模块', '子版块链接, 需要手动 Url 编码']" >
| Discuz X 系列 | Discuz 7.x 系列 |
| ------------- | --------------- |
| x | 7 |
</Route>
## MCBBS
### 版块

View File

@@ -19,3 +19,13 @@ pageClass: routes
| x | 7 |
</Route>
### General Subforum - Support cookie
<Route author="junfengP" example="/discuz/x/00/https%3a%2f%2fbbs.zdfx.net%2fforum-2-1.html" path="/discuz/:ver/:cid/:link" :paramsDesc="['discuz version, see the table below', 'Cookie id, requires a self-hosted instance with the matching environment variable set, see the Deploy - Configuration page for details', 'link of the subforum, must be URL-encoded']" >
| Discuz X Series | Discuz 7.x Series |
| --------------- | ----------------- |
| x | 7 |
</Route>

View File

@@ -372,3 +372,7 @@ Access control includes a whitelist and a blacklist, support IP and route, use `
- `NHENTAI_USERNAME`: nhentai username or email
- `NHENTAI_PASSWORD`: nhentai password
- discuz cookies
- `DISCUZ_COOKIE_{cid}`: Cookie of a forum powered by Discuz. `cid` can be any two-digit value from 00 to 99. When visiting the discuz route, pass this `cid` to select the cookie.

View File

@@ -409,3 +409,7 @@ RSSHub 支持 `memory` 和 `redis` 两种缓存方式
- `NHENTAI_USERNAME`: nhentai 用户名或邮箱
- `NHENTAI_PASSWORD`: nhentai 密码
- discuz cookies 设定
- `DISCUZ_COOKIE_{cid}`: 某 Discuz 驱动的论坛,用户注册后的 Cookie 值, cid 可自由设定,取值范围 [00, 99], 使用 discuz 通用路由时, 通过指定 cid 来调用该 cookie

View File

@@ -6,6 +6,7 @@ const calculateValue = () => {
const bilibili_cookies = {};
const twitter_tokens = {};
const email_config = {};
const discuz_cookies = {};
for (const name in envs) {
if (name.startsWith('BILIBILI_COOKIE_')) {
@@ -17,6 +18,9 @@ const calculateValue = () => {
} else if (name.startsWith('EMAIL_CONFIG_')) {
const id = name.slice(13);
email_config[id] = envs[name];
} else if (name.startsWith('DISCUZ_COOKIE_')) {
const cid = name.slice(14);
discuz_cookies[cid] = envs[name];
}
}
@@ -117,6 +121,9 @@ const calculateValue = () => {
username: envs.NHENTAI_USERNAME,
password: envs.NHENTAI_PASSWORD,
},
discuz: {
cookies: discuz_cookies,
},
};
};
calculateValue();

View File

@@ -2166,6 +2166,7 @@ router.get('/gbcc/trust', require('./routes/gbcc/trust'));
router.get('/apnews/topics/:topic', require('./routes/apnews/topics'));
// discuz
// Route with an explicit version and a two-digit cookie id; the handler uses
// `cid` to look up the cookie configured through DISCUZ_COOKIE_{cid}.
// The version pattern is the character class [7x]: inside a class `|` is a
// literal character, so the previous `[7|x]` also accepted "|" as a version.
router.get('/discuz/:ver([7x])/:cid([0-9]{2})/:link(.*)', require('./routes/discuz/discuz'));
// Route with an explicit version and no cookie id.
router.get('/discuz/:ver([7x])/:link(.*)', require('./routes/discuz/discuz'));
// Route with neither version nor cookie id; only `link` is passed to the handler.
router.get('/discuz/:link(.*)', require('./routes/discuz/discuz'));

View File

@@ -2,9 +2,10 @@ const got = require('@/utils/got');
const cheerio = require('cheerio');
const iconv = require('iconv-lite');
const dateUtil = require('@/utils/date');
const config = require('@/config').value;
// discuz 7.x 与 discuz x系列 通用文章内容抓取
async function load(baseUrl, itemLink, ctx, charset) {
async function load(baseUrl, itemLink, ctx, charset, header) {
// 处理相对链接
if (itemLink) {
if (baseUrl && !baseUrl.match(/^https?:\/\//)) {
@@ -25,9 +26,25 @@ async function load(baseUrl, itemLink, ctx, charset) {
// 处理编码问题
let responseData;
if (charset === 'utf-8') {
responseData = (await got.get(itemLink)).data;
responseData = (
await got({
method: 'get',
url: itemLink,
headers: header,
})
).data;
} else {
responseData = iconv.decode((await got.get({ url: itemLink, responseType: 'buffer' })).data, charset);
responseData = iconv.decode(
(
await got({
method: 'get',
url: itemLink,
responseType: 'buffer',
headers: header,
})
).data,
charset
);
}
if (!responseData) {
const description = '获取详细内容失败';
@@ -45,9 +62,21 @@ async function load(baseUrl, itemLink, ctx, charset) {
module.exports = async (ctx) => {
let link = ctx.params.link;
const ver = ctx.params.ver ? ctx.params.ver.toUpperCase() : undefined;
const cid = ctx.params.cid;
link = link.replace(/:\/\//, ':/').replace(/:\//, '://');
const response = await got.get(link);
const cookie = cid === undefined ? '' : config.discuz.cookies[cid];
if (cookie === undefined) {
throw Error('缺少对应论坛的cookie.');
}
const header = {
Cookie: cookie,
Referer: link,
};
const response = await got({
method: 'get',
url: link,
headers: header,
});
const contentType = response.headers['content-type'] || '';
// 若没有指定编码则默认utf-8
let charset = 'utf-8';
@@ -59,7 +88,22 @@ module.exports = async (ctx) => {
.toLowerCase();
}
}
const responseData = charset === 'utf-8' ? response.data : iconv.decode((await got.get({ url: link, responseType: 'buffer' })).data, charset);
const responseData =
charset === 'utf-8'
? response.data
: iconv.decode(
(
await got({
method: 'get',
url: link,
responseType: 'buffer',
headers: {
Cookie: cookie,
},
})
).data,
charset
);
const $ = cheerio.load(responseData);
const title = $('head > title').text();
const version = ver ? 'DISCUZ! ' + ver : $('head > meta[name=generator]').attr('content');
@@ -67,7 +111,7 @@ module.exports = async (ctx) => {
if (version.toUpperCase().startsWith('DISCUZ! 7')) {
// discuz 7.x 系列
// 支持全文抓取限制抓取页面5个
const list = $('tbody[id^="normalthread"] tr')
const list = $('tbody[id^="normalthread"] > tr')
.slice(0, 5)
.get();
process = await Promise.all(
@@ -79,14 +123,14 @@ module.exports = async (ctx) => {
link: itemLink,
pubDate: dateUtil(item.find('td.author em').text()),
};
const detail = await load(link, itemLink, ctx, charset);
const detail = await load(link, itemLink, ctx, charset, header);
return Promise.resolve(Object.assign({}, single, detail));
})
);
} else if (version.toUpperCase().startsWith('DISCUZ! X')) {
// discuz X 系列
// 支持全文抓取限制抓取页面5个
const list = $('tbody[id^="normalthread"] tr')
const list = $('tbody[id^="normalthread"] > tr')
.slice(0, 5)
.get();
process = await Promise.all(
@@ -103,7 +147,7 @@ module.exports = async (ctx) => {
.text()
),
};
const detail = await load(link, itemLink, ctx, charset);
const detail = await load(link, itemLink, ctx, charset, header);
return Promise.resolve(Object.assign({}, single, detail));
})
);