mirror of
https://github.com/DIYgod/RSSHub.git
synced 2025-12-08 05:59:00 +08:00
feat(core): true ua (#10761)
* feat(core): true ua * test: skip mercury parser temporary
This commit is contained in:
@@ -412,7 +412,9 @@ Configure RSSHub by setting environment variables
|
||||
|
||||
`REQUEST_TIMEOUT`: milliseconds to wait for the server to end the response before aborting the request with an error, defaults to `30000`
|
||||
|
||||
`UA`: user agent, default to `Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36`
|
||||
`UA`: user agent, using random user agent (Chrome on macOS) by default
|
||||
|
||||
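`NO_RANDOM_UA`: set to `true` or `1` to disable the random user agent and send `RSSHub/1.0 (+http://github.com/DIYgod/RSSHub; like FeedFetcher-Google)` instead (ignored when `UA` is set), default to `null`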
|
||||
|
||||
### CORS Request
|
||||
|
||||
|
||||
@@ -419,7 +419,9 @@ gcloud app deploy
|
||||
|
||||
`REQUEST_TIMEOUT`: 请求超时毫秒数,默认 `30000`
|
||||
|
||||
`UA`: 用户代理,默认 `Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36`
|
||||
`UA`: 用户代理,默认为随机用户代理(macOS 上的 Chrome)
|
||||
|
||||
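`NO_RANDOM_UA`: 是否禁用随机用户代理,设为 `true` 或 `1` 时禁用,并改用 `RSSHub/1.0 (+http://github.com/DIYgod/RSSHub; like FeedFetcher-Google)`(已设置 `UA` 时此项无效),默认 `null`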
|
||||
|
||||
### 跨域请求
|
||||
|
||||
|
||||
@@ -2,6 +2,8 @@ require('dotenv').config();
|
||||
const randUserAgent = require('./utils/rand-user-agent');
|
||||
let envs = process.env;
|
||||
let value;
|
||||
const TRUE_UA = 'RSSHub/1.0 (+http://github.com/DIYgod/RSSHub; like FeedFetcher-Google)';
|
||||
|
||||
const calculateValue = () => {
|
||||
const bilibili_cookies = {};
|
||||
const twitter_tokens = {};
|
||||
@@ -40,7 +42,8 @@ const calculateValue = () => {
|
||||
listenInaddrAny: envs.LISTEN_INADDR_ANY || 1, // 是否允许公网连接,取值 0 1
|
||||
requestRetry: parseInt(envs.REQUEST_RETRY) || 2, // 请求失败重试次数
|
||||
requestTimeout: parseInt(envs.REQUEST_TIMEOUT) || 30000, // Milliseconds to wait for the server to end the response before aborting the request
|
||||
ua: envs.UA || randUserAgent({ browser: 'chrome', os: 'mac os', device: 'desktop' }),
|
||||
ua: envs.UA ? envs.UA : envs.NO_RANDOM_UA === 'true' || envs.NO_RANDOM_UA === '1' ? TRUE_UA : randUserAgent({ browser: 'chrome', os: 'mac os', device: 'desktop' }),
|
||||
trueUA: TRUE_UA,
|
||||
// cors request
|
||||
allowOrigin: envs.ALLOW_ORIGIN,
|
||||
// cache
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
const entities = require('entities');
|
||||
const cheerio = require('cheerio');
|
||||
const { simplecc } = require('simplecc-wasm');
|
||||
const got = require('@/utils/got');
|
||||
// const got = require('@/utils/got');
|
||||
const config = require('@/config').value;
|
||||
const RE2 = require('re2');
|
||||
|
||||
@@ -248,18 +248,21 @@ module.exports = async (ctx, next) => {
|
||||
// fulltext
|
||||
if (ctx.query.mode && ctx.query.mode.toLowerCase() === 'fulltext') {
|
||||
const tasks = ctx.state.data.item.map(async (item) => {
|
||||
const { link, author, description } = item;
|
||||
const { title, link, author, description } = item;
|
||||
const parsed_result = await ctx.cache.tryGet(`mercury-cache-${link}`, async () => {
|
||||
// if parser failed, return default description and not report error
|
||||
try {
|
||||
mercury_parser = mercury_parser || require('@postlight/mercury-parser');
|
||||
|
||||
const res = await got(link);
|
||||
const $ = cheerio.load(res.data, {
|
||||
xmlMode: true,
|
||||
});
|
||||
// const res = await got(link);
|
||||
// const $ = cheerio.load(res.data, {
|
||||
// xmlMode: true,
|
||||
// });
|
||||
const result = await mercury_parser.parse(link, {
|
||||
html: $.html(),
|
||||
// html: $.html(),
|
||||
headers: {
|
||||
'User-Agent': config.ua,
|
||||
},
|
||||
});
|
||||
return result;
|
||||
} catch (e) {
|
||||
@@ -267,8 +270,9 @@ module.exports = async (ctx, next) => {
|
||||
}
|
||||
});
|
||||
|
||||
item.title = parsed_result?.title || title;
|
||||
item.author = author || (parsed_result ? parsed_result.author : '');
|
||||
item.description = parsed_result ? parsed_result.content : description;
|
||||
item.description = parsed_result ? entities.decodeXML(parsed_result.content) : description;
|
||||
});
|
||||
await Promise.all(tasks);
|
||||
}
|
||||
|
||||
@@ -44,4 +44,32 @@ describe('config', () => {
|
||||
delete process.env['EMAIL_CONFIG_xx.qq.com'];
|
||||
delete process.env['EMAIL_CONFIG_oo.qq.com'];
|
||||
});
|
||||
|
||||
it('discuz cookie', () => {
|
||||
process.env.DISCUZ_COOKIE_12 = 'cookie1';
|
||||
process.env.DISCUZ_COOKIE_34 = 'cookie2';
|
||||
|
||||
const config = require('../lib/config').value;
|
||||
expect(config.discuz.cookies).toMatchObject({
|
||||
12: 'cookie1',
|
||||
34: 'cookie2',
|
||||
});
|
||||
|
||||
delete process.env.DISCUZ_COOKIE_12;
|
||||
delete process.env.DISCUZ_COOKIE_34;
|
||||
});
|
||||
|
||||
it('no random ua', () => {
|
||||
process.env.NO_RANDOM_UA = true;
|
||||
|
||||
const config = require('../lib/config').value;
|
||||
expect(config.ua).toBe('RSSHub/1.0 (+http://github.com/DIYgod/RSSHub; like FeedFetcher-Google)');
|
||||
|
||||
delete process.env.NO_RANDOM_UA;
|
||||
});
|
||||
|
||||
it('random ua', () => {
|
||||
const config = require('../lib/config').value;
|
||||
expect(config.ua).not.toBe('RSSHub/1.0 (+http://github.com/DIYgod/RSSHub; like FeedFetcher-Google)');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -313,13 +313,13 @@ describe('wrong_path', () => {
|
||||
});
|
||||
|
||||
describe('fulltext_mode', () => {
|
||||
it(`fulltext`, async () => {
|
||||
it.skip(`fulltext`, async () => {
|
||||
const response = await request.get('/test/1?mode=fulltext');
|
||||
expect(response.status).toBe(200);
|
||||
const parsed = await parser.parseString(response.text);
|
||||
expect(parsed.items[0].content).not.toBe(undefined);
|
||||
});
|
||||
}, 10000);
|
||||
}, 10000);
|
||||
});
|
||||
|
||||
describe('complicated_description', () => {
|
||||
it(`complicated_description`, async () => {
|
||||
|
||||
Reference in New Issue
Block a user