diff --git a/docs/en/install/README.md b/docs/en/install/README.md index ae38b3be96..9d81903989 100644 --- a/docs/en/install/README.md +++ b/docs/en/install/README.md @@ -412,7 +412,9 @@ Configure RSSHub by setting environment variables `REQUEST_TIMEOUT`: milliseconds to wait for the server to end the response before aborting the request with error, default to `3000` -`UA`: user agent, default to `Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36` +`UA`: user agent, using random user agent (Chrome on macOS) by default + +`NO_RANDOM_UA`: disable random user agent, default to `null` ### CORS Request diff --git a/docs/install/README.md b/docs/install/README.md index aa0ae1d534..0fee633f95 100644 --- a/docs/install/README.md +++ b/docs/install/README.md @@ -419,7 +419,9 @@ gcloud app deploy `REQUEST_TIMEOUT`: 请求超时毫秒数,默认 `3000` -`UA`: 用户代理,默认 `Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36` +`UA`: 用户代理,默认为随机用户代理用户代理(macOS 上的 Chrome) + +`NO_RANDOM_UA`: 是否禁用随机用户代理,默认 `null` ### 跨域请求 diff --git a/lib/config.js b/lib/config.js index 0faecc77f6..e9c1edf2ca 100644 --- a/lib/config.js +++ b/lib/config.js @@ -2,6 +2,8 @@ require('dotenv').config(); const randUserAgent = require('./utils/rand-user-agent'); let envs = process.env; let value; +const TRUE_UA = 'RSSHub/1.0 (+http://github.com/DIYgod/RSSHub; like FeedFetcher-Google)'; + const calculateValue = () => { const bilibili_cookies = {}; const twitter_tokens = {}; @@ -40,7 +42,8 @@ const calculateValue = () => { listenInaddrAny: envs.LISTEN_INADDR_ANY || 1, // 是否允许公网连接,取值 0 1 requestRetry: parseInt(envs.REQUEST_RETRY) || 2, // 请求失败重试次数 requestTimeout: parseInt(envs.REQUEST_TIMEOUT) || 30000, // Milliseconds to wait for the server to end the response before aborting the request - ua: envs.UA || randUserAgent({ browser: 'chrome', os: 'mac os', device: 'desktop' }), + ua: envs.UA ? envs.UA : envs.NO_RANDOM_UA === 'true' || envs.NO_RANDOM_UA === '1' ? TRUE_UA : randUserAgent({ browser: 'chrome', os: 'mac os', device: 'desktop' }), + trueUA: TRUE_UA, // cors request allowOrigin: envs.ALLOW_ORIGIN, // cache diff --git a/lib/middleware/parameter.js b/lib/middleware/parameter.js index 2ac8cc6405..8f0a4c2791 100644 --- a/lib/middleware/parameter.js +++ b/lib/middleware/parameter.js @@ -1,7 +1,7 @@ const entities = require('entities'); const cheerio = require('cheerio'); const { simplecc } = require('simplecc-wasm'); -const got = require('@/utils/got'); +// const got = require('@/utils/got'); const config = require('@/config').value; const RE2 = require('re2'); @@ -248,18 +248,21 @@ module.exports = async (ctx, next) => { // fulltext if (ctx.query.mode && ctx.query.mode.toLowerCase() === 'fulltext') { const tasks = ctx.state.data.item.map(async (item) => { - const { link, author, description } = item; + const { title, link, author, description } = item; const parsed_result = await ctx.cache.tryGet(`mercury-cache-${link}`, async () => { // if parser failed, return default description and not report error try { mercury_parser = mercury_parser || require('@postlight/mercury-parser'); - const res = await got(link); - const $ = cheerio.load(res.data, { - xmlMode: true, - }); + // const res = await got(link); + // const $ = cheerio.load(res.data, { + // xmlMode: true, + // }); const result = await mercury_parser.parse(link, { - html: $.html(), + // html: $.html(), + headers: { + 'User-Agent': config.ua, + }, }); return result; } catch (e) { @@ -267,8 +270,9 @@ module.exports = async (ctx, next) => { } }); + item.title = parsed_result?.title || title; item.author = author || (parsed_result ? parsed_result.author : ''); - item.description = parsed_result ? parsed_result.content : description; + item.description = parsed_result ? entities.decodeXML(parsed_result.content) : description; }); await Promise.all(tasks); } diff --git a/test/config.js b/test/config.js index 67531d3d1f..7ea6773e39 100644 --- a/test/config.js +++ b/test/config.js @@ -44,4 +44,32 @@ describe('config', () => { delete process.env['EMAIL_CONFIG_xx.qq.com']; delete process.env['EMAIL_CONFIG_oo.qq.com']; }); + + it('discuz cookie', () => { + process.env.DISCUZ_COOKIE_12 = 'cookie1'; + process.env.DISCUZ_COOKIE_34 = 'cookie2'; + + const config = require('../lib/config').value; + expect(config.discuz.cookies).toMatchObject({ + 12: 'cookie1', + 34: 'cookie2', + }); + + delete process.env.DISCUZ_COOKIE_12; + delete process.env.DISCUZ_COOKIE_34; + }); + + it('no random ua', () => { + process.env.NO_RANDOM_UA = true; + + const config = require('../lib/config').value; + expect(config.ua).toBe('RSSHub/1.0 (+http://github.com/DIYgod/RSSHub; like FeedFetcher-Google)'); + + delete process.env.NO_RANDOM_UA; + }); + + it('random ua', () => { + const config = require('../lib/config').value; + expect(config.ua).not.toBe('RSSHub/1.0 (+http://github.com/DIYgod/RSSHub; like FeedFetcher-Google)'); + }); }); diff --git a/test/middleware/parameter.js b/test/middleware/parameter.js index 8629b3f553..5b1182eb93 100644 --- a/test/middleware/parameter.js +++ b/test/middleware/parameter.js @@ -313,13 +313,13 @@ describe('wrong_path', () => { }); describe('fulltext_mode', () => { - it(`fulltext`, async () => { + it.skip(`fulltext`, async () => { const response = await request.get('/test/1?mode=fulltext'); expect(response.status).toBe(200); const parsed = await parser.parseString(response.text); expect(parsed.items[0].content).not.toBe(undefined); - }); -}, 10000); + }, 10000); +}); describe('complicated_description', () => { it(`complicated_description`, async () => {