From cde5d9af45c6e7383f48ab3ced3de60b2e310db1 Mon Sep 17 00:00:00 2001 From: NeverBehave Date: Mon, 27 Apr 2020 22:27:47 -0400 Subject: [PATCH] feat: anti-hotlink for images (#4481) --- docs/en/install/README.md | 2 + docs/install/README.md | 2 + lib/app.js | 4 ++ lib/config.js | 3 ++ lib/middleware/anti-hotlink.js | 54 +++++++++++++++++++++ test/middleware/anti-hotlink.js | 84 +++++++++++++++++++++++++++++++++ 6 files changed, 149 insertions(+) create mode 100644 lib/middleware/anti-hotlink.js create mode 100644 test/middleware/anti-hotlink.js diff --git a/docs/en/install/README.md b/docs/en/install/README.md index f61113f08b..433a249316 100644 --- a/docs/en/install/README.md +++ b/docs/en/install/README.md @@ -349,6 +349,8 @@ Access control includes a whitelist and a blacklist, support IP and route, use ` `DISALLOW_ROBOT`: prevent indexing by search engine +`HOTLINK_TEMPLATE`: Replaces image links in descriptions to bypass hotlink protection; leave blank to disable this function. For usage, see [#2769](https://github.com/DIYgod/RSSHub/issues/2769). You may use any property listed in [URL](https://developer.mozilla.org/en-US/docs/Web/API/URL#Properties), written in the format of a JS template literal, e.g. 
`${protocol}//${host}${pathname}`, `https://i3.wp.com/${host}${pathname}` + ### Route-specific Configurations - pixiv: [Registration](https://accounts.pixiv.net/signup) diff --git a/docs/install/README.md b/docs/install/README.md index 5393e32fa9..205d30f8c9 100644 --- a/docs/install/README.md +++ b/docs/install/README.md @@ -353,6 +353,8 @@ RSSHub 支持 `memory` 和 `redis` 两种缓存方式 `DISALLOW_ROBOT`: 防止被搜索引擎收录 +`HOTLINK_TEMPLATE`: 用于处理描述中图片的链接,绕过防盗链等限制,留空不生效。用法参考[#2769](https://github.com/DIYgod/RSSHub/issues/2769)。可以使用[URL](https://developer.mozilla.org/en-US/docs/Web/API/URL#Properties)的所有属性,格式为 JS 变量模板。例子:`${protocol}//${host}${pathname}`, `https://i3.wp.com/${host}${pathname}` + ### 部分 RSS 模块配置 - pixiv 全部路由: [注册地址](https://accounts.pixiv.net/signup) diff --git a/lib/app.js b/lib/app.js index a0021206e1..904adf3001 100644 --- a/lib/app.js +++ b/lib/app.js @@ -15,6 +15,7 @@ const template = require('./middleware/template'); const favicon = require('koa-favicon'); const debug = require('./middleware/debug'); const accessControl = require('./middleware/access-control'); +const antiHotlink = require('./middleware/anti-hotlink'); const router = require('./router'); const protected_router = require('./protected_router'); @@ -63,6 +64,9 @@ app.use(apiResponseHandler()); // 4 generate body app.use(template); +// anti-hotlink +app.use(antiHotlink); + // 3 filter content app.use(parameter); diff --git a/lib/config.js b/lib/config.js index cf17473e7b..9573cc46f6 100644 --- a/lib/config.js +++ b/lib/config.js @@ -128,6 +128,9 @@ const calculateValue = () => { scihub: { host: envs.SCIHUB_HOST || 'https://sci-hub.tw/', }, + hotlink: { + template: envs.HOTLINK_TEMPLATE, + }, }; }; calculateValue(); diff --git a/lib/middleware/anti-hotlink.js b/lib/middleware/anti-hotlink.js new file mode 100644 index 0000000000..ffb53bea51 --- /dev/null +++ b/lib/middleware/anti-hotlink.js @@ -0,0 +1,54 @@ +const config = require('@/config').value; +const cheerio = require('cheerio'); +const logger 
= require('@/utils/logger');
+
+// Expand ${prop} placeholders in str with properties of obj. Unknown or function-valued properties expand to '' instead of the text "undefined" or a function's source.
+const interpolate = (str, obj) =>
+    str.replace(/\${([^}]+)}/g, (_, prop) => {
+        const value = obj[prop];
+        return value === undefined || typeof value === 'function' ? '' : String(value);
+    });
+
+// Parse str with the standard URL class rather than keeping another regex; logs and returns undefined when str is not an absolute URL.
+const parseUrl = (str) => {
+    try {
+        return new URL(str);
+    } catch (e) {
+        logger.error(`Failed to parse ${str}`);
+        return undefined;
+    }
+};
+
+// Rewrite the src of every <img> in the HTML fragment body through template and return the serialized result.
+const replaceUrls = (body, template) => {
+    const $ = cheerio.load(body, { decodeEntities: false, xmlMode: true });
+    $('img').each(function () {
+        const oldSrc = $(this).attr('src');
+        // Skip images without src (nothing to rewrite or log) and non-http(s) sources: a data: URI pushed through the template would stop being a valid image.
+        const url = oldSrc ? parseUrl(oldSrc) : undefined;
+        if (url && (url.protocol === 'http:' || url.protocol === 'https:')) {
+            $(this).attr('src', interpolate(template, url));
+        }
+    });
+
+    return $.root().html();
+};
+
+// Middleware: after the downstream middleware has run, rewrite image links in the feed description and in each item description
+// (images are assumed to appear only there). Does nothing when config.hotlink.template is empty or ctx.state.data is missing.
+module.exports = async (ctx, next) => {
+    await next();
+
+    const template = config.hotlink.template;
+    const data = ctx.state.data;
+    if (!template || !data) {
+        return;
+    }
+    const rewrite = (target) => {
+        if (target && target.description) {
+            target.description = replaceUrls(target.description, template);
+        }
+    };
+    rewrite(data);
+    (Array.isArray(data.item) ? data.item : []).forEach(rewrite);
+};
diff --git a/test/middleware/anti-hotlink.js b/test/middleware/anti-hotlink.js
new file mode 100644
index 0000000000..23a02ff785
--- /dev/null
+++ b/test/middleware/anti-hotlink.js
@@ -0,0 +1,84 @@
+const supertest = require('supertest');
+jest.mock('request-promise-native');
+const Parser = require('rss-parser');
+const parser = new Parser();
+let server;
+
+afterAll(() => {
+    delete process.env.HOTLINK_TEMPLATE;
+});
+
+afterEach(() => {
+    delete process.env.HOTLINK_TEMPLATE;
+    jest.resetModules();
+    server.close();
+});
+
+describe('anti-hotlink', () => {
+    it('template', async () => {
+        process.env.HOTLINK_TEMPLATE = 
'https://i3.wp.com/${host}${pathname}'; + server = require('../../lib/index'); + const request = supertest(server); + + const response = await request.get('/test/complicated'); + const parsed = await parser.parseString(response.text); + expect(parsed.items[0].content).toBe( + ` + + + + + + + + +` + ); + expect(parsed.items[1].content).toBe(` +`); + }); + it('url', async () => { + process.env.HOTLINK_TEMPLATE = '${protocol}//${host}${pathname}'; + server = require('../../lib/index'); + const request = supertest(server); + + const response = await request.get('/test/complicated'); + const parsed = await parser.parseString(response.text); + expect(parsed.items[0].content).toBe( + ` + + + + + + + + +` + ); + expect(parsed.items[1].content).toBe(` +`); + }); + it('no-template', async () => { + process.env.HOTLINK_TEMPLATE = ''; + server = require('../../lib/index'); + const request = supertest(server); + + const response = await request.get('/test/complicated'); + const parsed = await parser.parseString(response.text); + expect(parsed.items[0].content).toBe( + ` + + + + + + + + +` + ); + expect(parsed.items[1].content).toBe(` +`); + }); +});