From cde5d9af45c6e7383f48ab3ced3de60b2e310db1 Mon Sep 17 00:00:00 2001
From: NeverBehave <gayhub@never.pet>
Date: Mon, 27 Apr 2020 22:27:47 -0400
Subject: [PATCH] feat: anti-hotlink for images (#4481)

---
 docs/en/install/README.md       |  2 +
 docs/install/README.md          |  2 +
 lib/app.js                      |  4 ++
 lib/config.js                   |  3 ++
 lib/middleware/anti-hotlink.js  | 54 +++++++++++++++++++++
 test/middleware/anti-hotlink.js | 84 +++++++++++++++++++++++++++++++++
 6 files changed, 149 insertions(+)
 create mode 100644 lib/middleware/anti-hotlink.js
 create mode 100644 test/middleware/anti-hotlink.js

diff --git a/docs/en/install/README.md b/docs/en/install/README.md
index f61113f08b..433a249316 100644
--- a/docs/en/install/README.md
+++ b/docs/en/install/README.md
@@ -349,6 +349,8 @@ Access control includes a whitelist and a blacklist, support IP and route, use `
 
 `DISALLOW_ROBOT`: prevent indexing by search engine
 
+`HOTLINK_TEMPLATE`: Replaces image links in descriptions to get around anti-hotlink protection; leave blank to disable this function. For usage, see [#2769](https://github.com/DIYgod/RSSHub/issues/2769). You may use any property listed in [URL](https://developer.mozilla.org/en-US/docs/Web/API/URL#Properties), written in JS template literal format, e.g. `${protocol}//${host}${pathname}`, `https://i3.wp.com/${host}${pathname}`
+
 ### Route-specific Configurations
 
 -   pixiv: [Registration](https://accounts.pixiv.net/signup)
diff --git a/docs/install/README.md b/docs/install/README.md
index 5393e32fa9..205d30f8c9 100644
--- a/docs/install/README.md
+++ b/docs/install/README.md
@@ -353,6 +353,8 @@ RSSHub 支持 `memory` 和 `redis` 两种缓存方式
 
 `DISALLOW_ROBOT`: 防止被搜索引擎收录
 
+`HOTLINK_TEMPLATE`: 用于处理描述中图片的链接，绕过防盗链等限制，留空不生效。用法参考[#2769](https://github.com/DIYgod/RSSHub/issues/2769)。可以使用[URL](https://developer.mozilla.org/en-US/docs/Web/API/URL#Properties)的所有属性，格式为 JS 变量模板。例子：`${protocol}//${host}${pathname}`, `https://i3.wp.com/${host}${pathname}`
+
 ### 部分 RSS 模块配置
 
 -   pixiv 全部路由: [注册地址](https://accounts.pixiv.net/signup)
diff --git a/lib/app.js b/lib/app.js
index a0021206e1..904adf3001 100644
--- a/lib/app.js
+++ b/lib/app.js
@@ -15,6 +15,7 @@ const template = require('./middleware/template');
 const favicon = require('koa-favicon');
 const debug = require('./middleware/debug');
 const accessControl = require('./middleware/access-control');
+const antiHotlink = require('./middleware/anti-hotlink');
 
 const router = require('./router');
 const protected_router = require('./protected_router');
@@ -63,6 +64,9 @@ app.use(apiResponseHandler());
 
 // 4 generate body
 app.use(template);
+// anti-hotlink
+app.use(antiHotlink);
+
 // 3 filter content
 app.use(parameter);
 
diff --git a/lib/config.js b/lib/config.js
index cf17473e7b..9573cc46f6 100644
--- a/lib/config.js
+++ b/lib/config.js
@@ -128,6 +128,9 @@ const calculateValue = () => {
         scihub: {
             host: envs.SCIHUB_HOST || 'https://sci-hub.tw/',
         },
+        hotlink: {
+            template: envs.HOTLINK_TEMPLATE,
+        },
     };
 };
 calculateValue();
diff --git a/lib/middleware/anti-hotlink.js b/lib/middleware/anti-hotlink.js
new file mode 100644
index 0000000000..ffb53bea51
--- /dev/null
+++ b/lib/middleware/anti-hotlink.js
@@ -0,0 +1,54 @@
+const config = require('@/config').value;
+const cheerio = require('cheerio');
+const logger = require('@/utils/logger');
+
+const interpolate = (str, obj) => str.replace(/\${([^}]+)}/g, (_, prop) => obj[prop]); // Fills each ${name} placeholder in str with obj[name]. NOTE(review): a name that obj lacks is rendered as the text "undefined".
+// Parses str with the standard URL class instead of maintaining a hand-written regex.
+// Returns the URL object, or undefined (after logging an error) when parsing throws.
+const parseUrl = (str) => {
+    let url;
+    try {
+        url = new URL(str); // no base URL is supplied, so a relative or missing value throws here
+    } catch (e) {
+        logger.error(`Failed to parse ${str}`);
+    }
+
+    return url;
+};
+const replaceUrls = (body, template) => { // Rewrites the src of every <img> in body through template and returns the re-serialized markup.
+    const $ = cheerio.load(body, { decodeEntities: false, xmlMode: true }); // parsed in XML mode with entity decoding turned off
+    $('img').each(function () {
+        const old_src = $(this).attr('src');
+        const url = parseUrl(old_src);
+        if (url) { // an src that could not be parsed is left untouched
+            const new_src = interpolate(template, url);
+            $(this).attr('src', new_src);
+        }
+    });
+
+    return $.root().html();
+};
+
+module.exports = async (ctx, next) => {
+    await next(); // downstream middleware runs first; ctx.state.data is only read afterwards
+
+    const template = config.hotlink.template;
+    // Image links are assumed to appear only in description fields,
+    // so only the feed description and each item description are checked.
+    // Each description is loaded with cheerio and the src of every
+    // <img> found in it is rewritten through the template.
+    if (template) { // an unset or empty template disables the rewrite entirely
+        if (ctx.state.data) {
+            if (ctx.state.data.description) {
+                ctx.state.data.description = replaceUrls(ctx.state.data.description, template);
+            }
+
+            ctx.state.data.item &&
+                ctx.state.data.item.forEach((item) => {
+                    if (item.description) {
+                        item.description = replaceUrls(item.description, template);
+                    }
+                });
+        }
+    }
+};
diff --git a/test/middleware/anti-hotlink.js b/test/middleware/anti-hotlink.js
new file mode 100644
index 0000000000..23a02ff785
--- /dev/null
+++ b/test/middleware/anti-hotlink.js
@@ -0,0 +1,84 @@
+const supertest = require('supertest');
+jest.mock('request-promise-native');
+const Parser = require('rss-parser');
+const parser = new Parser();
+let server;
+
+afterAll(() => { // NOTE(review): afterEach below already deletes the variable after every test, so this looks redundant
+    delete process.env.HOTLINK_TEMPLATE;
+});
+
+afterEach(() => {
+    delete process.env.HOTLINK_TEMPLATE; // do not let one test's template leak into the next
+    jest.resetModules(); // reset the module registry so the next require('../../lib/index') is evaluated afresh
+    server.close(); // close the server assigned by the test that just ran
+});
+
+describe('anti-hotlink', () => { // each case sets HOTLINK_TEMPLATE, requires the server and checks the item content rendered for /test/complicated
+    it('template', async () => { // template pointing at another host: every img src in the expected output carries the i3.wp.com prefix
+        process.env.HOTLINK_TEMPLATE = 'https://i3.wp.com/${host}${pathname}';
+        server = require('../../lib/index'); // required only after the env var has been set
+        const request = supertest(server);
+
+        const response = await request.get('/test/complicated');
+        const parsed = await parser.parseString(response.text);
+        expect(parsed.items[0].content).toBe(
+            `<a href="https://mock.com/DIYgod/RSSHub"/>
+<img src="https://i3.wp.com/mock.com/DIYgod/RSSHub.jpg" referrerpolicy="no-referrer">
+
+<a href="http://mock.com/DIYgod/RSSHub"/>
+<img src="https://i3.wp.com/mock.com/DIYgod/RSSHub.jpg" data-src="/DIYgod/RSSHub0.jpg" referrerpolicy="no-referrer">
+<img data-src="/DIYgod/RSSHub.jpg" src="https://i3.wp.com/mock.com/DIYgod/RSSHub.jpg" referrerpolicy="no-referrer">
+<img data-mock="/DIYgod/RSSHub.png" src="https://i3.wp.com/mock.com/DIYgod/RSSHub.png" referrerpolicy="no-referrer">
+<img mock="/DIYgod/RSSHub.gif" src="https://i3.wp.com/mock.com/DIYgod/RSSHub.gif" referrerpolicy="no-referrer">
+<img src="https://i3.wp.com/mock.com/DIYgod/DIYgod/RSSHub" referrerpolicy="no-referrer">
+<img src="https://i3.wp.com/mock.com/DIYgod/RSSHub.jpg" referrerpolicy="no-referrer"/></img></img></img></img></img></img>`
+        );
+        expect(parsed.items[1].content).toBe(`<a href="https://mock.com/DIYgod/RSSHub"/>
+<img src="https://i3.wp.com/mock.com/DIYgod/RSSHub.jpg" referrerpolicy="no-referrer"/>`);
+    });
+    it('url', async () => { // template that rebuilds each URL from its own protocol, host and pathname
+        process.env.HOTLINK_TEMPLATE = '${protocol}//${host}${pathname}';
+        server = require('../../lib/index');
+        const request = supertest(server);
+
+        const response = await request.get('/test/complicated');
+        const parsed = await parser.parseString(response.text);
+        expect(parsed.items[0].content).toBe(
+            `<a href="https://mock.com/DIYgod/RSSHub"/>
+<img src="https://mock.com/DIYgod/RSSHub.jpg" referrerpolicy="no-referrer">
+
+<a href="http://mock.com/DIYgod/RSSHub"/>
+<img src="https://mock.com/DIYgod/RSSHub.jpg" data-src="/DIYgod/RSSHub0.jpg" referrerpolicy="no-referrer">
+<img data-src="/DIYgod/RSSHub.jpg" src="https://mock.com/DIYgod/RSSHub.jpg" referrerpolicy="no-referrer">
+<img data-mock="/DIYgod/RSSHub.png" src="https://mock.com/DIYgod/RSSHub.png" referrerpolicy="no-referrer">
+<img mock="/DIYgod/RSSHub.gif" src="https://mock.com/DIYgod/RSSHub.gif" referrerpolicy="no-referrer">
+<img src="http://mock.com/DIYgod/DIYgod/RSSHub" referrerpolicy="no-referrer">
+<img src="https://mock.com/DIYgod/RSSHub.jpg" referrerpolicy="no-referrer"/></img></img></img></img></img></img>`
+        );
+        expect(parsed.items[1].content).toBe(`<a href="https://mock.com/DIYgod/RSSHub"/>
+<img src="https://mock.com/DIYgod/RSSHub.jpg" referrerpolicy="no-referrer"/>`);
+    });
+    it('no-template', async () => { // empty template: the expected content keeps the original src values and the <a></a> form
+        process.env.HOTLINK_TEMPLATE = '';
+        server = require('../../lib/index');
+        const request = supertest(server);
+
+        const response = await request.get('/test/complicated');
+        const parsed = await parser.parseString(response.text);
+        expect(parsed.items[0].content).toBe(
+            `<a href="https://mock.com/DIYgod/RSSHub"></a>
+<img src="https://mock.com/DIYgod/RSSHub.jpg" referrerpolicy="no-referrer">
+
+<a href="http://mock.com/DIYgod/RSSHub"></a>
+<img src="https://mock.com/DIYgod/RSSHub.jpg" data-src="/DIYgod/RSSHub0.jpg" referrerpolicy="no-referrer">
+<img data-src="/DIYgod/RSSHub.jpg" src="https://mock.com/DIYgod/RSSHub.jpg" referrerpolicy="no-referrer">
+<img data-mock="/DIYgod/RSSHub.png" src="https://mock.com/DIYgod/RSSHub.png" referrerpolicy="no-referrer">
+<img mock="/DIYgod/RSSHub.gif" src="https://mock.com/DIYgod/RSSHub.gif" referrerpolicy="no-referrer">
+<img src="http://mock.com/DIYgod/DIYgod/RSSHub" referrerpolicy="no-referrer">
+<img src="https://mock.com/DIYgod/RSSHub.jpg" referrerpolicy="no-referrer">`
+        );
+        expect(parsed.items[1].content).toBe(`<a href="https://mock.com/DIYgod/RSSHub"></a>
+<img src="https://mock.com/DIYgod/RSSHub.jpg" referrerpolicy="no-referrer">`);
+    });
+});