mirror of
https://github.com/DIYgod/RSSHub.git
synced 2025-12-11 15:47:48 +08:00
feat(parameter): use re2js instead (#13072)
* feat(parameter): use re2js instead * fix: add back comment
This commit is contained in:
@@ -98,8 +98,8 @@ const validateTemplate = (template) => {
|
|||||||
module.exports = async (ctx, next) => {
|
module.exports = async (ctx, next) => {
|
||||||
await next();
|
await next();
|
||||||
|
|
||||||
let image_hotlink_template = undefined;
|
let image_hotlink_template;
|
||||||
let multimedia_hotlink_template = undefined;
|
let multimedia_hotlink_template;
|
||||||
const shouldWrapInIframe = ctx.query.wrap_multimedia_in_iframe === '1';
|
const shouldWrapInIframe = ctx.query.wrap_multimedia_in_iframe === '1';
|
||||||
|
|
||||||
// Read params if enabled
|
// Read params if enabled
|
||||||
|
|||||||
@@ -3,8 +3,7 @@ const cheerio = require('cheerio');
|
|||||||
const { simplecc } = require('simplecc-wasm');
|
const { simplecc } = require('simplecc-wasm');
|
||||||
const got = require('@/utils/got');
|
const got = require('@/utils/got');
|
||||||
const config = require('@/config').value;
|
const config = require('@/config').value;
|
||||||
const { RE2 } = require('re2-wasm');
|
const { RE2JS } = require('re2js');
|
||||||
const { resolve } = require('path');
|
|
||||||
|
|
||||||
let mercury_parser;
|
let mercury_parser;
|
||||||
|
|
||||||
@@ -28,17 +27,6 @@ module.exports = async (ctx, next) => {
|
|||||||
await next();
|
await next();
|
||||||
|
|
||||||
if (!ctx.state.data && !ctx._matchedRoute) {
|
if (!ctx.state.data && !ctx._matchedRoute) {
|
||||||
// Given that the official demo has a cache TTL of 2h, a "wrong path" page will be cached by CloudFlare for
|
|
||||||
// 200h (8.33d).
|
|
||||||
// What makes it worse is that the documentation contains status badges to detect the availability of routes,
|
|
||||||
// but the documentation is updated more timely than the official demo, so every example path of every
|
|
||||||
// new route will probably have a "wrong path" page cached for at least 200h soon after accepted. That is to
|
|
||||||
// say, the example paths of a new route will probably be unavailable on the public demo in the first 200h
|
|
||||||
// after accepted.
|
|
||||||
// As a conclusion, the next 3 lines have been commented out. (exactly the same behavior as any internal error)
|
|
||||||
// ctx.set({
|
|
||||||
// 'Cache-Control': `public, max-age=${config.cache.routeExpire * 100}`,
|
|
||||||
// });
|
|
||||||
throw Error('wrong path');
|
throw Error('wrong path');
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -160,41 +148,37 @@ module.exports = async (ctx, next) => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// filter
|
// filter
|
||||||
|
const engine = config.feature.filter_regex_engine;
|
||||||
const makeRegex = (string) => {
|
const makeRegex = (string) => {
|
||||||
|
if (!string) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
// default: case_sensitive = true
|
// default: case_sensitive = true
|
||||||
const engine = config.feature.filter_regex_engine;
|
const insensitive = ctx.query.filter_case_sensitive === 'false';
|
||||||
if (ctx.query.filter_case_sensitive === 'false') {
|
switch (engine) {
|
||||||
switch (engine) {
|
case 'regexp':
|
||||||
case 'regexp':
|
return new RegExp(string, insensitive ? 'i' : '');
|
||||||
return new RegExp(string, 'i');
|
case 're2':
|
||||||
case 're2':
|
return RE2JS.compile(string, insensitive ? RE2JS.CASE_INSENSITIVE : 0);
|
||||||
return new RE2(string, 'iu');
|
default:
|
||||||
default:
|
throw Error(`Invalid Engine Value: ${engine}, please check your config.`);
|
||||||
throw Error(`Invalid Engine Value: ${engine}, please check your config.`);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
switch (engine) {
|
|
||||||
case 'regexp':
|
|
||||||
return new RegExp(string);
|
|
||||||
case 're2':
|
|
||||||
return new RE2(string, 'u');
|
|
||||||
default:
|
|
||||||
throw Error(`Invalid Engine Value: ${engine}, please check your config.`);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if (ctx.query.filter) {
|
if (ctx.query.filter) {
|
||||||
// workaround for @vercel/nft removing wasm file
|
const regex = makeRegex(ctx.query.filter);
|
||||||
resolve('node_modules/re2-wasm/build/wasm/re2.wasm');
|
|
||||||
|
|
||||||
ctx.state.data.item = ctx.state.data.item.filter((item) => {
|
ctx.state.data.item = ctx.state.data.item.filter((item) => {
|
||||||
const title = item.title || '';
|
const title = item.title || '';
|
||||||
const description = item.description || title;
|
const description = item.description || title;
|
||||||
const author = item.author || '';
|
const author = item.author || '';
|
||||||
const category = item.category ? (Array.isArray(item.category) ? item.category : [item.category]) : [];
|
const categoryArray = Array.isArray(item.category) ? item.category : [item.category];
|
||||||
|
const category = item.category ? categoryArray : [];
|
||||||
const isFilter =
|
const isFilter =
|
||||||
title.match(makeRegex(ctx.query.filter)) || description.match(makeRegex(ctx.query.filter)) || author.match(makeRegex(ctx.query.filter)) || category.some((c) => c.match(makeRegex(ctx.query.filter)));
|
engine === 're2'
|
||||||
|
? regex.matcher(title).find() || regex.matcher(description).find() || regex.matcher(author).find() || category.some((c) => regex.matcher(c).find())
|
||||||
|
: title.match(regex) || description.match(regex) || author.match(regex) || category.some((c) => c.match(regex));
|
||||||
|
|
||||||
return isFilter;
|
return isFilter;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -205,12 +189,20 @@ module.exports = async (ctx, next) => {
|
|||||||
const title = item.title || '';
|
const title = item.title || '';
|
||||||
const description = item.description || title;
|
const description = item.description || title;
|
||||||
const author = item.author || '';
|
const author = item.author || '';
|
||||||
const category = item.category ? (Array.isArray(item.category) ? item.category : [item.category]) : [];
|
const categoryArray = Array.isArray(item.category) ? item.category : [item.category];
|
||||||
|
const category = item.category ? categoryArray : [];
|
||||||
let isFilter = true;
|
let isFilter = true;
|
||||||
ctx.query.filter_title && (isFilter = title.match(makeRegex(ctx.query.filter_title)));
|
|
||||||
ctx.query.filter_description && (isFilter = isFilter && description.match(makeRegex(ctx.query.filter_description)));
|
const titleRegex = makeRegex(ctx.query.filter_title);
|
||||||
ctx.query.filter_author && (isFilter = isFilter && author.match(makeRegex(ctx.query.filter_author)));
|
const descriptionRegex = makeRegex(ctx.query.filter_description);
|
||||||
ctx.query.filter_category && (isFilter = isFilter && category.some((c) => c.match(makeRegex(ctx.query.filter_category))));
|
const authorRegex = makeRegex(ctx.query.filter_author);
|
||||||
|
const categoryRegex = makeRegex(ctx.query.filter_category);
|
||||||
|
|
||||||
|
ctx.query.filter_title && (isFilter = engine === 're2' ? titleRegex.matcher(title).find() : title.match(titleRegex));
|
||||||
|
ctx.query.filter_description && (isFilter = isFilter && (engine === 're2' ? descriptionRegex.matcher(description).find() : description.match(descriptionRegex)));
|
||||||
|
ctx.query.filter_author && (isFilter = isFilter && (engine === 're2' ? authorRegex.matcher(author).find() : author.match(authorRegex)));
|
||||||
|
ctx.query.filter_category && (isFilter = isFilter && category.some((c) => (engine === 're2' ? categoryRegex.matcher(c).find() : c.match(categoryRegex))));
|
||||||
|
|
||||||
return isFilter;
|
return isFilter;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -224,12 +216,20 @@ module.exports = async (ctx, next) => {
|
|||||||
const title = item.title;
|
const title = item.title;
|
||||||
const description = item.description || title;
|
const description = item.description || title;
|
||||||
const author = item.author || '';
|
const author = item.author || '';
|
||||||
const category = item.category ? (Array.isArray(item.category) ? item.category : [item.category]) : [];
|
const categoryArray = Array.isArray(item.category) ? item.category : [item.category];
|
||||||
|
const category = item.category ? categoryArray : [];
|
||||||
let isFilter = true;
|
let isFilter = true;
|
||||||
ctx.query.filterout_title && (isFilter = !title.match(makeRegex(ctx.query.filterout_title)));
|
|
||||||
ctx.query.filterout_description && (isFilter = isFilter && !description.match(makeRegex(ctx.query.filterout_description)));
|
const titleRegex = makeRegex(ctx.query.filterout_title);
|
||||||
ctx.query.filterout_author && (isFilter = isFilter && !author.match(makeRegex(ctx.query.filterout_author)));
|
const descriptionRegex = makeRegex(ctx.query.filterout_description);
|
||||||
ctx.query.filterout_category && (isFilter = isFilter && !category.some((c) => c.match(makeRegex(ctx.query.filterout_category))));
|
const authorRegex = makeRegex(ctx.query.filterout_author);
|
||||||
|
const categoryRegex = makeRegex(ctx.query.filterout_category);
|
||||||
|
|
||||||
|
ctx.query.filterout_title && (isFilter = engine === 're2' ? !titleRegex.matcher(title).find() : !title.match(titleRegex));
|
||||||
|
ctx.query.filterout_description && (isFilter = isFilter && (engine === 're2' ? !descriptionRegex.matcher(description).find() : !description.match(descriptionRegex)));
|
||||||
|
ctx.query.filterout_author && (isFilter = isFilter && (engine === 're2' ? !authorRegex.matcher(author).find() : !author.match(authorRegex)));
|
||||||
|
ctx.query.filterout_category && (isFilter = isFilter && !category.some((c) => (engine === 're2' ? categoryRegex.matcher(c).find() : c.match(categoryRegex))));
|
||||||
|
|
||||||
return isFilter;
|
return isFilter;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -308,7 +308,7 @@ module.exports = async (ctx, next) => {
|
|||||||
if (item.description) {
|
if (item.description) {
|
||||||
text = item.description.replace(/<\/?[^>]+(>|$)/g, '');
|
text = item.description.replace(/<\/?[^>]+(>|$)/g, '');
|
||||||
}
|
}
|
||||||
if (text && text.length) {
|
if (text?.length) {
|
||||||
if (text.length > ctx.query.brief) {
|
if (text.length > ctx.query.brief) {
|
||||||
item.description = `<p>${text.substring(0, ctx.query.brief)}…</p>`;
|
item.description = `<p>${text.substring(0, ctx.query.brief)}…</p>`;
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -134,7 +134,7 @@
|
|||||||
"puppeteer-extra-plugin-user-preferences": "2.4.1",
|
"puppeteer-extra-plugin-user-preferences": "2.4.1",
|
||||||
"query-string": "7.1.3",
|
"query-string": "7.1.3",
|
||||||
"rand-user-agent": "1.0.109",
|
"rand-user-agent": "1.0.109",
|
||||||
"re2-wasm": "1.0.2",
|
"re2js": "0.3.2",
|
||||||
"require-all": "3.0.0",
|
"require-all": "3.0.0",
|
||||||
"rfc4648": "1.5.2",
|
"rfc4648": "1.5.2",
|
||||||
"rss-parser": "3.13.0",
|
"rss-parser": "3.13.0",
|
||||||
|
|||||||
11
pnpm-lock.yaml
generated
11
pnpm-lock.yaml
generated
@@ -158,9 +158,9 @@ dependencies:
|
|||||||
rand-user-agent:
|
rand-user-agent:
|
||||||
specifier: 1.0.109
|
specifier: 1.0.109
|
||||||
version: 1.0.109
|
version: 1.0.109
|
||||||
re2-wasm:
|
re2js:
|
||||||
specifier: 1.0.2
|
specifier: 0.3.2
|
||||||
version: 1.0.2
|
version: 0.3.2
|
||||||
require-all:
|
require-all:
|
||||||
specifier: 3.0.0
|
specifier: 3.0.0
|
||||||
version: 3.0.0
|
version: 3.0.0
|
||||||
@@ -6341,9 +6341,8 @@ packages:
|
|||||||
resolution: {integrity: sha512-mnAH0jDJQ0SJtEXjoW5aQILEc+33RwtKzKxwK9JG1a06M6nn8WDWheD+kmc5ucs+ux4FEWX3+PZuEB8r3x15yQ==}
|
resolution: {integrity: sha512-mnAH0jDJQ0SJtEXjoW5aQILEc+33RwtKzKxwK9JG1a06M6nn8WDWheD+kmc5ucs+ux4FEWX3+PZuEB8r3x15yQ==}
|
||||||
dev: false
|
dev: false
|
||||||
|
|
||||||
/re2-wasm@1.0.2:
|
/re2js@0.3.2:
|
||||||
resolution: {integrity: sha512-VXUdgSiUrE/WZXn6gUIVVIsg0+Hp6VPZPOaHCay+OuFKy6u/8ktmeNEf+U5qSA8jzGGFsg8jrDNu1BeHpz2pJA==}
|
resolution: {integrity: sha512-N++IGnIi5bCcNxtQHEpmPJ1AwKnfStk7NNjZj51ewNDtmCPTG4CyGN+mv8zphaSB8FlelVGRGcWz4fnqU+Be0A==}
|
||||||
engines: {node: '>=10'}
|
|
||||||
dev: false
|
dev: false
|
||||||
|
|
||||||
/react-is@18.2.0:
|
/react-is@18.2.0:
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ describe('filter-engine', () => {
|
|||||||
|
|
||||||
const response = await request.get('/test/1?filter=abc(%3F%3Ddef)');
|
const response = await request.get('/test/1?filter=abc(%3F%3Ddef)');
|
||||||
expect(response.status).toBe(404);
|
expect(response.status).toBe(404);
|
||||||
expect(response.text).toMatch(/SyntaxError/);
|
expect(response.text).toMatch(/RE2JSSyntaxException/);
|
||||||
server.close();
|
server.close();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user