import * as entities from 'entities';
import { load, type CheerioAPI, type Element } from 'cheerio';
import { simplecc } from 'simplecc-wasm';
import got from '@/utils/got';
import { config } from '@/config';
import { RE2JS } from 're2js';
import markdownit from 'markdown-it';
import htmlToText from 'html-to-text';
import { MiddlewareHandler } from 'hono';
import cache from '@/utils/cache';
import Parser from '@postlight/parser';
import { Data, DataItem } from '@/types';

/** markdown-it renderer (raw HTML allowed) used to render the OpenAI summary. */
const md = markdownit({
    html: true,
});

/** A compiled filter expression from either supported regex engine. */
type FilterRegex = RegExp | RE2JS;

/** Substrings that mark an attribute value as a probable image URL (lazy-load fallback). */
const IMAGE_SUFFIXES = ['.gif', '.png', '.jpg', '.webp'];

/** Inline event-handler attributes stripped from every `<img>`. */
const IMG_EVENT_ATTRIBUTES = ['onclick', 'onerror', 'onload'];

/**
 * `[selector, attribute]` pairs whose attribute values are resolved against the base URL.
 * https://www.w3schools.com/tags/att_href.asp
 * https://www.w3schools.com/tags/att_src.asp
 */
const RELATIVE_URL_TARGETS: ReadonlyArray<readonly [string, string]> = [
    ['a, area', 'href'],
    ['img, video, audio, source, iframe, embed, track', 'src'],
    ['video[poster]', 'poster'],
];

/**
 * Rewrites `attr` on `elem` to an absolute URL resolved against `baseUrl`.
 *
 * Nothing happens when `baseUrl` is missing, when the element has no (or an empty) `attr`,
 * or when the value cannot be parsed as a URL.
 *
 * @param $ - Cheerio document that owns `elem`.
 * @param elem - Element to update in place.
 * @param attr - Attribute name holding the (possibly relative) URL.
 * @param baseUrl - Absolute URL used as the resolution base.
 */
const resolveRelativeLink = ($: CheerioAPI, elem: Element, attr: string, baseUrl?: string) => {
    if (!baseUrl) {
        return;
    }
    const $elem = $(elem);
    try {
        const oldAttr = $elem.attr(attr);
        if (oldAttr) {
            // elements that do not carry the attribute are left unchanged
            $elem.attr(attr, new URL(oldAttr, baseUrl).href);
        }
    } catch {
        // unparsable URL: keep the original value
    }
};

/**
 * Sends `articleText` to the chat-completions endpoint configured under `config.openai`.
 *
 * @param articleText - Plain-text article passed as the user message.
 * @returns The message content of the first returned choice.
 */
const summarizeArticle = async (articleText: string) => {
    const apiUrl = `${config.openai.endpoint}/chat/completions`;
    const response = await got.post(apiUrl, {
        json: {
            model: config.openai.model,
            max_tokens: config.openai.maxTokens,
            messages: [
                { role: 'system', content: config.openai.prompt },
                { role: 'user', content: articleText },
            ],
            temperature: config.openai.temperature,
        },
        headers: {
            Authorization: `Bearer ${config.openai.apiKey}`,
        },
    });

    return response.data.choices[0].message.content;
};

/**
 * Prefixes a scheme onto a base URL that lacks one: `//host` becomes `http://host` and
 * `host` becomes `http://host`. Falsy values and `http(s)://` URLs are returned unchanged.
 */
const normalizeBaseUrl = (url?: string): string | undefined => {
    if (!url || /^https?:\/\//.test(url)) {
        return url;
    }
    return url.startsWith('//') ? 'http:' + url : 'http://' + url;
};

/** Returns whether `regex` matches anywhere in `text`, for either regex engine. */
const testRegex = (regex: FilterRegex, text: string): boolean => (regex instanceof RE2JS ? regex.matcher(text).find() : regex.test(text));

/**
 * Extracts the fields the filter parameters operate on, with the same fallbacks for every
 * filter: missing title/author become `''`, a missing description falls back to the title,
 * and a missing category becomes `[]`.
 */
const filterFields = (item: DataItem) => {
    const title = item.title || '';
    return {
        title,
        description: item.description || title,
        author: item.author || '',
        category: item.category || [],
    };
};

/**
 * Normalises one feed item in place: decodes entities in the title, converts `pubDate` to a
 * UTC string, makes `link` absolute, cleans up the HTML description and coerces `category`
 * to an array of strings.
 *
 * @param item - Item to normalise (mutated).
 * @param data - Feed the item belongs to; `data.link` is the fallback base URL.
 * @returns The same `item` instance.
 */
const processItem = (item: DataItem, data: Data): DataItem => {
    if (item.title) {
        item.title = entities.decodeXML(item.title + '');
    }

    // handle pubDate
    if (item.pubDate) {
        item.pubDate = new Date(item.pubDate).toUTCString();
    }

    // handle link
    if (item.link) {
        item.link = new URL(item.link, normalizeBaseUrl(data.link)).href;
    }

    // handle description
    if (item.description) {
        const $ = load(item.description);
        const baseUrl = normalizeBaseUrl(item.link || data.link);

        $('script').remove();

        $('img').each((_, ele) => {
            const $ele = $(ele);

            // fix lazyload: prefer the usual lazy-load attributes, otherwise take the first
            // attribute whose value looks like an image URL
            if (!$ele.attr('src')) {
                const lazySrc =
                    $ele.attr('data-src') ||
                    $ele.attr('data-original') ||
                    Object.values(ele.attribs)
                        .map((value) => value.trim())
                        .find((value) => IMAGE_SUFFIXES.some((suffix) => value.includes(suffix)));
                if (lazySrc) {
                    $ele.attr('src', lazySrc);
                }
            }

            // redundant attributes
            for (const attribute of IMG_EVENT_ATTRIBUTES) {
                $ele.removeAttr(attribute);
            }
        });

        // resolve relative link
        for (const [selector, attribute] of RELATIVE_URL_TARGETS) {
            $(selector).each((_, elem) => {
                resolveRelativeLink($, elem, attribute, baseUrl);
            });
        }

        // fix referrer policy
        // https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy
        $('img, iframe').each((_, elem) => {
            $(elem).attr('referrerpolicy', 'no-referrer');
        });

        item.description = $('body').html() + '' + (config.suffix || '');

        const quotes = $('.rsshub-quote');
        if (item._extra?.links && quotes.length) {
            const quoteHtml = $.html(quotes);
            for (const link of item._extra.links) {
                link.content_html = quoteHtml;
            }
        }
    }

    // handle category
    if (item.category) {
        // convert single string to array, and filter only string type category
        if (!Array.isArray(item.category)) {
            item.category = [item.category];
        }
        item.category = item.category.filter((e) => typeof e === 'string');
    }

    return item;
};

/**
 * Post-processes the feed stored in the context under `data` after the route handler ran,
 * then stores the result back under the same key.
 *
 * Steps, in order: empty-route check, entity decoding, sorting (`sorted`), per-item
 * normalisation, `filter` / `filter_title|description|author|category`
 * (`filter_case_sensitive`), `filterout` / `filterout_title|description|author|category`,
 * `filter_time`, `limit`, `tgiv`, `mode=fulltext`, `chatgpt`, `scihub`, `opencc`, `brief`.
 *
 * @throws Error when the route produced no items and `allowEmpty` is not set, when the
 *   configured regex engine is unknown while a filter is requested, or when `brief` is
 *   not accepted.
 */
const middleware: MiddlewareHandler = async (ctx, next) => {
    await next();

    const query = (name: string) => ctx.req.query(name);

    const data = ctx.get('data') as Data;
    if (!data) {
        // no data was set by the route; deliberately not treated as an error here
        return;
    }

    if ((!data.item || data.item.length === 0) && !data.allowEmpty) {
        throw new Error('this route is empty, please check the original site or create an issue');
    }

    // fix allowEmpty
    data.item = data.item || [];

    // decode HTML entities
    if (data.title) {
        data.title = entities.decodeXML(data.title + '');
    }
    if (data.description) {
        data.description = entities.decodeXML(data.description + '');
    }

    // sort items (newest first)
    if (query('sorted') !== 'false') {
        data.item = data.item.sort((a: DataItem, b: DataItem) => +new Date(b.pubDate || 0) - +new Date(a.pubDate || 0));
    }

    data.item = data.item.map((item) => processItem(item, data));

    // filter
    const engine = config.feature.filter_regex_engine;
    // default: case_sensitive = true
    const insensitive = query('filter_case_sensitive') === 'false';
    const makeRegex = (str: string) => {
        switch (engine) {
            case 'regexp':
                return new RegExp(str, insensitive ? 'i' : '');
            case 're2':
                return RE2JS.compile(str, insensitive ? RE2JS.CASE_INSENSITIVE : 0);
            default:
                throw new Error(`Invalid Engine Value: ${engine}, please check your config.`);
        }
    };
    // Compiles a pattern only when the parameter was supplied (non-empty).
    const optionalRegex = (pattern?: string) => (pattern ? makeRegex(pattern) : undefined);

    const filter = query('filter');
    if (filter) {
        const regex = makeRegex(filter);
        data.item = data.item.filter((item) => {
            const { title, description, author, category } = filterFields(item);
            return testRegex(regex, title) || testRegex(regex, description) || testRegex(regex, author) || category.some((c) => testRegex(regex, c));
        });
    } else {
        // When `filter` is supplied, filter_title/description/author/category are ignored.
        const titleRegex = optionalRegex(query('filter_title'));
        const descriptionRegex = optionalRegex(query('filter_description'));
        const authorRegex = optionalRegex(query('filter_author'));
        const categoryRegex = optionalRegex(query('filter_category'));

        if (titleRegex || descriptionRegex || authorRegex || categoryRegex) {
            // every supplied expression has to match
            data.item = data.item.filter((item) => {
                const { title, description, author, category } = filterFields(item);
                return (
                    (!titleRegex || testRegex(titleRegex, title)) &&
                    (!descriptionRegex || testRegex(descriptionRegex, description)) &&
                    (!authorRegex || testRegex(authorRegex, author)) &&
                    (!categoryRegex || category.some((c) => testRegex(categoryRegex, c)))
                );
            });
        }
    }

    // filterout: `filterout` applies to title and description; the field-specific
    // parameters override it and also work on their own
    const filterout = query('filterout');
    const outTitleRegex = optionalRegex(query('filterout_title') || filterout);
    const outDescriptionRegex = optionalRegex(query('filterout_description') || filterout);
    const outAuthorRegex = optionalRegex(query('filterout_author'));
    const outCategoryRegex = optionalRegex(query('filterout_category'));

    if (outTitleRegex || outDescriptionRegex || outAuthorRegex || outCategoryRegex) {
        // an item is dropped as soon as any supplied expression matches
        data.item = data.item.filter((item) => {
            const { title, description, author, category } = filterFields(item);
            return (
                !(outTitleRegex && testRegex(outTitleRegex, title)) &&
                !(outDescriptionRegex && testRegex(outDescriptionRegex, description)) &&
                !(outAuthorRegex && testRegex(outAuthorRegex, author)) &&
                !(outCategoryRegex && category.some((c) => testRegex(outCategoryRegex, c)))
            );
        });
    }

    // filter_time: keep items without pubDate or published within the last N seconds
    const filterTime = query('filter_time');
    if (filterTime) {
        const now = Date.now();
        const maxAgeMs = Number.parseInt(filterTime) * 1000;
        data.item = data.item.filter(({ pubDate }) => {
            let isFilter = true;
            try {
                isFilter = !pubDate || now - new Date(pubDate).getTime() <= maxAgeMs;
            } catch {
                // no-empty
            }
            return isFilter;
        });
    }

    // limit
    const limit = query('limit');
    if (limit) {
        data.item = data.item.slice(0, Number.parseInt(limit));
    }

    // telegram instant view
    const tgiv = query('tgiv');
    if (tgiv) {
        for (const item of data.item) {
            if (item.link) {
                const encodedlink = encodeURIComponent(item.link);
                item.link = `https://t.me/iv?url=${encodedlink}&rhash=${tgiv}`;
            }
        }
    }

    // fulltext
    if (query('mode')?.toLowerCase() === 'fulltext') {
        const tasks = data.item.map(async (item) => {
            const { link, author, description } = item;
            // eslint-disable-next-line @typescript-eslint/no-explicit-any -- parser result is untyped here
            const parsedResult: any = await cache.tryGet(`mercury-cache-${link}`, async () => {
                if (link) {
                    // if parser failed, return default description and not report error
                    try {
                        const { data: res } = await got(link);
                        const $ = load(res);
                        return await Parser.parse(link, {
                            html: $.html(),
                        });
                    } catch {
                        // no-empty
                    }
                }
            });

            item.author = author || parsedResult?.author;
            // only trust the parser when it extracted a reasonable amount of content
            item.description = parsedResult?.content?.length > 40 ? entities.decodeXML(parsedResult.content) : description;
        });
        await Promise.all(tasks);
    }

    // openai
    if (query('chatgpt') && config.openai.apiKey) {
        data.item = await Promise.all(
            data.item.map(async (item) => {
                if (item.description) {
                    try {
                        const summary = await cache.tryGet(`openai:${item.link}`, async () => {
                            const text = htmlToText.htmlToText(item.description!);
                            if (text.length < 300) {
                                return '';
                            }
                            const summaryMd = await summarizeArticle(text);
                            return md.render(summaryMd);
                        });
                        // prepend the summary to the article content
                        if (summary !== '') {
                            item.description = summary + '' + item.description;
                        }
                    } catch {
                        // when openai failed, return default description and not write cache
                    }
                }
                return item;
            })
        );
    }

    // scihub
    if (query('scihub')) {
        for (const item of data.item) {
            item.link = `${config.scihub.host}${item.doi || item.link}`;
        }
    }

    // opencc
    const opencc = query('opencc');
    if (opencc) {
        for (const item of data.item) {
            item.title = simplecc(item.title ?? item.link, opencc);
            item.description = simplecc(item.description ?? item.title ?? item.link, opencc);
        }
    }

    // brief
    const briefParam = query('brief');
    if (briefParam) {
        if (!/[1-9]\d{2,}/.test(briefParam)) {
            throw new Error(`Invalid parameter brief. Please check the doc https://docs.rsshub.app/parameter#shu-chu-jian-xun`);
        }
        const brief = Number.parseInt(briefParam);
        for (const item of data.item) {
            if (item.description) {
                // strip tags, then truncate to `brief` characters
                const text = item.description.replaceAll(/<\/?[^>]+(>|$)/g, '');
                item.description = text.length > brief ? `${text.substring(0, brief)}…` : `${text}`;
            }
        }
    }

    // some parameters are processed in `anti-hotlink.js`
    ctx.set('data', data);
};

export default middleware;
${text.substring(0, brief)}…
${text}