feat(core): true ua (#10761)

* feat(core): true ua

* test: skip mercury parser temporarily
This commit is contained in:
Tony
2022-09-11 19:13:38 +02:00
committed by GitHub
parent 16a25f5243
commit 878266c535
6 changed files with 53 additions and 14 deletions

View File

@@ -1,7 +1,7 @@
const entities = require('entities');
const cheerio = require('cheerio');
const { simplecc } = require('simplecc-wasm');
const got = require('@/utils/got');
// const got = require('@/utils/got');
const config = require('@/config').value;
const RE2 = require('re2');
@@ -248,18 +248,21 @@ module.exports = async (ctx, next) => {
// fulltext
if (ctx.query.mode && ctx.query.mode.toLowerCase() === 'fulltext') {
const tasks = ctx.state.data.item.map(async (item) => {
const { link, author, description } = item;
const { title, link, author, description } = item;
const parsed_result = await ctx.cache.tryGet(`mercury-cache-${link}`, async () => {
// if the parser fails, fall back to the default description without reporting an error
try {
mercury_parser = mercury_parser || require('@postlight/mercury-parser');
const res = await got(link);
const $ = cheerio.load(res.data, {
xmlMode: true,
});
// const res = await got(link);
// const $ = cheerio.load(res.data, {
// xmlMode: true,
// });
const result = await mercury_parser.parse(link, {
html: $.html(),
// html: $.html(),
headers: {
'User-Agent': config.ua,
},
});
return result;
} catch (e) {
@@ -267,8 +270,9 @@ module.exports = async (ctx, next) => {
}
});
item.title = parsed_result?.title || title;
item.author = author || (parsed_result ? parsed_result.author : '');
item.description = parsed_result ? parsed_result.content : description;
item.description = parsed_result ? entities.decodeXML(parsed_result.content) : description;
});
await Promise.all(tasks);
}