mirror of
https://github.com/DIYgod/RSSHub.git
synced 2025-12-15 18:30:53 +08:00
fix(route): cw (#11613)
This commit is contained in:
@@ -1,26 +1,27 @@
|
||||
const cheerio = require('cheerio');
|
||||
const { parseDate } = require('@/utils/parse-date');
|
||||
const { Cookie, CookieJar } = require('tough-cookie');
|
||||
let cookieJar;
|
||||
const config = require('@/config').value;
|
||||
const { getCookies, setCookies } = require('@/utils/puppeteer-utils');
|
||||
let cookie;
|
||||
|
||||
const baseUrl = 'https://www.cw.com.tw';
|
||||
|
||||
const got = require('@/utils/got').extend({
|
||||
headers: {
|
||||
'User-Agent': config.trueUA,
|
||||
},
|
||||
});
|
||||
|
||||
const getCookie = async () => {
|
||||
const response = await got(`${baseUrl}/user/get/cookie-bar`);
|
||||
const cookies = response.headers['set-cookie'];
|
||||
if (Array.isArray(cookies)) {
|
||||
cookieJar = cookies.map(Cookie.parse);
|
||||
} else {
|
||||
cookieJar = [Cookie.parse(cookieJar)];
|
||||
const getCookie = async (browser, tryGet) => {
|
||||
if (!cookie) {
|
||||
cookie = await tryGet('cw:cookie', async () => {
|
||||
const page = await browser.newPage();
|
||||
await page.setRequestInterception(true);
|
||||
page.on('request', (request) => {
|
||||
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
|
||||
});
|
||||
await page.goto(`${baseUrl}/user/get/cookie-bar`, {
|
||||
waitUntil: 'domcontentloaded',
|
||||
});
|
||||
cookie = await getCookies(page);
|
||||
await page.close();
|
||||
return cookie;
|
||||
});
|
||||
}
|
||||
cookieJar = CookieJar.fromJSON({ cookies: cookieJar });
|
||||
return cookie;
|
||||
};
|
||||
|
||||
const parseList = ($, limit) =>
|
||||
@@ -36,15 +37,24 @@ const parseList = ($, limit) =>
|
||||
})
|
||||
.slice(0, limit);
|
||||
|
||||
const parseItems = (list, tryGet) =>
|
||||
const parseItems = (list, browser, tryGet) =>
|
||||
Promise.all(
|
||||
list.map((item) =>
|
||||
tryGet(item.link, async () => {
|
||||
const { data: response } = await got(item.link, {
|
||||
cookieJar,
|
||||
const page = await browser.newPage();
|
||||
await page.setRequestInterception(true);
|
||||
page.on('request', (request) => {
|
||||
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
|
||||
});
|
||||
await setCookies(page, cookie, 'cw.com.tw');
|
||||
await page.goto(item.link, {
|
||||
waitUntil: 'domcontentloaded',
|
||||
});
|
||||
|
||||
const response = await page.evaluate(() => document.documentElement.innerHTML);
|
||||
await page.close();
|
||||
const $ = cheerio.load(response);
|
||||
|
||||
const meta = JSON.parse($('head script[type="application/ld+json"]').eq(0).text());
|
||||
$('.article__head .breadcrumb, .article__head h1, .article__provideViews, .ad').remove();
|
||||
$('img.lazyload').each((_, img) => {
|
||||
@@ -67,9 +77,8 @@ const parseItems = (list, tryGet) =>
|
||||
|
||||
module.exports = {
|
||||
baseUrl,
|
||||
cookieJar,
|
||||
got,
|
||||
getCookie,
|
||||
setCookies,
|
||||
parseList,
|
||||
parseItems,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user