fix(route): cw (#11613)

This commit is contained in:
Tony
2023-01-12 21:42:59 -05:00
committed by GitHub
parent a008fbeff8
commit 4e7c633dc2
5 changed files with 91 additions and 64 deletions

View File

@@ -2043,11 +2043,11 @@ category 对应的关键词有
### 最新上線
<Route author="TonyRL" example="/cw/today" path="/cw/today" radar="1" rssbud="1"/>
<Route author="TonyRL" example="/cw/today" path="/cw/today" radar="1" rssbud="1" puppeteer="1"/>
### 主頻道
<Route author="TonyRL" example="/cw/master/8" path="/cw/master/:channel" :paramsDesc="['主頻道 ID可在 URL 中找到']" radar="1" rssbud="1">
<Route author="TonyRL" example="/cw/master/8" path="/cw/master/:channel" :paramsDesc="['主頻道 ID可在 URL 中找到']" radar="1" rssbud="1" puppeteer="1">
| 主頻道名稱 | 主頻道 ID |
| ----- | ------ |
@@ -2069,7 +2069,7 @@ category 对应的关键词有
### 子頻道
<Route author="TonyRL" example="/cw/sub/615" path="/cw/sub/:channel" :paramsDesc="['子頻道 ID可在 URL 中找到']" radar="1" rssbud="1"/>
<Route author="TonyRL" example="/cw/sub/615" path="/cw/sub/:channel" :paramsDesc="['子頻道 ID可在 URL 中找到']" radar="1" rssbud="1" puppeteer="1"/>
## 卫报 The Guardian

View File

@@ -1,25 +1,30 @@
const cheerio = require('cheerio');
const { baseUrl, cookieJar, got, parseList, parseItems, getCookie } = require('./utils');
const { baseUrl, parseList, parseItems, getCookie, setCookies } = require('./utils');
module.exports = async (ctx) => {
const { channel } = ctx.params;
const pageUrl = `${baseUrl}/masterChannel.action`;
if (!cookieJar) {
await getCookie();
}
const { data: response } = await got(pageUrl, {
headers: {
Referer: baseUrl,
},
cookieJar,
searchParams: {
idMasterChannel: channel,
},
const browser = await require('@/utils/puppeteer')();
const cookie = await getCookie(browser, ctx.cache.tryGet);
const page = await browser.newPage();
await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
});
await setCookies(page, cookie, 'cw.com.tw');
await page.goto(`${pageUrl}?idMasterChannel=${channel}`, {
waitUntil: 'domcontentloaded',
});
const response = await page.evaluate(() => document.documentElement.innerHTML);
await page.close();
const $ = cheerio.load(response);
const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 100);
const items = await parseItems(list, ctx.cache.tryGet);
const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 12);
const items = await parseItems(list, browser, ctx.cache.tryGet);
await browser.close();
ctx.state.data = {
title: $('head title').text(),

View File

@@ -1,25 +1,30 @@
const cheerio = require('cheerio');
const { baseUrl, cookieJar, got, parseList, parseItems, getCookie } = require('./utils');
const { baseUrl, parseList, parseItems, getCookie, setCookies } = require('./utils');
module.exports = async (ctx) => {
const { channel } = ctx.params;
const pageUrl = `${baseUrl}/subchannel.action`;
if (!cookieJar) {
await getCookie();
}
const { data: response } = await got(pageUrl, {
headers: {
Referer: baseUrl,
},
cookieJar,
searchParams: {
idSubChannel: channel,
},
const browser = await require('@/utils/puppeteer')();
const cookie = await getCookie(browser, ctx.cache.tryGet);
const page = await browser.newPage();
await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
});
await setCookies(page, cookie, 'cw.com.tw');
await page.goto(`${pageUrl}?idSubChannel=${channel}`, {
waitUntil: 'domcontentloaded',
});
const response = await page.evaluate(() => document.documentElement.innerHTML);
await page.close();
const $ = cheerio.load(response);
const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 100);
const items = await parseItems(list, ctx.cache.tryGet);
const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 12);
const items = await parseItems(list, browser, ctx.cache.tryGet);
await browser.close();
ctx.state.data = {
title: $('head title').text(),

View File

@@ -1,21 +1,29 @@
const cheerio = require('cheerio');
const { baseUrl, cookieJar, got, parseList, parseItems, getCookie } = require('./utils');
const { baseUrl, parseList, parseItems, getCookie, setCookies } = require('./utils');
module.exports = async (ctx) => {
const pageUrl = `${baseUrl}/today`;
if (!cookieJar) {
await getCookie();
}
const { data: response } = await got(pageUrl, {
headers: {
Referer: baseUrl,
},
cookieJar,
const browser = await require('@/utils/puppeteer')();
const cookie = await getCookie(browser, ctx.cache.tryGet);
const page = await browser.newPage();
await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
});
await setCookies(page, cookie, 'cw.com.tw');
await page.goto(pageUrl, {
waitUntil: 'domcontentloaded',
});
const response = await page.evaluate(() => document.documentElement.innerHTML);
await page.close();
const $ = cheerio.load(response);
const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 100);
const items = await parseItems(list, ctx.cache.tryGet);
const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 30);
const items = await parseItems(list, browser, ctx.cache.tryGet);
await browser.close();
ctx.state.data = {
title: $('head title').text(),

View File

@@ -1,26 +1,27 @@
const cheerio = require('cheerio');
const { parseDate } = require('@/utils/parse-date');
const { Cookie, CookieJar } = require('tough-cookie');
let cookieJar;
const config = require('@/config').value;
const { getCookies, setCookies } = require('@/utils/puppeteer-utils');
let cookie;
const baseUrl = 'https://www.cw.com.tw';
const got = require('@/utils/got').extend({
headers: {
'User-Agent': config.trueUA,
},
});
const getCookie = async () => {
const response = await got(`${baseUrl}/user/get/cookie-bar`);
const cookies = response.headers['set-cookie'];
if (Array.isArray(cookies)) {
cookieJar = cookies.map(Cookie.parse);
} else {
cookieJar = [Cookie.parse(cookieJar)];
const getCookie = async (browser, tryGet) => {
if (!cookie) {
cookie = await tryGet('cw:cookie', async () => {
const page = await browser.newPage();
await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
});
await page.goto(`${baseUrl}/user/get/cookie-bar`, {
waitUntil: 'domcontentloaded',
});
cookie = await getCookies(page);
await page.close();
return cookie;
});
}
cookieJar = CookieJar.fromJSON({ cookies: cookieJar });
return cookie;
};
const parseList = ($, limit) =>
@@ -36,15 +37,24 @@ const parseList = ($, limit) =>
})
.slice(0, limit);
const parseItems = (list, tryGet) =>
const parseItems = (list, browser, tryGet) =>
Promise.all(
list.map((item) =>
tryGet(item.link, async () => {
const { data: response } = await got(item.link, {
cookieJar,
const page = await browser.newPage();
await page.setRequestInterception(true);
page.on('request', (request) => {
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
});
await setCookies(page, cookie, 'cw.com.tw');
await page.goto(item.link, {
waitUntil: 'domcontentloaded',
});
const response = await page.evaluate(() => document.documentElement.innerHTML);
await page.close();
const $ = cheerio.load(response);
const meta = JSON.parse($('head script[type="application/ld+json"]').eq(0).text());
$('.article__head .breadcrumb, .article__head h1, .article__provideViews, .ad').remove();
$('img.lazyload').each((_, img) => {
@@ -67,9 +77,8 @@ const parseItems = (list, tryGet) =>
module.exports = {
baseUrl,
cookieJar,
got,
getCookie,
setCookies,
parseList,
parseItems,
};