diff --git a/docs/traditional-media.md b/docs/traditional-media.md
index e7614727e0..17bb39843a 100644
--- a/docs/traditional-media.md
+++ b/docs/traditional-media.md
@@ -2043,11 +2043,11 @@ category 对应的关键词有
### 最新上線
-
+
### 主頻道
-
+
| 主頻道名稱 | 主頻道 ID |
| ----- | ------ |
@@ -2069,7 +2069,7 @@ category 对应的关键词有
### 子頻道
-
+
## 卫报 The Guardian
diff --git a/lib/v2/cw/master.js b/lib/v2/cw/master.js
index e2e81022e3..aa34918c10 100644
--- a/lib/v2/cw/master.js
+++ b/lib/v2/cw/master.js
@@ -1,25 +1,30 @@
const cheerio = require('cheerio');
-const { baseUrl, cookieJar, got, parseList, parseItems, getCookie } = require('./utils');
+const { baseUrl, parseList, parseItems, getCookie, setCookies } = require('./utils');
module.exports = async (ctx) => {
const { channel } = ctx.params;
const pageUrl = `${baseUrl}/masterChannel.action`;
- if (!cookieJar) {
- await getCookie();
- }
- const { data: response } = await got(pageUrl, {
- headers: {
- Referer: baseUrl,
- },
- cookieJar,
- searchParams: {
- idMasterChannel: channel,
- },
+
+ const browser = await require('@/utils/puppeteer')();
+ const cookie = await getCookie(browser, ctx.cache.tryGet);
+ const page = await browser.newPage();
+ await page.setRequestInterception(true);
+ page.on('request', (request) => {
+ request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
});
+ await setCookies(page, cookie, 'cw.com.tw');
+ await page.goto(`${pageUrl}?idMasterChannel=${channel}`, {
+ waitUntil: 'domcontentloaded',
+ });
+
+ const response = await page.evaluate(() => document.documentElement.innerHTML);
+ await page.close();
const $ = cheerio.load(response);
- const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 100);
- const items = await parseItems(list, ctx.cache.tryGet);
+ const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 12);
+ const items = await parseItems(list, browser, ctx.cache.tryGet);
+
+ await browser.close();
ctx.state.data = {
title: $('head title').text(),
diff --git a/lib/v2/cw/sub.js b/lib/v2/cw/sub.js
index f995171f62..134ff61669 100644
--- a/lib/v2/cw/sub.js
+++ b/lib/v2/cw/sub.js
@@ -1,25 +1,30 @@
const cheerio = require('cheerio');
-const { baseUrl, cookieJar, got, parseList, parseItems, getCookie } = require('./utils');
+const { baseUrl, parseList, parseItems, getCookie, setCookies } = require('./utils');
module.exports = async (ctx) => {
const { channel } = ctx.params;
const pageUrl = `${baseUrl}/subchannel.action`;
- if (!cookieJar) {
- await getCookie();
- }
- const { data: response } = await got(pageUrl, {
- headers: {
- Referer: baseUrl,
- },
- cookieJar,
- searchParams: {
- idSubChannel: channel,
- },
+
+ const browser = await require('@/utils/puppeteer')();
+ const cookie = await getCookie(browser, ctx.cache.tryGet);
+ const page = await browser.newPage();
+ await page.setRequestInterception(true);
+ page.on('request', (request) => {
+ request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
});
+ await setCookies(page, cookie, 'cw.com.tw');
+ await page.goto(`${pageUrl}?idSubChannel=${channel}`, {
+ waitUntil: 'domcontentloaded',
+ });
+
+ const response = await page.evaluate(() => document.documentElement.innerHTML);
+ await page.close();
const $ = cheerio.load(response);
- const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 100);
- const items = await parseItems(list, ctx.cache.tryGet);
+ const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 12);
+ const items = await parseItems(list, browser, ctx.cache.tryGet);
+
+ await browser.close();
ctx.state.data = {
title: $('head title').text(),
diff --git a/lib/v2/cw/today.js b/lib/v2/cw/today.js
index 6e494e37b2..d878b32458 100644
--- a/lib/v2/cw/today.js
+++ b/lib/v2/cw/today.js
@@ -1,21 +1,29 @@
const cheerio = require('cheerio');
-const { baseUrl, cookieJar, got, parseList, parseItems, getCookie } = require('./utils');
+const { baseUrl, parseList, parseItems, getCookie, setCookies } = require('./utils');
module.exports = async (ctx) => {
const pageUrl = `${baseUrl}/today`;
- if (!cookieJar) {
- await getCookie();
- }
- const { data: response } = await got(pageUrl, {
- headers: {
- Referer: baseUrl,
- },
- cookieJar,
+
+ const browser = await require('@/utils/puppeteer')();
+ const cookie = await getCookie(browser, ctx.cache.tryGet);
+ const page = await browser.newPage();
+ await page.setRequestInterception(true);
+ page.on('request', (request) => {
+ request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
});
+ await setCookies(page, cookie, 'cw.com.tw');
+ await page.goto(pageUrl, {
+ waitUntil: 'domcontentloaded',
+ });
+
+ const response = await page.evaluate(() => document.documentElement.innerHTML);
+ await page.close();
const $ = cheerio.load(response);
- const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 100);
- const items = await parseItems(list, ctx.cache.tryGet);
+ const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 30);
+ const items = await parseItems(list, browser, ctx.cache.tryGet);
+
+ await browser.close();
ctx.state.data = {
title: $('head title').text(),
diff --git a/lib/v2/cw/utils.js b/lib/v2/cw/utils.js
index 2e583e9dd7..a3735a61d1 100644
--- a/lib/v2/cw/utils.js
+++ b/lib/v2/cw/utils.js
@@ -1,26 +1,27 @@
const cheerio = require('cheerio');
const { parseDate } = require('@/utils/parse-date');
-const { Cookie, CookieJar } = require('tough-cookie');
-let cookieJar;
-const config = require('@/config').value;
+const { getCookies, setCookies } = require('@/utils/puppeteer-utils');
+let cookie;
const baseUrl = 'https://www.cw.com.tw';
-const got = require('@/utils/got').extend({
- headers: {
- 'User-Agent': config.trueUA,
- },
-});
-
-const getCookie = async () => {
- const response = await got(`${baseUrl}/user/get/cookie-bar`);
- const cookies = response.headers['set-cookie'];
- if (Array.isArray(cookies)) {
- cookieJar = cookies.map(Cookie.parse);
- } else {
- cookieJar = [Cookie.parse(cookieJar)];
+const getCookie = async (browser, tryGet) => { // Resolves the cookies for baseUrl through a browser page; `browser` must provide newPage(), `tryGet(key, fn)` is the caller-supplied cache lookup.
+ if (!cookie) { // module-level memo: the lookup below is skipped once a truthy value has been stored
+ cookie = await tryGet('cw:cookie', async () => { // presumably tryGet only invokes this callback on a cache miss for 'cw:cookie' — confirm against the cache helper
+ const page = await browser.newPage();
+ await page.setRequestInterception(true); // interception must be on before the 'request' handler below can continue/abort requests
+ page.on('request', (request) => {
+ request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort(); // let only document and script requests through; abort every other resource type
+ });
+ await page.goto(`${baseUrl}/user/get/cookie-bar`, {
+ waitUntil: 'domcontentloaded',
+ });
+ cookie = await getCookies(page); // writes the module-level variable from inside the callback; the outer assignment then stores tryGet's result over it
+ await page.close(); // NOTE(review): not reached if goto/getCookies rejects — there is no try/finally here, so this function leaves the page unclosed on failure
+ return cookie;
+ });
}
- cookieJar = CookieJar.fromJSON({ cookies: cookieJar });
+ return cookie; // same value that was just memoized (or the one stored by an earlier call)
};
const parseList = ($, limit) =>
@@ -36,15 +37,24 @@ const parseList = ($, limit) =>
})
.slice(0, limit);
-const parseItems = (list, tryGet) =>
+const parseItems = (list, browser, tryGet) =>
Promise.all(
list.map((item) =>
tryGet(item.link, async () => {
- const { data: response } = await got(item.link, {
- cookieJar,
+ const page = await browser.newPage();
+ await page.setRequestInterception(true);
+ page.on('request', (request) => {
+ request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
+ });
+ await setCookies(page, cookie, 'cw.com.tw');
+ await page.goto(item.link, {
+ waitUntil: 'domcontentloaded',
});
+ const response = await page.evaluate(() => document.documentElement.innerHTML);
+ await page.close();
const $ = cheerio.load(response);
+
const meta = JSON.parse($('head script[type="application/ld+json"]').eq(0).text());
$('.article__head .breadcrumb, .article__head h1, .article__provideViews, .ad').remove();
$('img.lazyload').each((_, img) => {
@@ -67,9 +77,8 @@ const parseItems = (list, tryGet) =>
module.exports = {
baseUrl,
- cookieJar,
- got,
getCookie,
+ setCookies, // passed through unchanged from '@/utils/puppeteer-utils' so the route handlers can import it from './utils'
parseList,
parseItems,
};