mirror of
https://github.com/DIYgod/RSSHub.git
synced 2025-12-13 00:35:57 +08:00
fix(route): cw (#11613)
This commit is contained in:
@@ -2043,11 +2043,11 @@ category 对应的关键词有
|
|||||||
|
|
||||||
### 最新上線
|
### 最新上線
|
||||||
|
|
||||||
<Route author="TonyRL" example="/cw/today" path="/cw/today" radar="1" rssbud="1"/>
|
<Route author="TonyRL" example="/cw/today" path="/cw/today" radar="1" rssbud="1" puppeteer="1"/>
|
||||||
|
|
||||||
### 主頻道
|
### 主頻道
|
||||||
|
|
||||||
<Route author="TonyRL" example="/cw/master/8" path="/cw/master/:channel" :paramsDesc="['主頻道 ID,可在 URL 中找到']" radar="1" rssbud="1">
|
<Route author="TonyRL" example="/cw/master/8" path="/cw/master/:channel" :paramsDesc="['主頻道 ID,可在 URL 中找到']" radar="1" rssbud="1" puppeteer="1">
|
||||||
|
|
||||||
| 主頻道名稱 | 主頻道 ID |
|
| 主頻道名稱 | 主頻道 ID |
|
||||||
| ----- | ------ |
|
| ----- | ------ |
|
||||||
@@ -2069,7 +2069,7 @@ category 对应的关键词有
|
|||||||
|
|
||||||
### 子頻道
|
### 子頻道
|
||||||
|
|
||||||
<Route author="TonyRL" example="/cw/sub/615" path="/cw/sub/:channel" :paramsDesc="['子頻道 ID,可在 URL 中找到']" radar="1" rssbud="1"/>
|
<Route author="TonyRL" example="/cw/sub/615" path="/cw/sub/:channel" :paramsDesc="['子頻道 ID,可在 URL 中找到']" radar="1" rssbud="1" puppeteer="1"/>
|
||||||
|
|
||||||
## 卫报 The Guardian
|
## 卫报 The Guardian
|
||||||
|
|
||||||
|
|||||||
@@ -1,25 +1,30 @@
|
|||||||
const cheerio = require('cheerio');
|
const cheerio = require('cheerio');
|
||||||
const { baseUrl, cookieJar, got, parseList, parseItems, getCookie } = require('./utils');
|
const { baseUrl, parseList, parseItems, getCookie, setCookies } = require('./utils');
|
||||||
|
|
||||||
module.exports = async (ctx) => {
|
module.exports = async (ctx) => {
|
||||||
const { channel } = ctx.params;
|
const { channel } = ctx.params;
|
||||||
const pageUrl = `${baseUrl}/masterChannel.action`;
|
const pageUrl = `${baseUrl}/masterChannel.action`;
|
||||||
if (!cookieJar) {
|
|
||||||
await getCookie();
|
const browser = await require('@/utils/puppeteer')();
|
||||||
}
|
const cookie = await getCookie(browser, ctx.cache.tryGet);
|
||||||
const { data: response } = await got(pageUrl, {
|
const page = await browser.newPage();
|
||||||
headers: {
|
await page.setRequestInterception(true);
|
||||||
Referer: baseUrl,
|
page.on('request', (request) => {
|
||||||
},
|
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
|
||||||
cookieJar,
|
|
||||||
searchParams: {
|
|
||||||
idMasterChannel: channel,
|
|
||||||
},
|
|
||||||
});
|
});
|
||||||
|
await setCookies(page, cookie, 'cw.com.tw');
|
||||||
|
await page.goto(`${pageUrl}?idMasterChannel=${channel}`, {
|
||||||
|
waitUntil: 'domcontentloaded',
|
||||||
|
});
|
||||||
|
|
||||||
|
const response = await page.evaluate(() => document.documentElement.innerHTML);
|
||||||
|
await page.close();
|
||||||
const $ = cheerio.load(response);
|
const $ = cheerio.load(response);
|
||||||
|
|
||||||
const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 100);
|
const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 12);
|
||||||
const items = await parseItems(list, ctx.cache.tryGet);
|
const items = await parseItems(list, browser, ctx.cache.tryGet);
|
||||||
|
|
||||||
|
await browser.close();
|
||||||
|
|
||||||
ctx.state.data = {
|
ctx.state.data = {
|
||||||
title: $('head title').text(),
|
title: $('head title').text(),
|
||||||
|
|||||||
@@ -1,25 +1,30 @@
|
|||||||
const cheerio = require('cheerio');
|
const cheerio = require('cheerio');
|
||||||
const { baseUrl, cookieJar, got, parseList, parseItems, getCookie } = require('./utils');
|
const { baseUrl, parseList, parseItems, getCookie, setCookies } = require('./utils');
|
||||||
|
|
||||||
module.exports = async (ctx) => {
|
module.exports = async (ctx) => {
|
||||||
const { channel } = ctx.params;
|
const { channel } = ctx.params;
|
||||||
const pageUrl = `${baseUrl}/subchannel.action`;
|
const pageUrl = `${baseUrl}/subchannel.action`;
|
||||||
if (!cookieJar) {
|
|
||||||
await getCookie();
|
const browser = await require('@/utils/puppeteer')();
|
||||||
}
|
const cookie = await getCookie(browser, ctx.cache.tryGet);
|
||||||
const { data: response } = await got(pageUrl, {
|
const page = await browser.newPage();
|
||||||
headers: {
|
await page.setRequestInterception(true);
|
||||||
Referer: baseUrl,
|
page.on('request', (request) => {
|
||||||
},
|
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
|
||||||
cookieJar,
|
|
||||||
searchParams: {
|
|
||||||
idSubChannel: channel,
|
|
||||||
},
|
|
||||||
});
|
});
|
||||||
|
await setCookies(page, cookie, 'cw.com.tw');
|
||||||
|
await page.goto(`${pageUrl}?idSubChannel=${channel}`, {
|
||||||
|
waitUntil: 'domcontentloaded',
|
||||||
|
});
|
||||||
|
|
||||||
|
const response = await page.evaluate(() => document.documentElement.innerHTML);
|
||||||
|
await page.close();
|
||||||
const $ = cheerio.load(response);
|
const $ = cheerio.load(response);
|
||||||
|
|
||||||
const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 100);
|
const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 12);
|
||||||
const items = await parseItems(list, ctx.cache.tryGet);
|
const items = await parseItems(list, browser, ctx.cache.tryGet);
|
||||||
|
|
||||||
|
await browser.close();
|
||||||
|
|
||||||
ctx.state.data = {
|
ctx.state.data = {
|
||||||
title: $('head title').text(),
|
title: $('head title').text(),
|
||||||
|
|||||||
@@ -1,21 +1,29 @@
|
|||||||
const cheerio = require('cheerio');
|
const cheerio = require('cheerio');
|
||||||
const { baseUrl, cookieJar, got, parseList, parseItems, getCookie } = require('./utils');
|
const { baseUrl, parseList, parseItems, getCookie, setCookies } = require('./utils');
|
||||||
|
|
||||||
module.exports = async (ctx) => {
|
module.exports = async (ctx) => {
|
||||||
const pageUrl = `${baseUrl}/today`;
|
const pageUrl = `${baseUrl}/today`;
|
||||||
if (!cookieJar) {
|
|
||||||
await getCookie();
|
const browser = await require('@/utils/puppeteer')();
|
||||||
}
|
const cookie = await getCookie(browser, ctx.cache.tryGet);
|
||||||
const { data: response } = await got(pageUrl, {
|
const page = await browser.newPage();
|
||||||
headers: {
|
await page.setRequestInterception(true);
|
||||||
Referer: baseUrl,
|
page.on('request', (request) => {
|
||||||
},
|
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
|
||||||
cookieJar,
|
|
||||||
});
|
});
|
||||||
|
await setCookies(page, cookie, 'cw.com.tw');
|
||||||
|
await page.goto(pageUrl, {
|
||||||
|
waitUntil: 'domcontentloaded',
|
||||||
|
});
|
||||||
|
|
||||||
|
const response = await page.evaluate(() => document.documentElement.innerHTML);
|
||||||
|
await page.close();
|
||||||
const $ = cheerio.load(response);
|
const $ = cheerio.load(response);
|
||||||
|
|
||||||
const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 100);
|
const list = parseList($, ctx.query.limit ? Number(ctx.query.limit) : 30);
|
||||||
const items = await parseItems(list, ctx.cache.tryGet);
|
const items = await parseItems(list, browser, ctx.cache.tryGet);
|
||||||
|
|
||||||
|
await browser.close();
|
||||||
|
|
||||||
ctx.state.data = {
|
ctx.state.data = {
|
||||||
title: $('head title').text(),
|
title: $('head title').text(),
|
||||||
|
|||||||
@@ -1,26 +1,27 @@
|
|||||||
const cheerio = require('cheerio');
|
const cheerio = require('cheerio');
|
||||||
const { parseDate } = require('@/utils/parse-date');
|
const { parseDate } = require('@/utils/parse-date');
|
||||||
const { Cookie, CookieJar } = require('tough-cookie');
|
const { getCookies, setCookies } = require('@/utils/puppeteer-utils');
|
||||||
let cookieJar;
|
let cookie;
|
||||||
const config = require('@/config').value;
|
|
||||||
|
|
||||||
const baseUrl = 'https://www.cw.com.tw';
|
const baseUrl = 'https://www.cw.com.tw';
|
||||||
|
|
||||||
const got = require('@/utils/got').extend({
|
const getCookie = async (browser, tryGet) => {
|
||||||
headers: {
|
if (!cookie) {
|
||||||
'User-Agent': config.trueUA,
|
cookie = await tryGet('cw:cookie', async () => {
|
||||||
},
|
const page = await browser.newPage();
|
||||||
|
await page.setRequestInterception(true);
|
||||||
|
page.on('request', (request) => {
|
||||||
|
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
|
||||||
|
});
|
||||||
|
await page.goto(`${baseUrl}/user/get/cookie-bar`, {
|
||||||
|
waitUntil: 'domcontentloaded',
|
||||||
|
});
|
||||||
|
cookie = await getCookies(page);
|
||||||
|
await page.close();
|
||||||
|
return cookie;
|
||||||
});
|
});
|
||||||
|
|
||||||
const getCookie = async () => {
|
|
||||||
const response = await got(`${baseUrl}/user/get/cookie-bar`);
|
|
||||||
const cookies = response.headers['set-cookie'];
|
|
||||||
if (Array.isArray(cookies)) {
|
|
||||||
cookieJar = cookies.map(Cookie.parse);
|
|
||||||
} else {
|
|
||||||
cookieJar = [Cookie.parse(cookieJar)];
|
|
||||||
}
|
}
|
||||||
cookieJar = CookieJar.fromJSON({ cookies: cookieJar });
|
return cookie;
|
||||||
};
|
};
|
||||||
|
|
||||||
const parseList = ($, limit) =>
|
const parseList = ($, limit) =>
|
||||||
@@ -36,15 +37,24 @@ const parseList = ($, limit) =>
|
|||||||
})
|
})
|
||||||
.slice(0, limit);
|
.slice(0, limit);
|
||||||
|
|
||||||
const parseItems = (list, tryGet) =>
|
const parseItems = (list, browser, tryGet) =>
|
||||||
Promise.all(
|
Promise.all(
|
||||||
list.map((item) =>
|
list.map((item) =>
|
||||||
tryGet(item.link, async () => {
|
tryGet(item.link, async () => {
|
||||||
const { data: response } = await got(item.link, {
|
const page = await browser.newPage();
|
||||||
cookieJar,
|
await page.setRequestInterception(true);
|
||||||
|
page.on('request', (request) => {
|
||||||
|
request.resourceType() === 'document' || request.resourceType() === 'script' ? request.continue() : request.abort();
|
||||||
|
});
|
||||||
|
await setCookies(page, cookie, 'cw.com.tw');
|
||||||
|
await page.goto(item.link, {
|
||||||
|
waitUntil: 'domcontentloaded',
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const response = await page.evaluate(() => document.documentElement.innerHTML);
|
||||||
|
await page.close();
|
||||||
const $ = cheerio.load(response);
|
const $ = cheerio.load(response);
|
||||||
|
|
||||||
const meta = JSON.parse($('head script[type="application/ld+json"]').eq(0).text());
|
const meta = JSON.parse($('head script[type="application/ld+json"]').eq(0).text());
|
||||||
$('.article__head .breadcrumb, .article__head h1, .article__provideViews, .ad').remove();
|
$('.article__head .breadcrumb, .article__head h1, .article__provideViews, .ad').remove();
|
||||||
$('img.lazyload').each((_, img) => {
|
$('img.lazyload').each((_, img) => {
|
||||||
@@ -67,9 +77,8 @@ const parseItems = (list, tryGet) =>
|
|||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
baseUrl,
|
baseUrl,
|
||||||
cookieJar,
|
|
||||||
got,
|
|
||||||
getCookie,
|
getCookie,
|
||||||
|
setCookies,
|
||||||
parseList,
|
parseList,
|
||||||
parseItems,
|
parseItems,
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user