feat(route): douyin hashtag (#9905)

This commit is contained in:
Tony
2022-06-07 00:41:20 +11:00
committed by GitHub
parent 3aa43220fa
commit daaa31771b
4 changed files with 151 additions and 11 deletions

View File

@@ -850,16 +850,6 @@ YouTube 官方亦有提供频道 RSS形如 <https://www.youtube.com/feeds/vid
## 抖音
### 博主
<Route author="Max-Tortoise Rongronggg9" example="/douyin/user/MS4wLjABAAAARcAHmmF9mAG3JEixq_CdP72APhBlGlLVbN-1eBcPqao" path="/douyin/user/:uid/:routeParams?" :paramsDesc="['uid可在用户页面 URL 中找到', '额外参数query string 格式,请参阅下面的表格']" anticrawler="1" radar="1" rssbud="1" puppeteer="1">
| 键 | 含义 | 值 | 默认值 |
| -------- | ----------------------------------- | ---------------------- | ------- |
| `embed` | 是否启用内嵌视频 | `0`/`1`/`true`/`false` | `false` |
| `iframe` | 是否启用 iframe 变通解决方案,仅在内嵌视频开启时有效,详见下文 | `0`/`1`/`true`/`false` | `false` |
| `relay` | 视频反代服务的 URL仅在内嵌视频开启时有效详见下文 | | |
::: warning 注意
反爬严格,需要启用 puppeteer。\
@@ -875,7 +865,21 @@ YouTube 官方亦有提供频道 RSS形如 <https://www.youtube.com/feeds/vid
:::
</Route>
额外参数
| 键 | 含义 | 值 | 默认值 |
| -------- | ----------------------------------- | ---------------------- | ------- |
| `embed` | 是否启用内嵌视频 | `0`/`1`/`true`/`false` | `false` |
| `iframe` | 是否启用 iframe 变通解决方案,仅在内嵌视频开启时有效,详见下文 | `0`/`1`/`true`/`false` | `false` |
| `relay` | 视频反代服务的 URL，仅在内嵌视频开启时有效，详见下文 | | |
### 博主
<Route author="Max-Tortoise Rongronggg9" example="/douyin/user/MS4wLjABAAAARcAHmmF9mAG3JEixq_CdP72APhBlGlLVbN-1eBcPqao" path="/douyin/user/:uid/:routeParams?" :paramsDesc="['uid，可在用户页面 URL 中找到', '额外参数，query string 格式，请参阅上面的表格']" anticrawler="1" radar="1" rssbud="1" puppeteer="1" />
### 标签
<Route author="TonyRL" example="/douyin/hashtag/1592824105719812" path="/douyin/hashtag/:cid/:routeParams?" :paramsDesc="['标签 ID，可在标签页面 URL 中找到', '额外参数，query string 格式，请参阅上面的表格']" anticrawler="1" radar="1" rssbud="1" puppeteer="1" />
## 豆瓣

129
lib/v2/douyin/hashtag.js Normal file
View File

@@ -0,0 +1,129 @@
const cheerio = require('cheerio');
const { parseDate } = require('@/utils/parse-date');
const { art } = require('@/utils/render');
const path = require('path');
const config = require('@/config').value;
const { fallback, queryToBoolean } = require('@/utils/readable-social');
// Absolute paths of the art-template files used to render item HTML,
// keyed by template name (desc, cover, embed, iframe).
const templates = Object.fromEntries(
    ['desc', 'cover', 'embed', 'iframe'].map((name) => [name, path.join(__dirname, `templates/${name}.art`)])
);
/**
 * Turn a protocol-relative or scheme-less URL into an absolute one.
 *
 * @param {string} url - URL to normalise; any falsy value yields ''.
 * @param {boolean} [tls=true] - Use `https` when a scheme has to be added, otherwise `http`.
 * @param {boolean} [forceResolve=false] - Also prefix URLs that lack an `http(s)://` scheme entirely.
 * @returns {string} The normalised URL, or the input unchanged when nothing needs to be added.
 */
const resolveUrl = (url, tls = true, forceResolve = false) => {
    if (!url) {
        return '';
    }

    const scheme = tls ? 'https:' : 'http:';

    // Protocol-relative form ("//host/path"): only the scheme is missing.
    if (url.startsWith('//')) {
        return scheme + url;
    }

    // Bare "host/path" form is only rewritten when the caller explicitly asks for it.
    const needsScheme = forceResolve && !/^https?:\/\//.test(url);
    return needsScheme ? `${scheme}//${url}` : url;
};
/**
 * Route a video URL through a relay (reverse proxy).
 *
 * A relay containing `?` is treated as a query-style endpoint: it is terminated
 * with `=` and the video URL is appended URI-encoded. Any other relay is treated
 * as a path prefix: it is terminated with `/` and the video URL is appended as is.
 *
 * @param {string} url - Video URL to relay.
 * @param {string} proxy - Relay base URL.
 * @returns {string} The relayed URL; when either argument is falsy, `url` coerced to a string.
 */
const proxyVideo = (url, proxy) => {
    if (!url || !proxy) {
        return url + '';
    }

    const isQueryStyle = proxy.includes('?');
    const terminator = isQueryStyle ? '=' : '/';
    const base = proxy.endsWith(terminator) ? proxy : proxy + terminator;
    const target = isQueryStyle ? encodeURIComponent(url) : url;

    return base + target;
};
/**
 * Rewrite an avatar URL so that it points at the original-size image.
 *
 * @param {string} url - Avatar URL (may be protocol-relative or empty).
 * @returns {string} The rewritten URL, or '' when `url` is falsy.
 */
const getOriginAvatar = (url) => {
    const absoluteUrl = resolveUrl(url);

    // For *.douyinpic.com avatar URLs: replace everything between the host and
    // "/aweme-avatar/" with "origin" and drop the query string.
    const originUrl = absoluteUrl.replace(/^(.*\.douyinpic\.com\/).*(\/aweme-avatar\/)([^?]*)(\?.*)?$/, '$1origin$2$3');

    // Strip "~<name>_<width>x<height>" suffixes.
    return originUrl.replace(/~\w+_\d+x\d+/g, '');
};
module.exports = async (ctx) => {
const { cid } = ctx.params;
if (isNaN(cid)) {
throw Error('Invalid tag ID. Tag ID should be a number.');
}
const routeParams = Object.fromEntries(new URLSearchParams(ctx.params.routeParams));
const embed = fallback(undefined, queryToBoolean(routeParams.embed), false); // embed video
const iframe = fallback(undefined, queryToBoolean(routeParams.iframe), false); // embed video in iframe
const relay = resolveUrl(routeParams.relay, true, true); // embed video behind a reverse proxy
const tagUrl = `https://www.douyin.com/hashtag/${cid}`;
const tagData = await ctx.cache.tryGet(
`douyin:hashtag:${cid}`,
async () => {
const browser = await require('@/utils/puppeteer')();
const page = await browser.newPage();
await page.goto(tagUrl, {
waitUntil: 'domcontentloaded',
});
const html = await page.evaluate(() => document.documentElement.innerHTML);
await browser.close();
const $ = cheerio.load(html);
const renderData = JSON.parse(decodeURIComponent($('script#RENDER_DATA').text()));
const dataKey = Object.keys(renderData).find((key) => renderData[key].topicDetail && renderData[key].defaultData);
return renderData[dataKey];
},
config.cache.routeExpire,
false
);
const tagInfo = tagData.topicDetail;
const tagName = tagInfo.chaName;
const userAvatar = getOriginAvatar(tagInfo.hashtagProfile);
const posts = tagData.defaultData;
const items = posts.map((post) => {
// parse video
let videoList = post.video && post.video.bitRateList && post.video.bitRateList.map((item) => resolveUrl(item.playApi));
if (relay) {
videoList = videoList.map((item) => proxyVideo(item, relay));
}
let duration = post.video && post.video.duration;
duration = duration && duration / 1000;
let img;
// if (!embed) {
// img = post.video && post.video.dynamicCover; // dynamic cover (webp)
// }
img =
img ||
(post.video &&
((post.video.coverUrlList && post.video.coverUrlList[post.video.coverUrlList.length - 1]) || // HD
post.video.originCover || // LD
post.video.cover)); // even more LD
img = img && resolveUrl(img);
// render description
const desc = post.desc && post.desc.replace(/\n/g, '<br>');
let media = art(embed && videoList ? templates.embed : templates.cover, { img, videoList, duration });
media = embed && videoList && iframe ? art(templates.iframe, { content: media }) : media; // warp in iframe
const description = art(templates.desc, { desc, media });
return {
title: post.desc,
description,
link: `https://www.douyin.com/video/${post.awemeId}`,
pubDate: parseDate(post.createTime * 1000),
category: post.textExtra.map((extra) => extra.hashtagName),
};
});
ctx.state.data = {
title: tagName,
description: `${tagInfo.viewCount} 次播放`,
image: userAvatar,
link: tagUrl,
item: items,
};
};

View File

@@ -2,6 +2,12 @@ module.exports = {
'douyin.com': {
_name: '抖音',
'.': [
{
title: '标签',
docs: 'https://docs.rsshub.app/social-media.html#dou-yin',
source: '/hashtag/:cid',
target: '/douyin/hashtag/:cid',
},
{
title: '博主',
docs: 'https://docs.rsshub.app/social-media.html#dou-yin',

View File

@@ -1,3 +1,4 @@
module.exports = function (router) {
router.get('/hashtag/:cid/:routeParams?', require('./hashtag'));
router.get('/user/:uid/:routeParams?', require('./user'));
};