const got = require('@/utils/got');
const cheerio = require('cheerio');
/**
 * Load the HTML of a page on Facebook's mbasic site through the given cache.
 *
 * @param {string} linkPath - Path (and optional query string) appended to the
 *     `https://mbasic.facebook.com` origin.
 * @param {string} cacheKey - Key handed to `cache.tryGet`.
 * @param {object} cache - Object exposing `tryGet(key, loader)`; presumably the
 *     loader only runs on a cache miss (depends on the cache implementation).
 * @returns {Promise<*>} Whatever `cache.tryGet` resolves to; on a miss the
 *     loader supplies the `data` field of the `got.get` response.
 */
const fetchPageHtml = async (linkPath, cacheKey, cache) => {
    const pageUrl = `https://mbasic.facebook.com${linkPath}`;

    // Loader handed to the cache: performs the actual HTTP request.
    const loadHtml = async () => {
        const response = await got.get(pageUrl);
        return response.data;
    };

    return cache.tryGet(cacheKey, loadHtml);
};
/**
 * Scrape a `story.php` permalink page from the mbasic site.
 *
 * @param {string} linkPath - Path plus query string of the story; the
 *     `story_fbid` and `id` query parameters are read from it.
 * @param {object} cache - Cache forwarded to `fetchPageHtml` / `parsePhotoPage`.
 * @returns {Promise<{url: string, title: string, content: string, images: Array}>}
 *     `url` is the www.facebook.com permalink, `title` the text of the header's
 *     `h3`, `content` the story text with `<br>` turned into newlines, and
 *     `images` the `parsePhotoPage` results when every attachment link is a
 *     `/photos/` link (otherwise an empty array).
 */
const parseStoryPage = async (linkPath, cache) => {
    const query = new URL(`https://mbasic.facebook.com${linkPath}`).searchParams;
    const storyFbId = query.get('story_fbid');
    const storyId = query.get('id');

    const html = await fetchPageHtml(linkPath, `story/${storyFbId}/${storyId}`, cache);
    const $ = cheerio.load(html);

    // Locate the story box, then its header, text block and attachment block.
    const $box = $('#m_story_permalink_view').first().find('div > div > div > div').eq(0);
    const $header = $box.find('header').eq(0);
    const $sections = $box.find('div > div');
    const $content = $sections.eq(0);
    const $attach = $sections.eq(1);

    const attachLinkList = $attach
        .find('a')
        .toArray()
        .map((anchor) => $(anchor).attr('href'));
    // Vacuously true when there are no attachment links at all.
    const isAttachAreImageSet = attachLinkList.every((link) => /\/photos\//.test(link));

    // Read the title before any <br> nodes are rewritten below.
    const title = $header.find('h3').text();

    const $paragraphs = $content.find('p');
    let content;
    if ($paragraphs.length > 0) {
        // One output line per paragraph; <br> inside a paragraph is a newline too.
        $paragraphs.find('br').replaceWith('\n');
        content = $paragraphs
            .toArray()
            .map((paragraph) => $(paragraph).text())
            .join('\n');
    } else {
        $content.find('br').replaceWith('\n');
        content = $content.text();
    }

    const images = isAttachAreImageSet ? await Promise.all(attachLinkList.map((link) => parsePhotoPage(link, cache))) : [];

    return {
        url: `https://www.facebook.com/story.php?story_fbid=${storyFbId}&id=${storyId}`,
        title,
        content,
        images,
    };
};
/**
 * Scrape a single photo page from the mbasic site.
 *
 * @param {string} linkPath - Path (plus optional query string) of the photo
 *     page; only its pathname is used for the cache key and the public URL.
 * @param {object} cache - Cache forwarded to `fetchPageHtml`.
 * @returns {Promise<{title: string, url: string, content: string, image: (string|undefined)}>}
 *     `title` and `content` are text read from `#MPhotoContent div.msg`
 *     (`<br>` in the content becomes a newline), `url` is the
 *     www.facebook.com address of the same pathname, and `image` is the
 *     `href` of the matched attachment link (undefined when nothing matches).
 */
const parsePhotoPage = async (linkPath, cache) => {
    const { pathname: photoPath } = new URL(`https://mbasic.facebook.com${linkPath}`);
    const pageHtml = await fetchPageHtml(linkPath, `photos${photoPath}`, cache);
    const $ = cheerio.load(pageHtml);

    // Read the title before the <br> nodes are rewritten below.
    const title = $('#MPhotoContent div.msg > a > strong').first().text();

    const $caption = $('#MPhotoContent div.msg > div');
    $caption.find('br').replaceWith('\n');

    return {
        title,
        url: `https://www.facebook.com${photoPath}`,
        content: $caption.text(),
        image: $('#MPhotoContent div.desc.attachment > span > div > span > a[target=_blank].sec').attr('href'),
    };
};
module.exports = async (ctx) => {
const { id } = ctx.params;
const pageId = encodeURIComponent(id);
const linkPath = `/${pageId}`;
const html = await fetchPageHtml(linkPath, pageId, ctx.cache);
const $ = cheerio.load(html);
const itemLinks = $('footer > div:nth-child(2) > a:nth-child(1)')
.toArray()
.map((a) => $(a).attr('href'));
const items = await Promise.all(
itemLinks.map(async (itemLink) => {
if (new RegExp(`^/.+/photos/`).test(itemLink)) {
const data = await parsePhotoPage(itemLink, ctx.cache);
return {
title: data.title,
link: data.url,
description: `
${data.content.replace(/\n/g, '
')}`,
};
}
if (new RegExp(`^/story.php`).test(itemLink)) {
const data = await parseStoryPage(itemLink, ctx.cache);
const isSingleImageStory = data.images.length === 1;
const isEmptyImageList = data.images.length === 0;
let desc = '';
desc += data.images.map((image) => `
${image.content.replace(/\n/g, '
')}`).join('
');
if (!isSingleImageStory) {
!isEmptyImageList && (desc += '
');
desc += data.content.replace(/\n/g, '
');
}
return {
title: data.title,
link: data.url,
description: desc,
};
}
})
);
ctx.state.data = {
title: $('#m-timeline-cover-section h1 span').text(),
link: `https://www.facebook.com/${pageId}`,
description: $('#sub_profile_pic_content>div>div:nth-child(3) div>span').find('br').replaceWith('\n').text(),
item: items.filter((item) => !!item),
};
};