// Warning: The author knows nothing about javascript!
// Route params:
//   count (optional): how many of the newest books to return.
//                     Defaults to 10 when missing or invalid; capped at 50.
const got = require('@/utils/got'); // get web content
const cheerio = require('cheerio'); // html parser
const he = require('he');
const base_url = 'https://www.aozora.gr.jp/index_pages/';
module.exports = async (ctx) => {
// get the update list
const response = await got({
method: 'get',
url: base_url + 'whatsnew1.html',
});
const data = response.data; // content is html format
const $ = cheerio.load(data);
const list = $('table.list tr');
// get how many new-books. amount in this page is 50
let count = parseInt(ctx.params.count);
if (Number.isNaN(count) || count < 1) {
count = 10; // default count of new-book list
} else if (count > 50) {
count = 50;
}
// parse book urls
const detail_urls = [];
for (let i = 1; i < count + 1; ++i) {
// i = 1: first tr is table title, ignore
const link =
base_url +
$(list[i])
.find('a')
.attr('href');
detail_urls.push(link);
}
// get book-cards
const responses = await Promise.all(detail_urls.map((url) => got(url)));
const cards = responses.map(({ data }) => data);
// get real data to feed
const book_list = [];
for (let i = 0; i < count; ++i) {
const $ = cheerio.load(cards[i]);
const link = $('meta[property="og:url"]').attr('content');
const link_dir = link.replace(/\/[^/]*$/g, '/');
const title_info = $('table[summary="タイトルデータ"] > tbody > tr');
let author = '';
let title = '';
let title_sub = '';
for (let j = 0; j < title_info.length; ++j) {
const tmp = he.decode($(title_info[j]).html()); // should convert from escaped to unicode
if (tmp.includes('作品名:')) {
title = $(title_info[j])
.find('td:nth-child(2)')
.text();
}
if (tmp.includes('副題:')) {
title_sub = $(title_info[j])
.find('td:nth-child(2)')
.text();
}
if (tmp.includes('著者名:')) {
author = $(title_info[j])
.find('td:nth-child(2)')
.text();
}
}
if (title_sub !== '') {
title += ' —— ' + title_sub;
}
const pub_date_raw = $('table[summary="底本データ"] > tbody > tr:nth-child(3) > td:nth-child(2)').text();
const pub_date_num = pub_date_raw.replace(/(.*)|日/g, '').replace(/[年月]/g, '-');
const pub_date = new Date(pub_date_num).toUTCString();
const full_text_relative_link = $('table.download > tbody > tr:nth-child(3) > td:nth-child(3) > a').attr('href');
const full_text_link = link_dir + full_text_relative_link;
const full_text_link_html = 'いますぐXHTML版で読む
';
const summury = $('table[summary="作品データ"]')
.html()
.replace(/href="/g, 'href="' + link_dir);
const item = {
title: title,
author: author,
pubDate: pub_date,
link: link,
description: full_text_link_html + summury,
};
book_list.push(item);
}
// feed the data
ctx.state.data = {
title: '青空文庫新着リスト',
link: base_url + 'whatsnew1.html',
item: book_list,
};
};