Files
2016-02-09 02:28:08 -05:00

102 lines
3.1 KiB
JavaScript
Executable File

#!/usr/local/bin/node --use_strict
"use strict";
const child_process = require('child_process');
const fs = require('fs');
const readline = require('readline');
const match = require('./../lib/matcher');
// Separator between the two JSON halves of every merged line: main() hands it
// to `paste -d` when building makemeahanzi.txt, and callback() splits each
// line of that file on it.
const delimiter = 'BREAK';
// Small helpers for 2D points stored as [x, y] pairs.
const util = {
  // Component-wise sum of two points; returns a new pair.
  add(point1, point2) {
    const x = point1[0] + point2[0];
    const y = point1[1] + point2[1];
    return [x, y];
  },
  // Component-wise difference (point1 - point2); returns a new pair.
  subtract(point1, point2) {
    const x = point1[0] - point2[0];
    const y = point1[1] - point2[1];
    return [x, y];
  },
};
// Returns a copy of `median` (a list of [x, y] points) in which every point
// keeps its x value and has its y value replaced by 900 - y. The input list
// and its points are left untouched.
const mapToSquare = (median) => {
  const flipPoint = (point) => {
    const x = point[0];
    const y = 900 - point[1];
    return [x, y];
  };
  return median.map(flipPoint);
};
// Serializes one row into its binary record and returns it as a Buffer.
//
// Record layout, one byte per entry:
//   [low byte of the character's first UTF-16 code unit,
//    high byte of that code unit,
//    number of preprocessed medians,
//    then, for each median: its length followed by each of its values]
//
// Parameters:
//   matcher - object exposing preprocess(medians); its result is consumed
//             here as a list of lists of numbers.
//   row     - object providing `character` (a string) and `medians` (a list
//             of point lists, each passed through mapToSquare first).
//
// Throws a RangeError if any entry of the record is not in [0, 256), since
// such a value cannot be stored in a single byte.
const encode = (matcher, row) => {
  // TODO(skishore): Figure out how to properly decode UTF-8 or -16 in
  // Javascript and then use one of those encodings here instead of this hack.
  const codepoint = row.character.charCodeAt(0);
  const result = [codepoint & 0xff, codepoint >> 8];

  // Push the medians into the binary representation.
  const preprocessed = matcher.preprocess(row.medians.map(mapToSquare));
  result.push(preprocessed.length);
  for (const median of preprocessed) {
    result.push(median.length);
    for (const value of median) {
      result.push(value);
    }
  }

  // The negated form also rejects NaN and undefined, which fail both
  // comparisons.
  for (const value of result) {
    if (!(0 <= value && value < 256)) {
      throw new RangeError(
        `Value ${value} for character ${row.character} does not fit in a ` +
        `byte; preprocessed medians: ${JSON.stringify(preprocessed)}`);
    }
  }

  // Buffer.from replaces the deprecated `new Buffer(array)` constructor.
  return Buffer.from(result);
};
// Annotates every row with its dependencies and writes each part to disk.
//
// For each key in `parts`, every row in that part gets a fresh
// `dependencies` object: for each character of row.decomposition that has an
// entry in `rows`, dependencies[character] is set to
// "<pinyin joined by ', '> - <definition>" of that entry. The annotated list
// is then written as JSON to public/characters/part-<key>.txt.
//
// Parameters:
//   parts - object mapping a part key to a list of rows (mutated in place).
//   rows  - object mapping a character to its row.
const dumpParts = (parts, rows) => {
  const describe = (entry) => `${entry.pinyin.join(', ')} - ${entry.definition}`;

  for (const part in parts) {
    const partRows = parts[part];
    for (const row of partRows) {
      const dependencies = {};
      row.dependencies = dependencies;
      for (const character of Array.from(row.decomposition)) {
        const entry = rows[character];
        // Characters without a known row are simply left out.
        if (entry) {
          dependencies[character] = describe(entry);
        }
      }
    }

    const writer = fs.createWriteStream(`public/characters/part-${part}.txt`);
    writer.write(JSON.stringify(partRows));
    writer.end();
  }
};
// Processes makemeahanzi.txt line by line once main() has produced it.
//
// Each line must split on `delimiter` into exactly two JSON documents; the
// keys of the second are copied onto the first to form one row. For every
// row, its encoded record is appended to public/medians.bin and the row is
// filed both under its part (first UTF-16 code unit divided by 256, rounded
// down) and under its character. When the input is exhausted, the binary
// file is closed, the parts are dumped via dumpParts, and makemeahanzi.txt
// is removed.
const callback = () => {
  const reader = readline.createInterface({
    input: fs.createReadStream('makemeahanzi.txt'),
  });
  const writer = fs.createWriteStream('public/medians.bin');
  const matcher = new match.makemeahanzi.Matcher([]);
  const parts = {};
  const rows = {};

  const handleLine = (line) => {
    const pieces = line.trim().split(delimiter);
    if (pieces.length !== 2) {
      throw new Error(line);
    }
    const row = JSON.parse(pieces[0]);
    const extra = JSON.parse(pieces[1]);
    for (const key in extra) {
      row[key] = extra[key];
    }
    delete row.normalized_medians;

    writer.write(encode(matcher, row));

    const part = Math.floor(row.character.charCodeAt(0) / 256);
    if (!parts[part]) {
      parts[part] = [];
    }
    parts[part].push(row);
    rows[row.character] = row;
  };

  const handleClose = () => {
    writer.end();
    dumpParts(parts, rows);
    // Remove the temporary merged file; the outcome of `rm` is ignored.
    child_process.exec('rm makemeahanzi.txt', () => 0);
  };

  reader.on('line', handleLine);
  reader.on('close', handleClose);
};
// Merges makemeahanzi/dictionary.txt and makemeahanzi/graphics.txt line by
// line into makemeahanzi.txt using the shell `paste` command, then hands off
// to callback().
//
// `paste -d` takes a list of single-character delimiters and uses them in
// turn between consecutive input files. To get the whole multi-character
// `delimiter` between the two real files, one /dev/null input is inserted for
// every character after the first, so each delimiter character is emitted
// once around an empty column.
//
// If the command fails, the error reported by child_process.exec is rethrown
// unchanged from the exec callback.
const main = () => {
  // TODO(skishore): This whole function is a terrible hack!
  const spacer = Array.from(delimiter).slice(1).map(() => '/dev/null').join(' ');
  const command = `paste -d ${delimiter} makemeahanzi/dictionary.txt ` +
                  `${spacer} makemeahanzi/graphics.txt > makemeahanzi.txt`;
  child_process.exec(command, (error) => {
    // Rethrow the original error rather than wrapping it in a new Error, which
    // would stringify it and discard its stack and extra fields.
    if (error != null) throw error;
    callback();
  });
};
// Script entry point: start the merge-and-encode run as soon as the file is
// executed.
main();