mirror of
https://github.com/skishore/makemeahanzi.git
synced 2025-10-29 01:46:07 +08:00
102 lines
3.1 KiB
JavaScript
Executable File
102 lines
3.1 KiB
JavaScript
Executable File
#!/usr/local/bin/node --use_strict
|
|
"use strict";
|
|
|
|
const child_process = require('child_process');
|
|
const fs = require('fs');
|
|
const readline = require('readline');
|
|
|
|
const match = require('./../lib/matcher');
|
|
|
|
// Token that separates the two JSON documents on each line of the temporary
// makemeahanzi.txt file: main() passes it to `paste -d` when building that
// file, and callback() splits every line on it.
const delimiter = 'BREAK';
|
|
|
|
// Small helpers for 2D points stored as [x, y] pairs. Nothing in the visible
// part of this file references them.
const util = {
  // Component-wise sum of two points.
  add: (point1, point2) => {
    const x = point1[0] + point2[0];
    const y = point1[1] + point2[1];
    return [x, y];
  },
  // Component-wise difference, point1 minus point2.
  subtract: (point1, point2) => {
    const x = point1[0] - point2[0];
    const y = point1[1] - point2[1];
    return [x, y];
  },
};
|
|
|
|
/**
 * Returns a copy of a median in which every point keeps its x coordinate and
 * has its y coordinate replaced by `900 - y`.
 *
 * NOTE(review): presumably this flips the glyph's y axis into the coordinate
 * system the matcher expects; confirm against the matcher library.
 *
 * @param {Array<Array<number>>} median List of [x, y] points.
 * @returns {Array<Array<number>>} New list of [x, 900 - y] points.
 */
const mapToSquare = (median) => median.map((point) => {
  const x = point[0];
  const y = point[1];
  return [x, 900 - y];
});
|
|
|
|
/**
 * Serializes one character row into the binary record appended to
 * public/medians.bin.
 *
 * Record layout, one byte per entry:
 *   [code unit low byte, code unit high byte, number of preprocessed medians,
 *    then for each preprocessed median: its length followed by its values]
 *
 * @param {{preprocess: function(Array): Array}} matcher Object whose
 *     `preprocess` turns the mapped medians into a list of flat lists of
 *     byte-sized numbers.
 * @param {{character: string, medians: Array}} row Row providing the
 *     character and its medians (each median is a list of [x, y] points).
 * @returns {Buffer} The encoded record.
 * @throws {RangeError} If any entry of the record is not in [0, 256). The
 *     preprocessed medians are attached to the error as `preprocessed`.
 */
const encode = (matcher, row) => {
  // TODO(skishore): Figure out how to properly decode UTF-8 or -16 in
  // Javascript and then use one of those encodings here instead of this hack.
  // Only the first UTF-16 code unit is stored, split into low and high bytes.
  const codepoint = row.character.charCodeAt(0);
  const result = [codepoint & 0xff, codepoint >> 8];

  // Push the medians into the binary representation.
  const medians = row.medians.map(mapToSquare);
  const preprocessed = matcher.preprocess(medians);
  result.push(preprocessed.length);
  preprocessed.forEach((median) => {
    result.push(median.length);
    median.forEach((value) => result.push(value));
  });

  // Every entry must fit in one byte. The negated form also rejects NaN and
  // undefined, which would otherwise be silently coerced by Buffer.
  result.forEach((value, offset) => {
    if (!(0 <= value && value < 256)) {
      const error = new RangeError(
          `Cannot encode ${row.character}: value ${value} at offset ` +
          `${offset} does not fit in one byte`);
      error.preprocessed = preprocessed;
      throw error;
    }
  });

  // Buffer.from replaces the deprecated `new Buffer(array)` constructor.
  return Buffer.from(result);
};
|
|
|
|
/**
 * Annotates every row with a `dependencies` map and writes each part to
 * `public/characters/part-<part>.txt` as a JSON array.
 *
 * For each character of a row's `decomposition` that has an entry in `rows`,
 * `dependencies[character]` is set to "<pinyin joined by ', '> - <definition>"
 * of that entry. Characters without an entry are skipped. Rows are mutated
 * in place.
 *
 * @param {Object<string, Array<Object>>} parts Rows grouped by part key.
 * @param {Object<string, Object>} rows All rows keyed by character.
 */
const dumpParts = (parts, rows) => {
  for (const part in parts) {
    const partRows = parts[part];
    partRows.forEach((row) => {
      row.dependencies = {};
      for (const character of Array.from(row.decomposition)) {
        const data = rows[character];
        if (!data) {
          continue;
        }
        const pinyin = data.pinyin.join(', ');
        row.dependencies[character] = `${pinyin} - ${data.definition}`;
      }
    });
    // Serialize only after the dependencies above have been filled in.
    const serialized = JSON.stringify(partRows);
    const writer = fs.createWriteStream(`public/characters/part-${part}.txt`);
    writer.end(serialized);
  }
};
|
|
|
|
/**
 * Second stage of the build: reads makemeahanzi.txt line by line and produces
 * public/medians.bin plus the per-part character files.
 *
 * Each line must contain exactly two JSON documents separated by `delimiter`.
 * The second document's keys are merged over the first, `normalized_medians`
 * is dropped, and the merged row is encoded into medians.bin and grouped into
 * a part keyed by floor(first UTF-16 code unit / 256). When the input is
 * exhausted the binary stream is closed, the parts are dumped, and the
 * temporary makemeahanzi.txt is removed (errors from `rm` are ignored).
 */
const callback = () => {
  const input = fs.createReadStream('makemeahanzi.txt');
  const reader = readline.createInterface({input: input});
  const writer = fs.createWriteStream('public/medians.bin');
  const matcher = new match.makemeahanzi.Matcher([]);
  const parts = {};
  const rows = {};

  const handleLine = (line) => {
    const pieces = line.trim().split(delimiter);
    if (pieces.length !== 2) {
      throw new Error(line);
    }
    // Keys of the second document win over those of the first.
    const row = Object.assign(JSON.parse(pieces[0]), JSON.parse(pieces[1]));
    delete row.normalized_medians;

    writer.write(encode(matcher, row));

    const part = Math.floor(row.character.charCodeAt(0) / 256);
    if (!parts[part]) {
      parts[part] = [];
    }
    parts[part].push(row);
    rows[row.character] = row;
  };

  const handleClose = () => {
    writer.end();
    dumpParts(parts, rows);
    child_process.exec('rm makemeahanzi.txt', () => 0);
  };

  reader.on('line', handleLine);
  reader.on('close', handleClose);
};
|
|
|
|
/**
 * First stage of the build: joins makemeahanzi/dictionary.txt and
 * makemeahanzi/graphics.txt line by line into makemeahanzi.txt, with the two
 * halves of each line separated by `delimiter`, then runs callback().
 *
 * `paste -d` takes a list of single-character delimiters that it cycles
 * through between consecutive input files. Inserting one /dev/null input per
 * delimiter character after the first places the delimiter's characters back
 * to back in the output.
 */
const main = () => {
  // TODO(skishore): This whole function is a terrible hack!
  const spacer = Array.from(delimiter)
      .slice(1)
      .map(() => '/dev/null')
      .join(' ');
  const command =
      `paste -d ${delimiter} makemeahanzi/dictionary.txt ` +
      `${spacer} makemeahanzi/graphics.txt > makemeahanzi.txt`;
  child_process.exec(command, (error) => {
    if (error !== null && error !== undefined) {
      throw new Error(error);
    }
    callback();
  });
};
|
|
|
|
// Script entry point: start the build as soon as the file is executed.
main();
|