#!/usr/local/bin/node --use_strict
"use strict";

// Build script: joins makemeahanzi/dictionary.txt and makemeahanzi/graphics.txt
// line by line, then emits
//   - public/medians.bin: a binary dump of the preprocessed medians, and
//   - public/characters/part-N.txt: JSON arrays of character rows, bucketed
//     by the high byte of each character's first UTF-16 code unit.

const child_process = require('child_process');
const fs = require('fs');
const readline = require('readline');

const match = require('./../lib/matcher');

// Token placed between the dictionary JSON and the graphics JSON on each
// line of the temporary joined file.
const delimiter = 'BREAK';

// Path of the temporary joined file produced by `paste` in main().
const joinedPath = 'makemeahanzi.txt';

// Small 2D point helpers. Points are [x, y] arrays.
const util = {
  add: (point1, point2) => [point1[0] + point2[0], point1[1] + point2[1]],
  subtract: (point1, point2) => [point1[0] - point2[0], point1[1] - point2[1]],
};

/**
 * Reflects every point of a median vertically, mapping [x, y] to
 * [x, 900 - y].
 * NOTE(review): presumably converts from the source data's y-up coordinate
 * system to a y-down one; confirm against the graphics data format.
 *
 * @param {Array<Array<number>>} median - list of [x, y] points.
 * @returns {Array<Array<number>>} a new list of reflected points.
 */
const mapToSquare = (median) => {
  return median.map((point) => [point[0], 900 - point[1]]);
};

/**
 * Serializes one character row into its binary record:
 *
 *   [code unit low byte, code unit high byte, number of medians,
 *    then for each median: its length followed by its values]
 *
 * Every emitted value must fit in a single byte.
 *
 * @param {Object} matcher - object exposing preprocess(medians). Its result
 *     is assumed to be a list of flat numeric lists; that is what the byte
 *     range check below requires.
 * @param {Object} row - row with `character` (string) and `medians`.
 * @returns {Buffer} the encoded record.
 * @throws {Error} if any value to be written is outside [0, 256).
 */
const encode = (matcher, row) => {
  // TODO(skishore): Figure out how to properly decode UTF-8 or -16 in
  // Javascript and then use one of those encodings here instead of this hack.
  const codepoint = row.character.charCodeAt(0);
  const result = [codepoint & 0xff, codepoint >> 8];

  // Push the medians into the binary representation.
  const medians = row.medians.map(mapToSquare);
  const preprocessed = matcher.preprocess(medians);
  result.push(preprocessed.length);
  for (const median of preprocessed) {
    result.push(median.length);
    for (const value of median) {
      result.push(value);
    }
  }

  // Buffer would silently truncate out-of-range values, so fail loudly.
  for (const value of result) {
    if (!(0 <= value && value < 256)) {
      throw new Error(
          `Value ${value} does not fit in a byte while encoding ` +
          `${row.character}: ${JSON.stringify(preprocessed)}`);
    }
  }
  return Buffer.from(result);
};

/**
 * Annotates every row with a `dependencies` map and writes each part to
 * public/characters/part-<part>.txt as a JSON array.
 *
 * For each character in a row's `decomposition` that has its own row in
 * `rows`, dependencies[character] is "<pinyin joined by ', '> - <definition>".
 * Rows are mutated in place.
 *
 * @param {Object} parts - map from part number to the list of rows in it.
 * @param {Object} rows - map from character to its row.
 */
const dumpParts = (parts, rows) => {
  for (const part of Object.keys(parts)) {
    for (const row of parts[part]) {
      row.dependencies = {};
      // Array.from iterates by code point, so astral characters stay whole.
      for (const character of Array.from(row.decomposition)) {
        const data = rows[character];
        if (data) {
          row.dependencies[character] =
              `${data.pinyin.join(', ')} - ${data.definition}`;
        }
      }
    }
    const writer = fs.createWriteStream(`public/characters/part-${part}.txt`);
    writer.write(JSON.stringify(parts[part]));
    writer.end();
  }
};

/**
 * Reads the joined temporary file line by line, writes each row's binary
 * record to public/medians.bin, and, once the input is exhausted, dumps the
 * per-part JSON files and removes the temporary file.
 *
 * Each input line must be "<dictionary JSON>BREAK<graphics JSON>"; the two
 * objects are merged, with graphics keys overriding dictionary keys.
 */
const callback = () => {
  const input = fs.createReadStream(joinedPath);
  const reader = readline.createInterface({input: input});
  const writer = fs.createWriteStream('public/medians.bin');
  const matcher = new match.makemeahanzi.Matcher([]);
  const parts = {};
  const rows = {};

  reader.on('line', (line) => {
    const pieces = line.trim().split(delimiter);
    if (pieces.length !== 2) throw new Error(line);
    const row = Object.assign(JSON.parse(pieces[0]), JSON.parse(pieces[1]));
    delete row.normalized_medians;
    writer.write(encode(matcher, row));

    // Bucket rows by the high byte of the first UTF-16 code unit.
    const part = Math.floor(row.character.charCodeAt(0) / 256);
    parts[part] = parts[part] || [];
    parts[part].push(row);
    rows[row.character] = row;
  });

  reader.on('close', () => {
    writer.end();
    dumpParts(parts, rows);
    // Best-effort cleanup: a failure here does not invalidate the outputs,
    // so report it instead of crashing or silently ignoring it.
    fs.unlink(joinedPath, (error) => {
      if (error) {
        console.error(`Could not remove ${joinedPath}: ${error.message}`);
      }
    });
  });
};

/**
 * Entry point: joins the dictionary and graphics files line by line into the
 * temporary file using `paste`, then hands off to callback().
 */
const main = () => {
  // TODO(skishore): This whole function is a terrible hack!
  // `paste -d LIST` uses one character of LIST between each pair of adjacent
  // input files, so to get the full delimiter between the two real files we
  // pad with one /dev/null (an empty column) per extra delimiter character.
  const spacer = Array.from(delimiter).slice(1).map(() => '/dev/null').join(' ');
  child_process.exec(
      `paste -d ${delimiter} makemeahanzi/dictionary.txt ` +
      `${spacer} makemeahanzi/graphics.txt > ${joinedPath}`,
      (error) => {
        // Rethrow the original error to keep its stack, exit code and stderr.
        if (error) throw error;
        callback();
      });
};

main();