mirror of
https://github.com/chanind/hanzi-writer-data.git
synced 2025-10-29 07:16:28 +08:00
Adding initial files
This commit is contained in:
94
stroke_data_parser.py
Normal file
94
stroke_data_parser.py
Normal file
@ -0,0 +1,94 @@
|
||||
import json
|
||||
import os
|
||||
from copy import copy
|
||||
|
||||
root = os.path.dirname(__file__)
|
||||
dictionary_file = os.path.join(root, 'vendor/makemeahanzi/dictionary.txt')
|
||||
graphics_file = os.path.join(root, 'vendor/makemeahanzi/graphics.txt')
|
||||
output_dir = os.path.join(root, 'data')
|
||||
|
||||
positioners = {
|
||||
'⿰': 2,
|
||||
'⿱': 2,
|
||||
'⿲': 3,
|
||||
'⿳': 3,
|
||||
'⿴': 2,
|
||||
'⿵': 2,
|
||||
'⿶': 2,
|
||||
'⿷': 2,
|
||||
'⿸': 2,
|
||||
'⿹': 2,
|
||||
'⿺': 2,
|
||||
'⿻': 2,
|
||||
}
|
||||
missing_marker = '?'
|
||||
|
||||
graphics_data = {}
|
||||
dict_data = {}
|
||||
|
||||
with open(dictionary_file) as f:
|
||||
lines = f.readlines()
|
||||
for line in lines:
|
||||
decoded_line = json.loads(line)
|
||||
dict_data[decoded_line['character']] = decoded_line
|
||||
|
||||
with open(graphics_file) as f:
|
||||
lines = f.readlines()
|
||||
for line in lines:
|
||||
decoded_line = json.loads(line)
|
||||
char = decoded_line.pop('character')
|
||||
graphics_data[char] = decoded_line
|
||||
|
||||
|
||||
def get_decomp_index(char, subchar):
|
||||
"Parse the decomposition tree to figure out what the index of the subchar is within the char"
|
||||
stack = []
|
||||
for piece in dict_data[char]['decomposition']:
|
||||
last_node = None
|
||||
path = []
|
||||
if len(stack) > 0:
|
||||
last_node = stack.pop()
|
||||
path = copy(last_node['path'])
|
||||
path.append(last_node['children'])
|
||||
last_node['children'] += 1
|
||||
if last_node['children'] < last_node['size']:
|
||||
stack.append(last_node)
|
||||
if piece in positioners:
|
||||
node = {
|
||||
'size': positioners[piece],
|
||||
'children': 0,
|
||||
'path': path,
|
||||
}
|
||||
stack.append(node)
|
||||
elif piece == subchar:
|
||||
return path
|
||||
return None
|
||||
|
||||
def get_radical_strokes(char):
|
||||
radical = dict_data[char]['radical']
|
||||
if char == radical:
|
||||
return None
|
||||
decomp_index = get_decomp_index(char, radical)
|
||||
if not decomp_index:
|
||||
return None
|
||||
rad_strokes = []
|
||||
for stroke_num, match_index in enumerate(dict_data[char]['matches']):
|
||||
if match_index == decomp_index:
|
||||
rad_strokes.append(stroke_num)
|
||||
return rad_strokes
|
||||
|
||||
|
||||
# write out data
|
||||
|
||||
for char in graphics_data:
|
||||
radical = get_radical_strokes(char)
|
||||
if radical:
|
||||
graphics_data[char]['radStrokes'] = radical
|
||||
|
||||
for char, data in graphics_data.items():
|
||||
out_file = os.path.join(output_dir, f'{char}.json')
|
||||
with open(out_file, 'w') as f:
|
||||
f.write(json.dumps(data, ensure_ascii=False))
|
||||
|
||||
with open(os.path.join(output_dir, 'all.json'), 'w') as f:
|
||||
f.write(json.dumps(graphics_data, ensure_ascii=False))
|
||||
Reference in New Issue
Block a user