mirror of
https://github.com/skishore/makemeahanzi.git
synced 2025-11-07 08:16:13 +08:00
Drop arch_radicals and stroke count column
This commit is contained in:
@ -16,9 +16,6 @@ def MutableNamedTuple(name, fields):
|
||||
tostr(self.__dict__[key]) for key in fields))
|
||||
return TemporaryClass
|
||||
|
||||
RADICAL_VARIANTS_TO_SKIP = (u'𠆢', u'𠘨')
|
||||
SIMPLIFIED_RADICALS_TO_SKIP = (27,)
|
||||
|
||||
def in_cjk_block(character):
|
||||
if not (len(character) == 1 and 0x4e00 <= ord(character) <= 0x9fff):
|
||||
print '%s is U+%s' % (character, hex(ord(character))[2:].upper())
|
||||
@ -31,64 +28,21 @@ with open('scripts/glyphs') as f:
|
||||
glyph_set = set(glyphs)
|
||||
assert(len(glyphs) == len(glyph_set) == 6763)
|
||||
|
||||
ArchRadical = MutableNamedTuple(
|
||||
'Radical', ['number', 'character', 'definition', 'pinyin', 'strokes'])
|
||||
|
||||
with open('scripts/arch_radicals') as f:
|
||||
rows = [line.strip().decode('utf8').split(' ') for line in f.readlines()]
|
||||
arch_radicals = [ArchRadical(*row) for row in rows]
|
||||
arch_radical_map = dict((radical.character, radical)
|
||||
for radical in arch_radicals)
|
||||
assert(len(arch_radicals) == len(arch_radical_map) == 214)
|
||||
|
||||
Radical = MutableNamedTuple(
|
||||
'Radical', ['number', 'character', 'definition', 'pinyin',
|
||||
'strokes', 'traditional', 'variants'])
|
||||
Radical = MutableNamedTuple('Radical', ['number', 'character', 'definition',
|
||||
'pinyin', 'traditional', 'variants'])
|
||||
|
||||
with open('scripts/radicals') as f:
|
||||
rows = [line[:-1].decode('utf8').split('\t') for line in f.readlines()[1:]]
|
||||
all_radicals = [Radical(*row) for row in rows]
|
||||
radicals = []
|
||||
for radical in all_radicals:
|
||||
if not radicals or radical.number != radicals[-1].number:
|
||||
radicals.append(radical)
|
||||
radical_map = dict((radical.character, radical)
|
||||
for radical in radicals)
|
||||
assert(len(radicals) == len(radical_map) == 214)
|
||||
|
||||
print 'Homogenizing Arch radicals:'
|
||||
for radical in arch_radicals:
|
||||
radical.number = int(radical.number)
|
||||
radical.variants = ''
|
||||
if ' ' in radical.strokes:
|
||||
index = radical.strokes.find(' ')
|
||||
radical.variants = radical.strokes[index + 1:]
|
||||
radical.strokes = radical.strokes[:index]
|
||||
radical.strokes = int(radical.strokes)
|
||||
if radical.variants.startswith('('):
|
||||
assert(radical.variants.endswith(')'))
|
||||
radical.traditional = radical.variants[1:-1]
|
||||
radical.variants = ''
|
||||
else:
|
||||
radical.traditional = None
|
||||
if radical.variants:
|
||||
radical.variants = tuple(sorted(radical.variants.split()))
|
||||
else:
|
||||
radical.variants = ()
|
||||
in_cjk_block(radical.character)
|
||||
if radical.traditional is not None:
|
||||
in_cjk_block(radical.traditional)
|
||||
[in_cjk_block(variant) for variant in radical.variants]
|
||||
assert(radical.definition)
|
||||
assert(radical.pinyin)
|
||||
radicals = [Radical(*row) for row in rows]
|
||||
radical_map = dict((radical.character, radical) for radical in radicals)
|
||||
assert(len(radicals) == len(radical_map) == 216)
|
||||
|
||||
print 'Homogenizing derived radicals:'
|
||||
for radical in radicals:
|
||||
radical.number = int(radical.number)
|
||||
radical.strokes = int(radical.strokes)
|
||||
radical.traditional = radical.traditional or None
|
||||
radical.variants = \
|
||||
tuple(radical.variants.split(',')) if radical.variants else ()
|
||||
tuple(sorted(radical.variants.split(','))) if radical.variants else ()
|
||||
in_cjk_block(radical.character)
|
||||
if radical.traditional is not None:
|
||||
in_cjk_block(radical.traditional)
|
||||
@ -96,26 +50,6 @@ for radical in radicals:
|
||||
assert(radical.definition)
|
||||
assert(radical.pinyin)
|
||||
|
||||
for (arch_radical, radical) in zip(arch_radicals, radicals):
|
||||
assert(arch_radical.number == radical.number)
|
||||
if arch_radical.character != radical.character:
|
||||
print 'Different characters for radical %s: %s vs. %s' % (
|
||||
arch_radical.number, arch_radical.character, radical.character)
|
||||
if arch_radical.definition != radical.definition:
|
||||
print 'Different definitions for radical %s: "%s" vs. "%s"' % (
|
||||
arch_radical.number, arch_radical.definition, radical.definition)
|
||||
if arch_radical.pinyin != radical.pinyin:
|
||||
print 'Different pronunciation for radical %s: "%s" vs. "%s"' % (
|
||||
arch_radical.number, arch_radical.pinyin, radical.pinyin)
|
||||
if arch_radical.traditional != radical.traditional:
|
||||
print 'Different variants for radical %s: "%s" vs. "%s"' % (
|
||||
arch_radical.number, arch_radical.traditional, radical.traditional)
|
||||
if arch_radical.variants != radical.variants:
|
||||
print 'Different variants for radical %s: (%s) vs. (%s)' % (
|
||||
arch_radical.number,
|
||||
', '.join(variant.encode('utf8') for variant in arch_radical.variants),
|
||||
', '.join(variant.encode('utf8') for variant in radical.variants))
|
||||
|
||||
Decomposition = MutableNamedTuple(
|
||||
'Decomposition', ['character', 'strokes', 'type', 'part1', 'strokes1',
|
||||
'warning1', 'part2', 'strokes2', 'warning2',
|
||||
|
||||
Reference in New Issue
Block a user