Add high-recall phonetic-semantic inference

This commit is contained in:
Shaunak Kishore
2015-11-07 02:53:04 -05:00
parent a646d34f3c
commit 57cf6ce948
4 changed files with 44 additions and 2 deletions

View File

@ -21,13 +21,16 @@ const vowels = tokenSet('a ai an ang ao e ei en eng er i ia ian iang iao ie ' +
const two_syllables = tokenSet('ia ian iang iao ie io iong iu ua uai uan ' +
'uang ue ui uo van');
pinyin_util.dropTones = (pinyin) => {
pinyin_util.dropTones = (pinyin, append_number) => {
for (let i = 0; i < pinyin.length; i++) {
for (let option = 1; option <= 4; option++) {
const index = vowel_to_tone[option].indexOf(pinyin[i]);
if (index >= 0) {
const toneless = 'aeiouv'[index];
pinyin = pinyin.substr(0, i) + toneless + pinyin.substr(i + 1);
if (append_number) {
return `${pinyin}${option}`;
}
}
}
}
@ -72,3 +75,7 @@ pinyin_util.numberedPinyinToTonePinyin = (numbered) => {
}
return consonant + vowel.replace('v', 'ü');
}
// Thin wrapper: forwards the tone-marked pinyin string to
// pinyin_util.dropTones with the append_number flag turned on, and returns
// whatever dropTones returns. With that flag set, dropTones swaps the first
// tone-marked vowel it finds for the plain vowel and appends the matching
// tone number (1-4).
// NOTE(review): the result for input with no tone mark depends on the part
// of dropTones that handles that case - confirm it is what callers expect.
pinyin_util.tonePinyinToNumberedPinyin = (tone) => {
  const append_number = true;
  return pinyin_util.dropTones(tone, append_number);
}