feat: 补全通用汉字规范表字符

This commit is contained in:
zhoulixiang
2024-06-09 06:56:20 +08:00
parent 4832bb1fd9
commit e2d20119f8
7 changed files with 17648 additions and 5074 deletions

View File

@@ -28,15 +28,6 @@ export function splitString(text: string): string[] {
return result;
}
/**
 * Reports whether the first character of `char` is a Chinese ideograph.
 *
 * The first character is read as a full Unicode code point, so an ideograph
 * that is stored as a UTF-16 surrogate pair is examined as one character
 * instead of as its leading surrogate (which never falls inside any
 * ideograph range).
 *
 * @param char - Text whose first character is tested.
 * @returns `true` when the first code point lies in U+4E00–U+9FA5 or in one
 *   of the supplementary-plane CJK Unified Ideographs extension blocks;
 *   `false` for non-string input, the empty string, and everything else.
 */
export function isZhChar(char: string): boolean {
  // Callers may come from untyped JavaScript, so keep the runtime guard.
  if (typeof char !== "string") {
    return false;
  }
  const code = char.codePointAt(0);
  // `codePointAt` yields undefined for the empty string.
  if (code === undefined) {
    return false;
  }
  return (
    // Basic-plane range, unchanged from the previous implementation.
    (code >= 0x4e00 && code <= 0x9fa5) ||
    // CJK Unified Ideographs Extension B.
    (code >= 0x20000 && code <= 0x2a6df) ||
    // Extensions C, D, E, F and I are contiguous.
    (code >= 0x2a700 && code <= 0x2ee5f) ||
    // Extensions G and H.
    (code >= 0x30000 && code <= 0x323af)
  );
}
export class FastDictFactory {
NumberDICT: string[];
StringDICT: Map<string, string>;
@@ -55,10 +46,8 @@ export class FastDictFactory {
}
}
set(word: string | number, pinyin: string) {
if (typeof word === 'number') {
this.NumberDICT[word] = pinyin;
} else if (word.length > 1) {
set(word: string, pinyin: string) {
if (word.length > 1) {
this.StringDICT.set(word, pinyin);
} else {
const code = word.charCodeAt(0);

View File

@@ -1,9 +1,5 @@
import { stringLength, isZhChar } from "@/common/utils";
import { stringLength } from "@/common/utils";
import type { SingleWordResult } from "../../common/type";
import {
DoubleUnicodePrefixReg,
DoubleUnicodeSuffixReg,
} from "@/common/constant";
import { getAllPinyin, getMultiplePinyin } from "./handle";
import { CompleteOptions } from "./index";
import {
@@ -14,6 +10,7 @@ import {
getPinyinWithoutTone,
getPinyinWithNum,
} from "./handle";
import DICT1 from "@/data/dict1";
// 验证输入是否为字符串
export const validateType = (word: unknown) => {
@@ -202,7 +199,7 @@ export const middlewareType = (
num: Number(getNumOfTone(item.originPinyin)),
isZh: item.isZh,
polyphonic,
inZhRange: isZhChar(item.origin),
inZhRange: !!DICT1.get(item.origin),
};
});
}

View File

@@ -14,7 +14,7 @@ import {
getFinalParts,
} from '@/core/pinyin/handle';
import { getCustomPolyphonicDict } from '../custom';
import { isZhChar, splitString } from '@/common/utils';
import { splitString } from '@/common/utils';
interface BasicOptions {
/**
@@ -255,7 +255,7 @@ export const handleType = (
finalTail: tail,
num: Number(getNumOfTone(item.originPinyin)),
isZh: item.isZh,
inZhRange: isZhChar(item.origin),
inZhRange: !!DICT1.get(item.origin),
};
});
}

View File

File diff suppressed because it is too large. (Load Diff)

View File

@@ -5,7 +5,7 @@ import {
getNumOfTone,
getPinyinWithoutTone,
} from '@/core/pinyin/handle';
import { isZhChar } from '@/common/utils';
import DICT1 from './dict1';
export const InitialList = [
'zh',
@@ -225,7 +225,7 @@ export function processToneSandhi(cur: string, pre: string, next: string) {
// 处理「了」字的变调
export function processToneSandhiLiao(cur: string, pre: string) {
if (cur === '了' && !isZhChar(pre)) {
if (cur === '了' && (!pre || !DICT1.get(pre))) {
return 'liǎo';
}
}

View File

@@ -28,4 +28,10 @@ describe('double unicode', () => {
});
expect(result5).to.be.equal('cè shì a𧒽𧒽a cè shì a𧒽𧒽a cè shì');
});
// Checks a sentence in which the same supplementary-plane character (one that
// occupies two UTF-16 code units) appears both back-to-back and on its own
// between ordinary characters; every occurrence is expected to yield its own
// pinyin syllable in the space-separated output.
it('[double unicode]dp consecutive', () => {
const result4 = pinyin('测试𬭬𬭬测试𬭬测试');
expect(result4).to.be.equal('cè shì huì huì cè shì huì cè shì');
});
});

View File

@@ -1,11 +1,10 @@
export declare function stringLength(text: string): number;
export declare function splitString(text: string): string[];
export declare function isZhChar(char: string): boolean;
/**
 * Ambient declaration of the dictionary container: two public stores plus
 * `get`, `set` and `clear` accessors.
 * NOTE(review): how entries are divided between the two stores is not visible
 * in this declaration — confirm against the implementation.
 */
export declare class FastDictFactory {
// Array-backed store of pinyin strings; presumably indexed by a character code — TODO confirm.
NumberDICT: string[];
// Map-backed store from a string key to its pinyin string.
StringDICT: Map<string, string>;
constructor();
// Returns the pinyin string recorded for `word`.
get(word: string): string;
// Two `set` signatures are listed; the first also accepts a numeric key,
// the second is the narrower string-only form.
set(word: string | number, pinyin: string): void;
set(word: string, pinyin: string): void;
// NOTE(review): presumably empties both stores — verify against the implementation.
clear(): void;
}