diff --git a/lib/common/type.ts b/lib/common/type.ts
index 2163c24..e1ba167 100644
--- a/lib/common/type.ts
+++ b/lib/common/type.ts
@@ -5,6 +5,7 @@ export interface SingleWordResult {
result: string;
isZh: boolean;
delete?: boolean;
+ traditional?: string;
}
// toneType 属性可选参数
@@ -72,4 +73,10 @@ export type CommonOptions = {
* @value standard:不将 `y`、`w` 视为声母
*/
initialPattern?: InitialPattern;
+ /**
+ * @description 是否启用繁体字模式(可以更好地识别繁体字)
+ * @value false:不启用繁体字模式 (默认值)
+ * @value true:启用繁体字模式
+ */
+ traditional?: boolean;
};
diff --git a/lib/core/html/index.ts b/lib/core/html/index.ts
index d935645..464ef81 100644
--- a/lib/core/html/index.ts
+++ b/lib/core/html/index.ts
@@ -1,6 +1,12 @@
import { pinyin } from "@/core/pinyin";
+import type { BasicOptions } from "../../core/pinyin";
-interface HtmlOptions {
+type HtmlBaseOptions = Pick< // pinyin() options that HtmlOptions inherits; html() passes them on via the completeOptions spread
+ BasicOptions,
+ "toneType" | "v" | "toneSandhi" | "segmentit" | "traditional" | "surname" | "mode"
+>;
+
+interface HtmlOptions extends HtmlBaseOptions {
/**
* @description html 结果中每个字+拼音外层 span 标签的类名。默认为 py-result-item
*/
@@ -21,35 +27,18 @@ interface HtmlOptions {
* @description html 非汉字字符外层 span 标签的类名,仅当 wrapNonChinese 为 true 时生效。默认为 py-non-chinese-item
*/
nonChineseClass?: string;
- /**
- * @description 拼音上是否标注音调
- */
- toneType?: "symbol" | "num" | "none";
/**
* @description 对于指定的汉字及字符,在 result 上额外补充的拼音
*/
customClassMap?: {
[classname: string]: string[];
};
- /**
- * @description 是否开启「一」和 「不」字的变调。默认开启。参考:https://zh.wiktionary.org/wiki/Appendix:%E2%80%9C%E4%B8%80%E2%80%9D%E5%8F%8A%E2%80%9C%E4%B8%8D%E2%80%9D%E7%9A%84%E5%8F%98%E8%B0%83
- * @value true:开启
- * @value false:不开启
- */
- toneSandhi?: boolean;
/**
* @description 是否保留 标签,默认为保留
* @value true:保留
* @value false:移除
*/
rp?: boolean;
- /**
- * @description 对于 ü 的返回是否转换成 v(仅在 toneType: none 启用时生效)
- * @value false:返回值中保留 ü (默认值)
- * @value true:返回值中 ü 转换成 v
- * @value string:返回值中 ü 转换成指定字符
- */
- v?: boolean | string;
}
const DefaultHtmlOptions: HtmlOptions = {
@@ -63,6 +52,7 @@ const DefaultHtmlOptions: HtmlOptions = {
toneSandhi: true,
rp: true,
v: false,
+ traditional: false,
};
/**
@@ -78,9 +68,7 @@ export const html = (text: string, options?: HtmlOptions) => {
} as Required;
const pinyinArray = pinyin(text, {
type: "all",
- toneType: completeOptions.toneType,
- toneSandhi: options?.toneSandhi,
- v: completeOptions.v,
+ ...completeOptions,
});
const result = pinyinArray.map((item) => {
let additionalClass = "";
diff --git a/lib/core/pinyin/handle.ts b/lib/core/pinyin/handle.ts
index 7a0e3a3..58fe23a 100644
--- a/lib/core/pinyin/handle.ts
+++ b/lib/core/pinyin/handle.ts
@@ -18,6 +18,7 @@ import {
} from "../../common/segmentit";
import { Priority } from "@/common/constant";
import { splitString } from "@/common/utils";
+import { getTraditionalDict } from "../traditional";
/**
* @description: 获取单个字符的拼音
@@ -31,13 +32,30 @@ export const getSingleWordPinyin: GetSingleWordPinyin = (char) => {
return pinyin ? pinyin.split(" ")[0] : char;
};
+// Maps each UTF-16 code unit of `word` through the getTraditionalDict() table;
+// units without a truthy entry, and every surrogate unit, are copied unchanged.
+// NOTE(review): a replacement longer than one unit would shift match indexes in getPinyin — confirm the dictionary is 1:1.
+const getTraditionalWords = (word: string): string => {
+  const traditionalDict = getTraditionalDict();
+  let converted = "";
+  for (let i = 0; i < word.length; i++) {
+    const code = word.charCodeAt(i);
+    // Never look up half of a surrogate pair: a hit would replace it and orphan the other half.
+    const isSurrogate = code >= 0xd800 && code <= 0xdfff;
+    converted += (!isSurrogate && traditionalDict[code]) || word.charAt(i);
+  }
+  return converted;
+};
+
export const getPinyin = (
word: string,
list: SingleWordResult[],
surname: SurnameMode,
- segmentit: TokenizationAlgorithm
+ segmentit: TokenizationAlgorithm,
+ traditional?: boolean,
): { list: SingleWordResult[]; matches: MatchPattern[] } => {
- const matches = acTree.search(word, surname, segmentit);
+ const searchWord = traditional ? getTraditionalWords(word) : word;
+ const matches = acTree.search(searchWord, surname, segmentit);
let matchIndex = 0;
const zhChars = splitString(word);
for (let i = 0; i < zhChars.length; ) {
@@ -45,6 +63,7 @@ export const getPinyin = (
if (match && i === match.index) {
if (match.length === 1 && match.priority <= Priority.Normal) {
const char = zhChars[i];
+ match.zh = char;
let pinyin: string = "";
pinyin = processSepecialPinyin(char, zhChars[i - 1], zhChars[i + 1]);
list[i] = {
@@ -59,10 +78,12 @@ export const getPinyin = (
}
const pinyins = match.pinyin.split(" ");
let pinyinIndex = 0;
+ if (traditional) {
+ match.zh = zhChars.slice(match.index, match.index + match.length).join("");
+ }
for (let j = 0; j < match.length; j++) {
- const zhChars = splitString(match.zh);
list[i + j] = {
- origin: zhChars[j],
+ origin: zhChars[j + match.index],
result: pinyins[pinyinIndex] || "",
isZh: true,
originPinyin: pinyins[pinyinIndex] || "",
@@ -121,7 +142,7 @@ export const getAllPinyin: GetAllPinyin = (char, surname = "off") => {
const surnamePinyin = Surnames[char];
if (surnamePinyin) {
pinyin = [surnamePinyin].concat(
- pinyin.filter((py) => py !== surnamePinyin)
+ pinyin.filter((py) => py !== surnamePinyin),
);
}
}
@@ -135,7 +156,7 @@ export const getAllPinyin: GetAllPinyin = (char, surname = "off") => {
*/
type GetMultiplePinyin = (
word: string,
- surname?: SurnameMode
+ surname?: SurnameMode,
) => SingleWordResult[];
const getMultiplePinyin: GetMultiplePinyin = (word, surname = "off") => {
let pinyin = getAllPinyin(word, surname);
@@ -166,7 +187,7 @@ const getMultiplePinyin: GetMultiplePinyin = (word, surname = "off") => {
*/
type GetInitialAndFinal = (
pinyin: string,
- initialPattern?: InitialPattern
+ initialPattern?: InitialPattern,
) => {
final: string;
initial: string;
diff --git a/lib/core/pinyin/index.ts b/lib/core/pinyin/index.ts
index dc630c5..062716c 100644
--- a/lib/core/pinyin/index.ts
+++ b/lib/core/pinyin/index.ts
@@ -163,7 +163,7 @@ function pinyin(word: string, options?: OptionsReturnAll): AllData[];
*/
function pinyin(
word: string,
- options?: CompleteOptions
+ options?: CompleteOptions,
): string | string[] | AllData[] {
options = { ...DEFAULT_OPTIONS, ...(options || {}) };
// 校验 word 类型是否正确
@@ -203,7 +203,8 @@ function pinyin(
word,
_list,
options.surname as SurnameMode,
- options.segmentit as TokenizationAlgorithm
+ options.segmentit as TokenizationAlgorithm,
+ options.traditional,
);
// 一和不变调处理
diff --git a/lib/core/segment/index.ts b/lib/core/segment/index.ts
index 93122d2..e40a8b0 100644
--- a/lib/core/segment/index.ts
+++ b/lib/core/segment/index.ts
@@ -1,4 +1,4 @@
-import { BasicOptions } from "../pinyin";
+import type { BasicOptions } from "../pinyin";
import { TokenizationAlgorithm } from "../../common/segmentit";
import { stringLength } from "@/common/utils";
import { middleWareNonZh, middlewareToneSandhi, middlewareToneType, middlewareV, validateType } from "@/core/pinyin/middlewares";
@@ -8,7 +8,7 @@ import { middlewareOutputFormat, middlewareSegment, Output, OutputFormat } from
type SegmentBaseOptions = Pick<
BasicOptions,
- "toneType" | "mode" | "surname" | "nonZh" | "v" | "toneSandhi" | "segmentit"
+ "toneType" | "mode" | "surname" | "nonZh" | "v" | "toneSandhi" | "segmentit" | "traditional"
>;
interface AllSegmentReturnOptions extends SegmentBaseOptions {
@@ -103,6 +103,7 @@ const DEFAULT_OPTIONS: SegmentCompleteOptions = {
toneSandhi: true,
segmentit: TokenizationAlgorithm.MaxProbability,
format: OutputFormat.AllSegment,
+ traditional: false,
};
export function segment(word: string, options?: AllSegmentReturnOptions): Output['AllSegment'];
@@ -138,7 +139,8 @@ export function segment(word: string, options?: SegmentCompleteOptions) {
word,
_list,
options.surname as SurnameMode,
- options.segmentit as TokenizationAlgorithm
+ options.segmentit as TokenizationAlgorithm,
+ options.traditional as boolean
);
// 一和不变调处理
diff --git a/lib/core/traditional/index.ts b/lib/core/traditional/index.ts
new file mode 100644
index 0000000..ab4fdcb
--- /dev/null
+++ b/lib/core/traditional/index.ts
@@ -0,0 +1,13 @@
+const traditionalDict: string[] = []; // replacement strings, indexed by the UTF-16 code unit of the source character
+// Registers the own-key mappings of `dict`, keyed by each key's first code unit. Keys that start with a
+// surrogate are skipped: indexing by a lone surrogate would make unrelated astral characters collide.
+export function addTraditionalDict(dict: Record<string, string>) {
+  Object.keys(dict).forEach((key) => {
+    const code = key.charCodeAt(0);
+    if (code < 0xd800 || code > 0xdfff) traditionalDict[code] = dict[key];
+  });
+}
+
+// Exposes the shared lookup array that addTraditionalDict writes into (indexed by UTF-16 code unit).
+// NOTE(review): the array is returned by reference, not copied, so callers can mutate it — confirm that is intended.
+export function getTraditionalDict(): string[] { return traditionalDict; }
diff --git a/lib/index.ts b/lib/index.ts
index db88293..65541c2 100644
--- a/lib/index.ts
+++ b/lib/index.ts
@@ -1,9 +1,14 @@
-export { getInitialAndFinal, getFinalParts, getNumOfTone } from './core/pinyin/handle';
-export { pinyin } from './core/pinyin';
-export { customPinyin, clearCustomDict } from './core/custom';
-export { addDict, removeDict } from './core/dict';
-export { match } from './core/match';
-export { html } from './core/html';
-export { polyphonic } from './core/polyphonic';
-export { convert } from './core/convert';
-export { segment, OutputFormat } from './core/segment';
\ No newline at end of file
+export {
+ getInitialAndFinal,
+ getFinalParts,
+ getNumOfTone,
+} from "./core/pinyin/handle";
+export { pinyin } from "./core/pinyin";
+export { customPinyin, clearCustomDict } from "./core/custom";
+export { addDict, removeDict } from "./core/dict";
+export { match } from "./core/match";
+export { html } from "./core/html";
+export { polyphonic } from "./core/polyphonic";
+export { convert } from "./core/convert";
+export { segment, OutputFormat } from "./core/segment";
+export { addTraditionalDict, getTraditionalDict } from "./core/traditional";
diff --git a/package.json b/package.json
index 0ce9051..a21fdc4 100644
--- a/package.json
+++ b/package.json
@@ -54,7 +54,7 @@
"devDependencies": {
"@commitlint/cli": "^11.0.0",
"@commitlint/config-conventional": "^11.0.0",
- "@pinyin-pro/data": "1.0.3",
+ "@pinyin-pro/data": "1.3.0",
"@rollup/plugin-commonjs": "^17.1.0",
"@rollup/plugin-json": "^4.1.0",
"@rollup/plugin-node-resolve": "^11.2.0",
diff --git a/test/traditional.test.js b/test/traditional.test.js
new file mode 100644
index 0000000..44177e9
--- /dev/null
+++ b/test/traditional.test.js
@@ -0,0 +1,69 @@
+import { pinyin, html, addTraditionalDict, segment } from '../lib/index';
+import traditionalDict from '@pinyin-pro/data/traditional'
+import { expect, describe, it } from 'vitest';
+
+describe("without traditional", () => { // Baseline suite: every call below omits the `traditional` option.
+ it("[pinyin traditional]轉盤", () => {
+ const result = pinyin("轉盤");
+ expect(result).to.be.equal("zhuǎn pán");
+ });
+
+ it("[segment traditional]一个轉盤", () => {
+ const result = segment("一个轉盤");
+ expect(result).to.deep.equal([ // 轉 and 盤 are expected as two separate single-character entries
+ {
+ "origin": "一",
+ "result": "yí",
+ },
+ {
+ "origin": "个",
+ "result": "gè",
+ },
+ {
+ "origin": "轉",
+ "result": "zhuǎn",
+ },
+ {
+ "origin": "盤",
+ "result": "pán",
+ },
+ ]);
+ });
+
+ it("[html traditional]轉盤", () => {
+ const result = html("轉盤");
+ expect(result).to.be.equal('轉盤'); // NOTE(review): expected value appears to have lost its HTML markup — confirm against real html() output
+ });
+});
+
+
+describe("with traditional", () => { // Same inputs as the baseline suite, but with `traditional: true`.
+ addTraditionalDict(traditionalDict); // Called in the describe body, not a hook — NOTE(review): presumably runs at collection time, before the baseline tests execute; confirm they do not depend on an empty dictionary
+ it("[pinyin with traditional]一个🌛轉盤", () => {
+ const result = pinyin("一个🌛轉盤", { traditional: true });
+ expect(result).to.be.equal("yí gè 🌛 zhuàn pán"); // 轉 reads zhuàn here versus zhuǎn in the baseline suite; the emoji passes through unchanged
+ });
+
+ it("[segment traditional]一个轉盤", () => {
+ const result = segment("一个轉盤", { traditional: true });
+ expect(result).to.deep.equal([ // 轉盤 is expected as one segment whose origin keeps the input characters
+ {
+ "origin": "一",
+ "result": "yí",
+ },
+ {
+ "origin": "个",
+ "result": "gè",
+ },
+ {
+ "origin": "轉盤",
+ "result": "zhuànpán",
+ },
+ ]);
+ });
+
+ it("[html with traditional]轉盤", () => {
+ const result = html("轉盤", { traditional: true });
+ expect(result).to.be.equal('轉盤'); // NOTE(review): expected value appears to have lost its HTML markup — confirm against real html() output
+ });
+});
diff --git a/types/common/type.d.ts b/types/common/type.d.ts
index 12740aa..6906f7c 100644
--- a/types/common/type.d.ts
+++ b/types/common/type.d.ts
@@ -4,6 +4,7 @@ export interface SingleWordResult {
result: string;
isZh: boolean;
delete?: boolean;
+ traditional?: string;
}
export type ToneType = "symbol" | "num" | "none";
export type PinyinMode = "normal" | "surname";
@@ -59,4 +60,10 @@ export type CommonOptions = {
* @value standard:不将 `y`、`w` 视为声母
*/
initialPattern?: InitialPattern;
+ /**
+ * @description 是否启用繁体字模式(可以更好地识别繁体字)
+ * @value false:不启用繁体字模式 (默认值)
+ * @value true:启用繁体字模式
+ */
+ traditional?: boolean;
};
diff --git a/types/core/html/index.d.ts b/types/core/html/index.d.ts
index ffbfddf..22f7126 100644
--- a/types/core/html/index.d.ts
+++ b/types/core/html/index.d.ts
@@ -1,4 +1,6 @@
-interface HtmlOptions {
+import type { BasicOptions } from "../../core/pinyin";
+type HtmlBaseOptions = Pick<BasicOptions, "toneType" | "v" | "toneSandhi" | "segmentit" | "traditional" | "surname" | "mode">;
+interface HtmlOptions extends HtmlBaseOptions {
/**
* @description html 结果中每个字+拼音外层 span 标签的类名。默认为 py-result-item
*/
@@ -19,35 +21,18 @@ interface HtmlOptions {
* @description html 非汉字字符外层 span 标签的类名,仅当 wrapNonChinese 为 true 时生效。默认为 py-non-chinese-item
*/
nonChineseClass?: string;
- /**
- * @description 拼音上是否标注音调
- */
- toneType?: "symbol" | "num" | "none";
/**
* @description 对于指定的汉字及字符,在 result 上额外补充的拼音
*/
customClassMap?: {
[classname: string]: string[];
};
- /**
- * @description 是否开启「一」和 「不」字的变调。默认开启。参考:https://zh.wiktionary.org/wiki/Appendix:%E2%80%9C%E4%B8%80%E2%80%9D%E5%8F%8A%E2%80%9C%E4%B8%8D%E2%80%9D%E7%9A%84%E5%8F%98%E8%B0%83
- * @value true:开启
- * @value false:不开启
- */
- toneSandhi?: boolean;
/**
* @description 是否保留 标签,默认为保留
* @value true:保留
* @value false:移除
*/
rp?: boolean;
- /**
- * @description 对于 ü 的返回是否转换成 v(仅在 toneType: none 启用时生效)
- * @value false:返回值中保留 ü (默认值)
- * @value true:返回值中 ü 转换成 v
- * @value string:返回值中 ü 转换成指定字符
- */
- v?: boolean | string;
}
/**
* @description: 获取带拼音汉字的 html 字符串
diff --git a/types/core/pinyin/handle.d.ts b/types/core/pinyin/handle.d.ts
index c1f4b9d..5da30b1 100644
--- a/types/core/pinyin/handle.d.ts
+++ b/types/core/pinyin/handle.d.ts
@@ -8,7 +8,7 @@ import { MatchPattern, TokenizationAlgorithm } from "../../common/segmentit";
*/
type GetSingleWordPinyin = (char: string) => string;
export declare const getSingleWordPinyin: GetSingleWordPinyin;
-export declare const getPinyin: (word: string, list: SingleWordResult[], surname: SurnameMode, segmentit: TokenizationAlgorithm) => {
+export declare const getPinyin: (word: string, list: SingleWordResult[], surname: SurnameMode, segmentit: TokenizationAlgorithm, traditional?: boolean) => {
list: SingleWordResult[];
matches: MatchPattern[];
};
diff --git a/types/core/segment/index.d.ts b/types/core/segment/index.d.ts
index e8f0524..9065b68 100644
--- a/types/core/segment/index.d.ts
+++ b/types/core/segment/index.d.ts
@@ -1,6 +1,6 @@
-import { BasicOptions } from "../pinyin";
+import type { BasicOptions } from "../pinyin";
import { Output, OutputFormat } from "./middlewares";
-type SegmentBaseOptions = Pick<BasicOptions, "toneType" | "mode" | "surname" | "nonZh" | "v" | "toneSandhi" | "segmentit">;
+type SegmentBaseOptions = Pick<BasicOptions, "toneType" | "mode" | "surname" | "nonZh" | "v" | "toneSandhi" | "segmentit" | "traditional">;
interface AllSegmentReturnOptions extends SegmentBaseOptions {
/**
* @description 以片段格式返回全部信息
diff --git a/types/core/traditional/index.d.ts b/types/core/traditional/index.d.ts
new file mode 100644
index 0000000..e6f3fdc
--- /dev/null
+++ b/types/core/traditional/index.d.ts
@@ -0,0 +1,2 @@
+export declare function addTraditionalDict(dict: Record<string, string>): void;
+export declare function getTraditionalDict(): string[];
diff --git a/types/data/traditional-to-simplified.d.ts b/types/data/traditional-to-simplified.d.ts
new file mode 100644
index 0000000..5702477
--- /dev/null
+++ b/types/data/traditional-to-simplified.d.ts
@@ -0,0 +1,2 @@
+declare const TraditionalListMap: string[]; // NOTE(review): no source module for this declaration is visible in this patch — confirm it is not a stale build artifact
+export { TraditionalListMap };
diff --git a/types/index.d.ts b/types/index.d.ts
index 51e49f7..adae192 100644
--- a/types/index.d.ts
+++ b/types/index.d.ts
@@ -1,9 +1,10 @@
-export { getInitialAndFinal, getFinalParts, getNumOfTone } from './core/pinyin/handle';
-export { pinyin } from './core/pinyin';
-export { customPinyin, clearCustomDict } from './core/custom';
-export { addDict, removeDict } from './core/dict';
-export { match } from './core/match';
-export { html } from './core/html';
-export { polyphonic } from './core/polyphonic';
-export { convert } from './core/convert';
-export { segment, OutputFormat } from './core/segment';
+export { getInitialAndFinal, getFinalParts, getNumOfTone, } from "./core/pinyin/handle";
+export { pinyin } from "./core/pinyin";
+export { customPinyin, clearCustomDict } from "./core/custom";
+export { addDict, removeDict } from "./core/dict";
+export { match } from "./core/match";
+export { html } from "./core/html";
+export { polyphonic } from "./core/polyphonic";
+export { convert } from "./core/convert";
+export { segment, OutputFormat } from "./core/segment";
+export { addTraditionalDict, getTraditionalDict } from "./core/traditional";