diff --git a/lib/common/type.ts b/lib/common/type.ts index 20b45ac..2163c24 100644 --- a/lib/common/type.ts +++ b/lib/common/type.ts @@ -12,6 +12,7 @@ export type ToneType = "symbol" | "num" | "none"; export type PinyinMode = "normal" | "surname"; export type SurnameMode = "all" | "head" | "off"; +export type InitialPattern = "yw" | "standard"; export type CommonOptions = { /** @@ -65,4 +66,10 @@ export type CommonOptions = { * @value string:返回值中 ü 转换成指定字符 */ v?: boolean | string; + /** + * @description 是否将 `y`、`w` 视为声母 + * @value yw:将 `y`、`w` 视为声母 + * @value standard:不将 `y`、`w` 视为声母 + */ + initialPattern?: InitialPattern; }; diff --git a/lib/core/pinyin/handle.ts b/lib/core/pinyin/handle.ts index c8b4d80..7a0e3a3 100644 --- a/lib/core/pinyin/handle.ts +++ b/lib/core/pinyin/handle.ts @@ -10,9 +10,13 @@ import Surnames from "@/data/surname"; import DICT1 from "@/data/dict1"; import { getCustomMultpileDict } from "@/core/custom"; import { SingleWordResult } from "../../common/type"; -import type { SurnameMode } from "../../common/type"; -import { acTree, MatchPattern, TokenizationAlgorithm } from "../../common/segmentit"; -import { Priority } from "@/common/constant"; +import type { SurnameMode, InitialPattern } from "../../common/type"; +import { + acTree, + MatchPattern, + TokenizationAlgorithm, +} from "../../common/segmentit"; +import { Priority } from "@/common/constant"; import { splitString } from "@/common/utils"; /** @@ -32,7 +36,7 @@ export const getPinyin = ( list: SingleWordResult[], surname: SurnameMode, segmentit: TokenizationAlgorithm -): { list: SingleWordResult[], matches: MatchPattern[] } => { +): { list: SingleWordResult[]; matches: MatchPattern[] } => { const matches = acTree.search(word, surname, segmentit); let matchIndex = 0; const zhChars = splitString(word); @@ -59,9 +63,9 @@ export const getPinyin = ( const zhChars = splitString(match.zh); list[i + j] = { origin: zhChars[j], - result: pinyins[pinyinIndex] || '', + result: pinyins[pinyinIndex] || "", isZh: true, - originPinyin: pinyins[pinyinIndex] || '', + originPinyin: pinyins[pinyinIndex] || "", }; pinyinIndex++; } @@ -157,13 +161,17 @@ const getMultiplePinyin: GetMultiplePinyin = (word, surname = "off") => { /** * @description: 获取拼音的声母和韵母 * @param {string} pinyin + * @param {InitialPattern} initialPattern * @return {*} */ -type GetInitialAndFinal = (pinyin: string) => { +type GetInitialAndFinal = ( + pinyin: string, + initialPattern?: InitialPattern +) => { final: string; initial: string; }; -const getInitialAndFinal: GetInitialAndFinal = (pinyin) => { +const getInitialAndFinal: GetInitialAndFinal = (pinyin, initialPattern) => { const pinyin_arr = pinyin.split(" "); const initial_arr: string[] = []; const final_arr: string[] = []; @@ -184,6 +192,13 @@ const getInitialAndFinal: GetInitialAndFinal = (pinyin) => { } } } + if (initialPattern === "standard") { + initial_arr.forEach((initial, index) => { + if (initial === "y" || initial === "w") { + initial_arr[index] = ""; + } + }); + } return { final: final_arr.join(" "), // 韵母 initial: initial_arr.join(" "), // 声母 diff --git a/lib/core/pinyin/middlewares.ts b/lib/core/pinyin/middlewares.ts index 304caf7..80bcba2 100644 --- a/lib/core/pinyin/middlewares.ts +++ b/lib/core/pinyin/middlewares.ts @@ -41,12 +41,19 @@ export const middleWareNonZh = ( let nonZh = options.nonZh; if (nonZh === "removed") { - return list.filter((item) => item.isZh || !isNonZhScope(item.origin, options.nonZhScope)); + return list.filter( + (item) => item.isZh || !isNonZhScope(item.origin, options.nonZhScope) + ); } else if (nonZh === "consecutive") { for (let i = list.length - 2; i >= 0; i--) { const cur = list[i]; const pre = list[i + 1]; - if (!cur.isZh && !pre.isZh && isNonZhScope(cur.origin, options.nonZhScope) && isNonZhScope(pre.origin, options.nonZhScope)) { + if ( + !cur.isZh && + !pre.isZh && + isNonZhScope(cur.origin, options.nonZhScope) && + isNonZhScope(pre.origin, options.nonZhScope) + ) { cur.origin += pre.origin; cur.result += pre.result; pre.delete = true; @@ -85,12 +92,16 @@ export const middlewarePattern = ( break; case "initial": list.forEach((item) => { - item.result = item.isZh ? getInitialAndFinal(item.result).initial : ""; + item.result = item.isZh + ? getInitialAndFinal(item.result, options.initialPattern).initial + : ""; }); break; case "final": list.forEach((item) => { - item.result = item.isZh ? getInitialAndFinal(item.result).final : ""; + item.result = item.isZh + ? getInitialAndFinal(item.result, options.initialPattern).final + : ""; }); break; case "first": @@ -157,7 +168,10 @@ export const middlewareV = ( if (options.v) { list.forEach((item) => { if (item.isZh) { - item.result = item.result.replace(/ü/g, typeof options.v === 'string' ? options.v : "v"); + item.result = item.result.replace( + /ü/g, + typeof options.v === "string" ? options.v : "v" + ); } }); } @@ -183,7 +197,10 @@ export const middlewareType = ( if (options.type === "all") { return list.map((item) => { const pinyin = item.isZh ? item.result : ""; - const { initial, final } = getInitialAndFinal(pinyin); + const { initial, final } = getInitialAndFinal( + pinyin, + options.initialPattern + ); const { head, body, tail } = getFinalParts(pinyin); let polyphonic: string[] = []; if (pinyin !== "") { diff --git a/lib/core/polyphonic/index.ts b/lib/core/polyphonic/index.ts index 5c95cb8..78e1915 100644 --- a/lib/core/polyphonic/index.ts +++ b/lib/core/polyphonic/index.ts @@ -204,7 +204,10 @@ export const handleType = ( if (options.type === "all") { return list.map((item) => { const pinyin = item.isZh ? item.result : ""; - const { initial, final } = getInitialAndFinal(pinyin); + const { initial, final } = getInitialAndFinal( + pinyin, + options.initialPattern + ); const { head, body, tail } = getFinalParts(pinyin); return { origin: item.origin, diff --git a/test/pattern.test.js b/test/pattern.test.js index 80a24f9..0471554 100644 --- a/test/pattern.test.js +++ b/test/pattern.test.js @@ -1,103 +1,112 @@ -import { pinyin } from '../lib/index'; -import { expect, describe, it } from 'vitest'; +import { pinyin } from "../lib/index"; +import { expect, describe, it } from "vitest"; -describe('pattern', () => { - it('[pattern]num', () => { - const result = pinyin('汉语拼音', { pattern: 'num' }); - expect(result).to.be.equal('4 3 1 1'); +describe("pattern", () => { + it("[pattern]num", () => { + const result = pinyin("汉语拼音", { pattern: "num" }); + expect(result).to.be.equal("4 3 1 1"); }); - it('[pattern]num-array', () => { - const result = pinyin('汉语拼音', { pattern: 'num', type: 'array' }); - expect(result).to.deep.equal(['4', '3', '1', '1']); + it("[pattern]num-array", () => { + const result = pinyin("汉语拼音", { pattern: "num", type: "array" }); + expect(result).to.deep.equal(["4", "3", "1", "1"]); }); - it('[pattern]final', () => { - const result = pinyin('汉语拼音', { pattern: 'final' }); - expect(result).to.be.equal('àn ǔ īn īn'); + it("[pattern]final", () => { + const result = pinyin("汉语拼音", { pattern: "final" }); + expect(result).to.be.equal("àn ǔ īn īn"); }); - it('[pattern]final-array', () => { - const result = pinyin('汉语拼音', { pattern: 'final', type: 'array' }); - expect(result).to.deep.equal(['àn', 'ǔ', 'īn', 'īn']); + it("[pattern]final-array", () => { + const result = pinyin("汉语拼音", { pattern: "final", type: "array" }); + expect(result).to.deep.equal(["àn", "ǔ", "īn", "īn"]); }); - it('[pattern]initial', () => { - const result = pinyin('汉语拼音', { pattern: 'initial' }); - expect(result).to.be.equal('h y p y'); + it("[pattern]initial", () => { + const result = pinyin("汉语拼音", { pattern: "initial" }); + expect(result).to.be.equal("h y p y"); }); - it('[pattern]initial-array', () => { - const result = pinyin('汉语拼音', { pattern: 'initial', type: 'array' }); - expect(result).to.deep.equal(['h', 'y', 'p', 'y']); - }); - - it('[pattern]num-all', () => { - const resultNumStr = pinyin('赵钱孙李吧', { pattern: 'num' }); - expect(resultNumStr).to.be.equal('4 2 1 3 0'); - }); - - it('[pattern]num-array', () => { - const resultNumArr = pinyin('赵钱孙李吧', { - pattern: 'num', - type: 'array', + it("[pattern]initial-yw", () => { + const result = pinyin("汉语拼音", { + pattern: "initial", + initialPattern: "standard", + type: "array", }); - expect(resultNumArr).to.deep.equal(['4', '2', '1', '3', '0']); + expect(result).to.deep.equal(["h", "", "p", ""]); }); - it('[pattern]initial-all', () => { - const resultInitial = pinyin('赵钱孙李吧', { - pattern: 'initial', - }); - expect(resultInitial).to.be.equal('zh q s l b'); + it("[pattern]initial-array", () => { + const result = pinyin("汉语拼音", { pattern: "initial", type: "array" }); + expect(result).to.deep.equal(["h", "y", "p", "y"]); }); - it('[pattern]final-all', () => { - const resultFinal = pinyin('赵钱孙李吧', { - pattern: 'final', - }); - expect(resultFinal).to.be.equal('ào ián ūn ǐ a'); + it("[pattern]num-all", () => { + const resultNumStr = pinyin("赵钱孙李吧", { pattern: "num" }); + expect(resultNumStr).to.be.equal("4 2 1 3 0"); }); - it('[pattern]first-all', () => { - const resultFirst = pinyin('赵钱孙李额', { - pattern: 'first', + it("[pattern]num-array", () => { + const resultNumArr = pinyin("赵钱孙李吧", { + pattern: "num", + type: "array", }); - const resultFirst1 = pinyin('赵钱孙李very', { - pattern: 'first', - }); - expect(resultFirst).to.be.equal('z q s l é'); - expect(resultFirst1).to.be.equal('z q s l v e r y'); + expect(resultNumArr).to.deep.equal(["4", "2", "1", "3", "0"]); }); - it('[pattern]first-all-none', () => { - const resultFirstNone = pinyin('赵钱孙李额', { - pattern: 'first', - toneType: 'none', + it("[pattern]initial-all", () => { + const resultInitial = pinyin("赵钱孙李吧", { + pattern: "initial", }); - expect(resultFirstNone).to.be.equal('z q s l e'); + expect(resultInitial).to.be.equal("zh q s l b"); }); - it('[pattern]nonZh', () => { - const resultNonZhInitial = pinyin('a', { - pattern: 'initial', + it("[pattern]final-all", () => { + const resultFinal = pinyin("赵钱孙李吧", { + pattern: "final", }); - const resultNonZhFinal = pinyin('a', { - pattern: 'final', + expect(resultFinal).to.be.equal("ào ián ūn ǐ a"); + }); + + it("[pattern]first-all", () => { + const resultFirst = pinyin("赵钱孙李额", { + pattern: "first", }); - const resultNonZhFinalHead = pinyin('a', { - pattern: 'finalHead', + const resultFirst1 = pinyin("赵钱孙李very", { + pattern: "first", }); - const resultNonZhFinalBody = pinyin('a', { - pattern: 'finalBody', + expect(resultFirst).to.be.equal("z q s l é"); + expect(resultFirst1).to.be.equal("z q s l v e r y"); + }); + + it("[pattern]first-all-none", () => { + const resultFirstNone = pinyin("赵钱孙李额", { + pattern: "first", + toneType: "none", }); - const resultNonZhFinalTail = pinyin('a', { - pattern: 'finalTail', + expect(resultFirstNone).to.be.equal("z q s l e"); + }); + + it("[pattern]nonZh", () => { + const resultNonZhInitial = pinyin("a", { + pattern: "initial", }); - expect(resultNonZhInitial).to.deep.equal(''); - expect(resultNonZhFinal).to.deep.equal(''); - expect(resultNonZhFinalHead).to.deep.equal(''); - expect(resultNonZhFinalBody).to.deep.equal(''); - expect(resultNonZhFinalTail).to.deep.equal(''); + const resultNonZhFinal = pinyin("a", { + pattern: "final", + }); + const resultNonZhFinalHead = pinyin("a", { + pattern: "finalHead", + }); + const resultNonZhFinalBody = pinyin("a", { + pattern: "finalBody", + }); + const resultNonZhFinalTail = pinyin("a", { + pattern: "finalTail", + }); + expect(resultNonZhInitial).to.deep.equal(""); + expect(resultNonZhFinal).to.deep.equal(""); + expect(resultNonZhFinalHead).to.deep.equal(""); + expect(resultNonZhFinalBody).to.deep.equal(""); + expect(resultNonZhFinalTail).to.deep.equal(""); }); }); diff --git a/types/common/type.d.ts b/types/common/type.d.ts index fcae737..12740aa 100644 --- a/types/common/type.d.ts +++ b/types/common/type.d.ts @@ -8,6 +8,7 @@ export interface SingleWordResult { export type ToneType = "symbol" | "num" | "none"; export type PinyinMode = "normal" | "surname"; export type SurnameMode = "all" | "head" | "off"; +export type InitialPattern = "yw" | "standard"; export type CommonOptions = { /** * @description 返回的拼音音调类型 @@ -52,4 +53,10 @@ export type CommonOptions = { * @value string:返回值中 ü 转换成指定字符 */ v?: boolean | string; + /** + * @description 是否将 `y`、`w` 视为声母 + * @value yw:将 `y`、`w` 视为声母 + * @value standard:不将 `y`、`w` 视为声母 + */ + initialPattern?: InitialPattern; }; diff --git a/types/core/pinyin/handle.d.ts b/types/core/pinyin/handle.d.ts index 450071a..c1f4b9d 100644 --- a/types/core/pinyin/handle.d.ts +++ b/types/core/pinyin/handle.d.ts @@ -1,5 +1,5 @@ import { SingleWordResult } from "../../common/type"; -import type { SurnameMode } from "../../common/type"; +import type { SurnameMode, InitialPattern } from "../../common/type"; import { MatchPattern, TokenizationAlgorithm } from "../../common/segmentit"; /** * @description: 获取单个字符的拼音 @@ -36,9 +36,10 @@ declare const getMultiplePinyin: GetMultiplePinyin; /** * @description: 获取拼音的声母和韵母 * @param {string} pinyin + * @param {InitialPattern} initialPattern * @return {*} */ -type GetInitialAndFinal = (pinyin: string) => { +type GetInitialAndFinal = (pinyin: string, initialPattern?: InitialPattern) => { final: string; initial: string; };