Merge pull request #295 from zh-lx/feature/initial-yw

feat: 支持声母中排除 yw
This commit is contained in:
zhoulixiang
2025-08-03 11:12:17 +08:00
committed by GitHub
7 changed files with 148 additions and 89 deletions

View File

@@ -12,6 +12,7 @@ export type ToneType = "symbol" | "num" | "none";
export type PinyinMode = "normal" | "surname";
export type SurnameMode = "all" | "head" | "off";
export type InitialPattern = "yw" | "standard";
export type CommonOptions = {
/**
@@ -65,4 +66,10 @@ export type CommonOptions = {
* @value string返回值中 ü 转换成指定字符
*/
v?: boolean | string;
/**
* @description 声母的判定方式：是否将 `y`、`w` 视为声母（未指定时与 `yw` 行为一致）
* @value yw：将 `y`、`w` 视为声母
* @value standard：不将 `y`、`w` 视为声母，对应的声母结果为空字符串
*/
initialPattern?: InitialPattern;
};

View File

@@ -10,9 +10,13 @@ import Surnames from "@/data/surname";
import DICT1 from "@/data/dict1";
import { getCustomMultpileDict } from "@/core/custom";
import { SingleWordResult } from "../../common/type";
import type { SurnameMode } from "../../common/type";
import { acTree, MatchPattern, TokenizationAlgorithm } from "../../common/segmentit";
import { Priority } from "@/common/constant";
import type { SurnameMode, InitialPattern } from "../../common/type";
import {
acTree,
MatchPattern,
TokenizationAlgorithm,
} from "../../common/segmentit";
import { Priority } from "@/common/constant";
import { splitString } from "@/common/utils";
/**
@@ -32,7 +36,7 @@ export const getPinyin = (
list: SingleWordResult[],
surname: SurnameMode,
segmentit: TokenizationAlgorithm
): { list: SingleWordResult[], matches: MatchPattern[] } => {
): { list: SingleWordResult[]; matches: MatchPattern[] } => {
const matches = acTree.search(word, surname, segmentit);
let matchIndex = 0;
const zhChars = splitString(word);
@@ -59,9 +63,9 @@ export const getPinyin = (
const zhChars = splitString(match.zh);
list[i + j] = {
origin: zhChars[j],
result: pinyins[pinyinIndex] || '',
result: pinyins[pinyinIndex] || "",
isZh: true,
originPinyin: pinyins[pinyinIndex] || '',
originPinyin: pinyins[pinyinIndex] || "",
};
pinyinIndex++;
}
@@ -157,13 +161,17 @@ const getMultiplePinyin: GetMultiplePinyin = (word, surname = "off") => {
/**
* @description: 获取拼音的声母和韵母
* @param {string} pinyin
* @param {InitialPattern} initialPattern 声母判定方式；为 "standard" 时，声母 `y`、`w` 会被替换为空字符串
* @return {*}
*/
type GetInitialAndFinal = (pinyin: string) => {
type GetInitialAndFinal = (
pinyin: string,
initialPattern?: InitialPattern
) => {
final: string;
initial: string;
};
const getInitialAndFinal: GetInitialAndFinal = (pinyin) => {
const getInitialAndFinal: GetInitialAndFinal = (pinyin, initialPattern) => {
const pinyin_arr = pinyin.split(" ");
const initial_arr: string[] = [];
const final_arr: string[] = [];
@@ -184,6 +192,13 @@ const getInitialAndFinal: GetInitialAndFinal = (pinyin) => {
}
}
}
if (initialPattern === "standard") {
initial_arr.forEach((initial, index) => {
if (initial === "y" || initial === "w") {
initial_arr[index] = "";
}
});
}
return {
final: final_arr.join(" "), // 韵母
initial: initial_arr.join(" "), // 声母

View File

@@ -41,12 +41,19 @@ export const middleWareNonZh = (
let nonZh = options.nonZh;
if (nonZh === "removed") {
return list.filter((item) => item.isZh || !isNonZhScope(item.origin, options.nonZhScope));
return list.filter(
(item) => item.isZh || !isNonZhScope(item.origin, options.nonZhScope)
);
} else if (nonZh === "consecutive") {
for (let i = list.length - 2; i >= 0; i--) {
const cur = list[i];
const pre = list[i + 1];
if (!cur.isZh && !pre.isZh && isNonZhScope(cur.origin, options.nonZhScope) && isNonZhScope(pre.origin, options.nonZhScope)) {
if (
!cur.isZh &&
!pre.isZh &&
isNonZhScope(cur.origin, options.nonZhScope) &&
isNonZhScope(pre.origin, options.nonZhScope)
) {
cur.origin += pre.origin;
cur.result += pre.result;
pre.delete = true;
@@ -85,12 +92,16 @@ export const middlewarePattern = (
break;
case "initial":
list.forEach((item) => {
item.result = item.isZh ? getInitialAndFinal(item.result).initial : "";
item.result = item.isZh
? getInitialAndFinal(item.result, options.initialPattern).initial
: "";
});
break;
case "final":
list.forEach((item) => {
item.result = item.isZh ? getInitialAndFinal(item.result).final : "";
item.result = item.isZh
? getInitialAndFinal(item.result, options.initialPattern).final
: "";
});
break;
case "first":
@@ -157,7 +168,10 @@ export const middlewareV = (
if (options.v) {
list.forEach((item) => {
if (item.isZh) {
item.result = item.result.replace(/ü/g, typeof options.v === 'string' ? options.v : "v");
item.result = item.result.replace(
/ü/g,
typeof options.v === "string" ? options.v : "v"
);
}
});
}
@@ -183,7 +197,10 @@ export const middlewareType = (
if (options.type === "all") {
return list.map((item) => {
const pinyin = item.isZh ? item.result : "";
const { initial, final } = getInitialAndFinal(pinyin);
const { initial, final } = getInitialAndFinal(
pinyin,
options.initialPattern
);
const { head, body, tail } = getFinalParts(pinyin);
let polyphonic: string[] = [];
if (pinyin !== "") {

View File

@@ -204,7 +204,10 @@ export const handleType = (
if (options.type === "all") {
return list.map((item) => {
const pinyin = item.isZh ? item.result : "";
const { initial, final } = getInitialAndFinal(pinyin);
const { initial, final } = getInitialAndFinal(
pinyin,
options.initialPattern
);
const { head, body, tail } = getFinalParts(pinyin);
return {
origin: item.origin,

View File

@@ -1,103 +1,112 @@
import { pinyin } from '../lib/index';
import { expect, describe, it } from 'vitest';
import { pinyin } from "../lib/index";
import { expect, describe, it } from "vitest";
describe('pattern', () => {
it('[pattern]num', () => {
const result = pinyin('汉语拼音', { pattern: 'num' });
expect(result).to.be.equal('4 3 1 1');
describe("pattern", () => {
it("[pattern]num", () => {
const result = pinyin("汉语拼音", { pattern: "num" });
expect(result).to.be.equal("4 3 1 1");
});
it('[pattern]num-array', () => {
const result = pinyin('汉语拼音', { pattern: 'num', type: 'array' });
expect(result).to.deep.equal(['4', '3', '1', '1']);
it("[pattern]num-array", () => {
const result = pinyin("汉语拼音", { pattern: "num", type: "array" });
expect(result).to.deep.equal(["4", "3", "1", "1"]);
});
it('[pattern]final', () => {
const result = pinyin('汉语拼音', { pattern: 'final' });
expect(result).to.be.equal('àn ǔ īn īn');
it("[pattern]final", () => {
const result = pinyin("汉语拼音", { pattern: "final" });
expect(result).to.be.equal("àn ǔ īn īn");
});
it('[pattern]final-array', () => {
const result = pinyin('汉语拼音', { pattern: 'final', type: 'array' });
expect(result).to.deep.equal(['àn', 'ǔ', 'īn', 'īn']);
it("[pattern]final-array", () => {
const result = pinyin("汉语拼音", { pattern: "final", type: "array" });
expect(result).to.deep.equal(["àn", "ǔ", "īn", "īn"]);
});
it('[pattern]initial', () => {
const result = pinyin('汉语拼音', { pattern: 'initial' });
expect(result).to.be.equal('h y p y');
it("[pattern]initial", () => {
const result = pinyin("汉语拼音", { pattern: "initial" });
expect(result).to.be.equal("h y p y");
});
it('[pattern]initial-array', () => {
const result = pinyin('汉语拼音', { pattern: 'initial', type: 'array' });
expect(result).to.deep.equal(['h', 'y', 'p', 'y']);
});
it('[pattern]num-all', () => {
const resultNumStr = pinyin('赵钱孙李吧', { pattern: 'num' });
expect(resultNumStr).to.be.equal('4 2 1 3 0');
});
it('[pattern]num-array', () => {
const resultNumArr = pinyin('赵钱孙李吧', {
pattern: 'num',
type: 'array',
it("[pattern]initial-yw", () => {
const result = pinyin("汉语拼音", {
pattern: "initial",
initialPattern: "standard",
type: "array",
});
expect(resultNumArr).to.deep.equal(['4', '2', '1', '3', '0']);
expect(result).to.deep.equal(["h", "", "p", ""]);
});
it('[pattern]initial-all', () => {
const resultInitial = pinyin('赵钱孙李吧', {
pattern: 'initial',
});
expect(resultInitial).to.be.equal('zh q s l b');
it("[pattern]initial-array", () => {
const result = pinyin("汉语拼音", { pattern: "initial", type: "array" });
expect(result).to.deep.equal(["h", "y", "p", "y"]);
});
it('[pattern]final-all', () => {
const resultFinal = pinyin('赵钱孙李吧', {
pattern: 'final',
});
expect(resultFinal).to.be.equal('ào ián ūn ǐ a');
it("[pattern]num-all", () => {
const resultNumStr = pinyin("赵钱孙李吧", { pattern: "num" });
expect(resultNumStr).to.be.equal("4 2 1 3 0");
});
it('[pattern]first-all', () => {
const resultFirst = pinyin('赵钱孙李额', {
pattern: 'first',
it("[pattern]num-array", () => {
const resultNumArr = pinyin("赵钱孙李吧", {
pattern: "num",
type: "array",
});
const resultFirst1 = pinyin('赵钱孙李very', {
pattern: 'first',
});
expect(resultFirst).to.be.equal('z q s l é');
expect(resultFirst1).to.be.equal('z q s l v e r y');
expect(resultNumArr).to.deep.equal(["4", "2", "1", "3", "0"]);
});
it('[pattern]first-all-none', () => {
const resultFirstNone = pinyin('赵钱孙李额', {
pattern: 'first',
toneType: 'none',
it("[pattern]initial-all", () => {
const resultInitial = pinyin("赵钱孙李吧", {
pattern: "initial",
});
expect(resultFirstNone).to.be.equal('z q s l e');
expect(resultInitial).to.be.equal("zh q s l b");
});
it('[pattern]nonZh', () => {
const resultNonZhInitial = pinyin('a', {
pattern: 'initial',
it("[pattern]final-all", () => {
const resultFinal = pinyin("赵钱孙李吧", {
pattern: "final",
});
const resultNonZhFinal = pinyin('a', {
pattern: 'final',
expect(resultFinal).to.be.equal("ào ián ūn ǐ a");
});
it("[pattern]first-all", () => {
const resultFirst = pinyin("赵钱孙李额", {
pattern: "first",
});
const resultNonZhFinalHead = pinyin('a', {
pattern: 'finalHead',
const resultFirst1 = pinyin("赵钱孙李very", {
pattern: "first",
});
const resultNonZhFinalBody = pinyin('a', {
pattern: 'finalBody',
expect(resultFirst).to.be.equal("z q s l é");
expect(resultFirst1).to.be.equal("z q s l v e r y");
});
it("[pattern]first-all-none", () => {
const resultFirstNone = pinyin("赵钱孙李额", {
pattern: "first",
toneType: "none",
});
const resultNonZhFinalTail = pinyin('a', {
pattern: 'finalTail',
expect(resultFirstNone).to.be.equal("z q s l e");
});
it("[pattern]nonZh", () => {
const resultNonZhInitial = pinyin("a", {
pattern: "initial",
});
expect(resultNonZhInitial).to.deep.equal('');
expect(resultNonZhFinal).to.deep.equal('');
expect(resultNonZhFinalHead).to.deep.equal('');
expect(resultNonZhFinalBody).to.deep.equal('');
expect(resultNonZhFinalTail).to.deep.equal('');
const resultNonZhFinal = pinyin("a", {
pattern: "final",
});
const resultNonZhFinalHead = pinyin("a", {
pattern: "finalHead",
});
const resultNonZhFinalBody = pinyin("a", {
pattern: "finalBody",
});
const resultNonZhFinalTail = pinyin("a", {
pattern: "finalTail",
});
expect(resultNonZhInitial).to.deep.equal("");
expect(resultNonZhFinal).to.deep.equal("");
expect(resultNonZhFinalHead).to.deep.equal("");
expect(resultNonZhFinalBody).to.deep.equal("");
expect(resultNonZhFinalTail).to.deep.equal("");
});
});

View File

@@ -8,6 +8,7 @@ export interface SingleWordResult {
export type ToneType = "symbol" | "num" | "none";
export type PinyinMode = "normal" | "surname";
export type SurnameMode = "all" | "head" | "off";
export type InitialPattern = "yw" | "standard";
export type CommonOptions = {
/**
* @description 返回的拼音音调类型
@@ -52,4 +53,10 @@ export type CommonOptions = {
* @value string返回值中 ü 转换成指定字符
*/
v?: boolean | string;
/**
* @description 声母的判定方式：是否将 `y`、`w` 视为声母（未指定时与 `yw` 行为一致）
* @value yw：将 `y`、`w` 视为声母
* @value standard：不将 `y`、`w` 视为声母，对应的声母结果为空字符串
*/
initialPattern?: InitialPattern;
};

View File

@@ -1,5 +1,5 @@
import { SingleWordResult } from "../../common/type";
import type { SurnameMode } from "../../common/type";
import type { SurnameMode, InitialPattern } from "../../common/type";
import { MatchPattern, TokenizationAlgorithm } from "../../common/segmentit";
/**
* @description: 获取单个字符的拼音
@@ -36,9 +36,10 @@ declare const getMultiplePinyin: GetMultiplePinyin;
/**
* @description: 获取拼音的声母和韵母
* @param {string} pinyin
* @param {InitialPattern} initialPattern 声母判定方式；为 "standard" 时，声母 `y`、`w` 会被替换为空字符串
* @return {*}
*/
type GetInitialAndFinal = (pinyin: string) => {
type GetInitialAndFinal = (pinyin: string, initialPattern?: InitialPattern) => {
final: string;
initial: string;
};