網上有很多人說自己整理的漢字轉拼音是完美的,但使用後才發現都是半吊子,問題多多。
常見的生僻字,或多音字識別,轉換后簡直讓人感覺可怕。
主流的轉換有三種:hash匹配,Npinyin,微軟PinYinConverter。
但單用這三個,都沒法做到完美,為什么沒人考慮融合呢?
我的方案:Npinyin+微軟PinYinConverter(首選Npinyin)
微軟PinYinConverter
為什麼:微軟PinYinConverter很強大,但在多音字面前,犯了傳統的錯誤:按拼音字母排序。如【強】,微軟居然優先【jiang】而不是【qiang】。
所以不能優選 PinYinConverter。
Npinyin
很人性,很不錯的第三方庫,在傳統多音字前優先使用率較高的,但在生僻字面前有點無法轉換。(GetInitials(strChinese) 有Bug 如【洺】無法識別,但GetPinyin可以正常轉換。)
總結:優先使用Npinyin,轉換失敗時再使用微軟PinYinConverter。目測完美。
上代碼:
/// <summary>
/// Converts Chinese text to pinyin. Each character is first passed to
/// NPinyin; when the result is still a character that Microsoft's
/// <c>ChineseChar</c> recognises, the <c>ChineseChar</c> readings are used instead.
/// </summary>
public class PingYinHelper
{
    /// <summary>
    /// Converts every character of the input to its full pinyin spelling.
    /// </summary>
    /// <param name="strChinese">Text to convert. Null or empty yields an empty string.</param>
    /// <returns>
    /// The per-character spellings concatenated and upper-cased, or an empty
    /// string when the input is null/empty or the conversion throws.
    /// </returns>
    public static string ConvertToAllSpell(string strChinese)
    {
        if (string.IsNullOrEmpty(strChinese))
        {
            return string.Empty;
        }

        try
        {
            StringBuilder fullSpell = new StringBuilder();
            foreach (char chr in strChinese)
            {
                fullSpell.Append(GetSpell(chr));
            }

            // Invariant casing: the result is an ASCII identifier, so it must not
            // vary with the current culture (e.g. Turkish dotted/dotless i).
            return fullSpell.ToString().ToUpperInvariant();
        }
        catch (Exception e)
        {
            // Best-effort API: report the failure and fall through to the empty result.
            Console.WriteLine("全拼轉化出錯!" + e.Message);
        }

        return string.Empty;
    }

    /// <summary>
    /// Converts every character of the input to the first letter of its pinyin spelling.
    /// </summary>
    /// <param name="strChinese">Text to convert. Null or empty yields an empty string.</param>
    /// <returns>
    /// The first letters concatenated and upper-cased, or an empty string when
    /// the input is null/empty or the conversion throws.
    /// </returns>
    public static string GetFirstSpell(string strChinese)
    {
        // NPinyin.Pinyin.GetInitials(strChinese) is deliberately not used: it fails
        // to recognise some characters (e.g. 洺) that GetPinyin converts correctly.
        if (string.IsNullOrEmpty(strChinese))
        {
            return string.Empty;
        }

        try
        {
            StringBuilder firstLetters = new StringBuilder();
            foreach (char chr in strChinese)
            {
                string spell = GetSpell(chr);

                // Skip characters with no spelling instead of indexing into an
                // empty string and losing the whole result.
                if (!string.IsNullOrEmpty(spell))
                {
                    firstLetters.Append(spell[0]);
                }
            }

            // Invariant casing for the same reason as in ConvertToAllSpell.
            return firstLetters.ToString().ToUpperInvariant();
        }
        catch (Exception e)
        {
            // Best-effort API: report the failure and fall through to the empty result.
            Console.WriteLine("首字母轉化出錯!" + e.Message);
        }

        return string.Empty;
    }

    /// <summary>
    /// Returns the spelling of a single character, preferring NPinyin and
    /// falling back to <c>ChineseChar</c>.
    /// </summary>
    /// <param name="chr">Character to convert.</param>
    /// <returns>
    /// The NPinyin result, or the first non-empty <c>ChineseChar</c> reading with
    /// its last character removed, or an empty string when NPinyin returns nothing.
    /// </returns>
    private static string GetSpell(char chr)
    {
        var coverchr = NPinyin.Pinyin.GetPinyin(chr);
        if (string.IsNullOrEmpty(coverchr))
        {
            return string.Empty;
        }

        // If the NPinyin output still starts with a character ChineseChar accepts,
        // NPinyin presumably left the input untranslated, so try the fallback.
        if (ChineseChar.IsValidChar(coverchr[0]))
        {
            ChineseChar chineseChar = new ChineseChar(coverchr[0]);
            foreach (string value in chineseChar.Pinyins)
            {
                if (!string.IsNullOrEmpty(value))
                {
                    // Drop the last character of the reading
                    // (presumably the tone digit - TODO confirm).
                    return value.Remove(value.Length - 1, 1);
                }
            }
        }

        return coverchr;
    }
}
抽了幾個常見錯字和姓名
測試如下:
/// <summary>
/// Verifies that <c>PingYinHelper</c> produces the expected full pinyin and
/// initials for a handful of names.
/// </summary>
[TestMethod]
public void PingyinTest()
{
    // name -> (expected full pinyin in lower case, expected upper-case initials)
    Dictionary<string, Tuple<string, string>> dict = new
    Dictionary<string, Tuple<string, string>>() {
        {"梅鈺", new Tuple<string,string>( "meiyu","MY")},
        {"張洺", new Tuple<string,string>( "zhangming","ZM")},
        {"王玥", new Tuple<string,string>( "wangyue","WY")},
        {"王思琪", new Tuple<string,string>( "wangsiqi","WSQ")},
        {"董雲強", new Tuple<string,string>( "dongyunqiang","DYQ")},
        {"宋紅培", new Tuple<string,string>( "songhongpei","SHP")},
        {"石磊", new Tuple<string,string>( "shilei","SL")},
    };

    foreach (var keyval in dict)
    {
        var name = keyval.Key;
        var expectedFull = keyval.Value.Item1;
        var expectedInitials = keyval.Value.Item2;

        // TrimAll is a project extension defined elsewhere; presumably it strips
        // whitespace from the result - TODO confirm.
        var fullSpell = FlexLogicFramework.Library.CommonLib.PingYinHelper.ConvertToAllSpell(name)
            .TrimAll().ToLowerInvariant();
        // AreEqual reports the expected and actual values when the check fails.
        Assert.AreEqual(expectedFull, fullSpell, "轉換錯誤");

        var initials = FlexLogicFramework.Library.CommonLib.PingYinHelper.GetFirstSpell(name).TrimAll();
        Assert.AreEqual(expectedInitials, initials, "轉換錯誤");
    }
}

