下午花了點時間寫了個正則表達式和編碼轉換的工具。正則表達式工具是模仿 YART(Yet Another Regex Tester)寫的,
其中涉及編碼轉換的核心算法如下,工具可在下面下載。
1、unicode編碼轉漢字
核心算法:
/// <summary>
/// Replaces every <c>\uXXXX</c> escape sequence in <paramref name="s"/> with the
/// UTF-16 code unit it names (for example <c>\u8f6c\u53d1</c> becomes the two
/// characters U+8F6C U+53D1). All other text is returned unchanged.
/// </summary>
/// <param name="s">
/// Text that may contain <c>\uXXXX</c> escapes. The <c>u</c> and the four hex
/// digits are matched case-insensitively.
/// </param>
/// <returns>
/// The decoded text, or <paramref name="s"/> itself when it is null or empty.
/// </returns>
public string StrToGB(string s)
{
    if (string.IsNullOrEmpty(s))
    {
        return s;
    }

    // Only genuine hex digits are matched, so a sequence such as "\uzzzz" is left
    // as-is instead of making int.Parse throw. Replacing in a single regex pass
    // (rather than string.Replace per match) keeps text produced by one
    // replacement from being rewritten by a later one, and casting straight to
    // char lets an escaped surrogate pair join into one valid character.
    return Regex.Replace(
        s,
        @"\\u([0-9a-f]{4})",
        m => ((char)int.Parse(m.Groups[1].Value, NumberStyles.HexNumber)).ToString(),
        RegexOptions.IgnoreCase);
}
2、漢字轉unicode
/// <summary>
/// Encodes every UTF-16 code unit of <paramref name="text"/> as a
/// <c>\uXXXX</c> escape with four lowercase hex digits (high byte first).
/// </summary>
/// <param name="text">The text to escape.</param>
/// <returns>
/// The concatenated escapes, or an empty string when <paramref name="text"/>
/// is null or empty.
/// </returns>
public string GBToUnicode(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    // Each char is one UTF-16 code unit; "x4" yields the zero-padded,
    // high-byte-first hex form without juggling the little-endian byte pairs.
    System.Text.StringBuilder escaped = new System.Text.StringBuilder(text.Length * 6);
    foreach (char codeUnit in text)
    {
        escaped.Append(@"\u").Append(((int)codeUnit).ToString("x4"));
    }

    return escaped.ToString();
}
3、字符轉16進制
/// <summary>
/// Converts <paramref name="mStr"/> to its bytes in the platform default
/// encoding and returns them as uppercase two-digit hex values separated by
/// single spaces (for example <c>"AB"</c> becomes <c>"41 42"</c>).
/// </summary>
/// <param name="mStr">The text to convert.</param>
/// <returns>
/// The space-separated hex string, or an empty string when
/// <paramref name="mStr"/> is null or empty.
/// </returns>
public string StrToHex(string mStr)
{
    if (string.IsNullOrEmpty(mStr))
    {
        return string.Empty;
    }

    // The original spelled this ASCIIEncoding.Default, which is just the inherited
    // static Encoding.Default (the platform default encoding), not ASCII.
    byte[] encoded = Encoding.Default.GetBytes(mStr);

    // BitConverter joins bytes with a bare '-'; swap that for a space.
    return BitConverter.ToString(encoded).Replace("-", " ");
}
4、16進制轉字符
/// <summary>
/// Returns the string represented by a hexadecimal byte dump.
/// Spaces and dashes between the hex digits are ignored; each remaining pair of
/// digits becomes one byte, and the bytes are decoded with the platform default
/// encoding.
/// </summary>
/// <param name="mHex">Hex text such as <c>"41 42"</c> or <c>"41-42"</c>.</param>
/// <returns>
/// The decoded string. Returns an empty string when nothing remains after
/// stripping separators, or when an exception occurs (for example a null
/// argument, or an odd number of hex digits, which makes the final
/// <c>Substring</c> call run past the end of the text); in that case the
/// exception message is shown in a message box first.
/// </returns>
public string HexToStr(string mHex)
{
    try
    {
        // Drop the separators that StrToHex-style dumps and BitConverter emit.
        mHex = mHex.Replace(" ", "");
        mHex = mHex.Replace("-", "");
        if (mHex.Length <= 0)
        {
            return "";
        }

        byte[] vBytes = new byte[mHex.Length / 2];
        for (int i = 0; i < mHex.Length; i += 2)
        {
            // A pair that is not valid hex is decoded as a zero byte rather than failing.
            if (!byte.TryParse(mHex.Substring(i, 2), NumberStyles.HexNumber, null, out vBytes[i / 2]))
            {
                vBytes[i / 2] = 0;
            }
        }

        // Encoding.Default is what the original ASCIIEncoding.Default resolved to.
        return Encoding.Default.GetString(vBytes);
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
        return "";
    }
}
喜歡的話請拿去用,源碼已經附上。