/**
 * Demonstrates that decoding bytes with the wrong charset is lossy.
 *
 * <p>A five-character Chinese string is encoded with GBK and with UTF-8, then the GBK bytes are
 * (incorrectly) decoded as UTF-8. Every byte sequence that is not valid UTF-8 is replaced by
 * U+FFFD during that decode, so the original text cannot be recovered afterwards: encoding the
 * damaged string back to UTF-8 and decoding it again simply reproduces the same damaged string.
 */
public class CodecTest {

    /** Charset names handed to the {@code String} encode/decode APIs. */
    private static final String GBK = "GBK";
    private static final String UTF_8 = "utf-8";

    /**
     * Runs the demonstration and prints each intermediate result to standard output.
     *
     * @param args ignored
     * @throws UnsupportedEncodingException if the runtime does not provide GBK or UTF-8
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        // Five CJK characters, U+6211 U+662F U+4E2D U+56FD U+4EBA ("I am Chinese").
        // Written as escapes so the exact code points survive any re-encoding or
        // script conversion of this source file; the byte dumps quoted below are
        // only valid for these code points (U+570B in place of U+56FD encodes differently).
        String s = "\u6211\u662f\u4e2d\u56fd\u4eba";

        // Bytes after GBK encoding:
        // gbkEncode[-50, -46, -54, -57, -42, -48, -71, -6, -56, -53]
        byte[] gbkEncode = s.getBytes(GBK);
        System.out.println("gbkEncode" + Arrays.toString(gbkEncode));

        // Bytes after UTF-8 encoding:
        // utfEncode[-26, -120, -111, -26, -104, -81, -28, -72, -83, -27, -101, -67, -28, -70, -70]
        byte[] utfEncode = s.getBytes(UTF_8);
        System.out.println("utfEncode" + Arrays.toString(utfEncode));

        // Decode the GBK bytes as if they were UTF-8. The result is five U+FFFD,
        // then U+0439 (bytes -48, -71 happen to form a valid two-byte UTF-8
        // sequence), then three more U+FFFD.
        String gbkDecodedByUTF = new String(gbkEncode, UTF_8);
        System.out.println(gbkDecodedByUTF);

        // UTF-8 bytes of the damaged string from the previous step:
        // [-17, -65, -67, -17, -65, -67, -17, -65, -67, -17, -65, -67, -17, -65, -67,
        //  -48, -71, -17, -65, -67, -17, -65, -67, -17, -65, -67]
        byte[] gbkDecodedByUTF_EncodeByUTF = gbkDecodedByUTF.getBytes(UTF_8);
        System.out.println(Arrays.toString(gbkDecodedByUTF_EncodeByUTF));

        // Decoding those bytes yields the same damaged string again. The damage was
        // done when the GBK bytes were decoded as UTF-8; encoding and decoding the
        // damaged string with UTF-8 is self-consistent and does not change it.
        // This step must decode the re-encoded bytes (not gbkEncode) to actually
        // exercise that round trip.
        String gbkDecodedByUTF_EncodeByUTF_DecodeByUTF =
                new String(gbkDecodedByUTF_EncodeByUTF, UTF_8);
        System.out.println(gbkDecodedByUTF_EncodeByUTF_DecodeByUTF);
    }
}
Print:
gbkEncode[-50, -46, -54, -57, -42, -48, -71, -6, -56, -53]
utfEncode[-26, -120, -111, -26, -104, -81, -28, -72, -83, -27, -101, -67, -28, -70, -70]
�����й���
[-17, -65, -67, -17, -65, -67, -17, -65, -67, -17, -65, -67, -17, -65, -67, -48, -71, -17, -65, -67, -17, -65, -67, -17, -65, -67]
�����й���
utf-8編碼格式:
ref:
字符編碼筆記:ASCII,Unicode 和 UTF-8