-- UTF-8 helpers: split a string into characters and measure its length in
-- characters / display units instead of bytes.
--
-- The functions are intentionally left global so existing callers that
-- reference them by name keep working.

--- Split a UTF-8 encoded string into a table of characters.
--
-- UTF-8 encoding rules the pattern relies on:
--   1. The first byte of a character is in 0x00-0x7F (0-127) or
--      0xC2-0xF4 (194-244). UTF-8 is ASCII-compatible, so 0-127 are
--      identical to ASCII.
--   2. 0xC0, 0xC1 and 0xF5-0xFF (192, 193 and 245-255) never appear in UTF-8.
--   3. 0x80-0xBF (128-191) only appear as the second or later byte of a
--      multi-byte character (e.g. Chinese characters).
--
-- Bytes that cannot start a match (rule 2, or a stray continuation byte)
-- are skipped by gmatch rather than reported.
--
-- @param s string to split
-- @return sequence table with one entry per matched character
function stringToTable(s)
    local tb = {}
    for utfChar in string.gmatch(s, "[%z\1-\127\194-\244][\128-\191]*") do
        tb[#tb + 1] = utfChar
    end
    return tb
end

--- Length of a string in display units.
-- A single-byte (ASCII) character counts as 1 unit; any multi-byte character
-- is treated as Chinese/wide and counts as 2 units.
--
-- @param s string to measure
-- @return number of units
function getUTFLen(s)
    local sTable = stringToTable(s)
    local len = 0
    for i = 1, #sTable do
        -- A byte length greater than 1 is taken to mean a wide character.
        if string.len(sTable[i]) > 1 then
            len = len + 2
        else
            len = len + 1
        end
    end
    return len
end

--- Length of a string in characters.
-- Every character counts as 1 unit regardless of how many bytes it occupies,
-- so the result is simply the number of characters found by stringToTable.
--
-- @param s string to measure
-- @return number of characters
function getNewUTFLen(s)
    return #stringToTable(s)
end

-- Demo: count of Chinese and ASCII characters together.
local str = "一二三@#[]【】789&*():"
print(getNewUTFLen(str)) -- 17

-- Demo: dump every character with its byte length and leading byte value.
local s = "①貳aA#}。"
local sTab = stringToTable(s)
for i = 1, #sTab do
    local outStr = string.format("sTab index:%d,str:\"%s\",Len:%s,byte:%d",
        i,
        sTab[i],
        string.len(sTab[i]),
        string.byte(sTab[i])
    )
    print(outStr)
end
print("#sTab = " .. #sTab)
print("getUTFLen = " .. getUTFLen(s))
-- Output (輸出結果):
-- sTab index:1,str:"①",Len:3,byte:226
-- sTab index:2,str:"貳",Len:3,byte:232
-- sTab index:3,str:"a",Len:1,byte:97
-- sTab index:4,str:"A",Len:1,byte:65
-- sTab index:5,str:"#",Len:1,byte:35
-- sTab index:6,str:"}",Len:1,byte:125
-- sTab index:7,str:"。",Len:3,byte:227
-- #sTab = 7
-- getUTFLen = 10