這段代碼的作用是從一個文本文件里讀出字符串,轉換一下編碼,再寫入另一個文件
/*
 * Reads UTF-8 text from "in.txt" in fixed-size chunks, converts it to
 * GB18030 with iconv, and writes the converted bytes to "out.txt".
 *
 * A chunk boundary may fall in the middle of a multibyte character. The
 * unconverted tail is moved to the front of the input buffer and completed
 * by the next read.
 */
#include <errno.h>
#include <iconv.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Enum constants are true integer constant expressions in C, so they can
 * size ordinary (non-VLA) arrays. */
enum {
    LENGTH = 80,        /* bytes requested from the input file per read */
    BUFSZ = LENGTH * 2  /* capacity of the input and output buffers */
};

/*
 * Prints at most `len` bytes of `str` (stopping early at a NUL byte) as
 * "in:[...](len)". `str` does not need to be NUL-terminated.
 * Returns the value returned by printf.
 */
int print_n_str(const char *str, long len)
{
    int shown = 0;

    /* A precision bounds the read without needing a temporary copy. */
    if (len > 0) {
        shown = (len > INT_MAX) ? INT_MAX : (int)len;
    }
    return printf("in:[%.*s](%ld)\n", shown, str, len);
}

/*
 * Prints `len` bytes of `str` as "%xx" escapes in the form "out:[...](len)".
 * Always returns 0.
 */
int print_hex_str(const char *str, long len)
{
    printf("out:[");
    for (long i = 0; i < len; i++) {
        printf("%%%02x", (unsigned)(unsigned char)str[i]);
    }
    printf("](%ld)\n", len);
    return 0;
}

/*
 * Converts in.txt (UTF-8) to out.txt (GB18030).
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE on any I/O or conversion
 * error, including input that ends in the middle of a character.
 */
int main(void)
{
    int rc = EXIT_FAILURE;
    FILE *in = NULL;
    FILE *out = NULL;
    char in_line[BUFSZ];
    char out_line[BUFSZ];
    size_t left = 0; /* bytes of an incomplete character carried over */

    /* Argument order is (tocode, fromcode): destination encoding first. */
    iconv_t icv = iconv_open("GB18030", "UTF-8");
    if (icv == (iconv_t)-1) {
        perror("iconv_open");
        return EXIT_FAILURE;
    }

    in = fopen("in.txt", "r");
    if (in == NULL) {
        perror("in.txt");
        goto cleanup;
    }
    out = fopen("out.txt", "w");
    if (out == NULL) {
        perror("out.txt");
        goto cleanup;
    }

    for (;;) {
        /* The carried-over tail plus a full read must fit in in_line. */
        if (left > (size_t)(BUFSZ - LENGTH)) {
            fprintf(stderr, "carry-over of %zu bytes is too large\n", left);
            goto cleanup;
        }

        /* Use the count returned by fread; the data may contain NUL bytes,
         * so strlen cannot be used to find its length. */
        size_t got = fread(in_line + left, 1, LENGTH, in);
        if (got == 0) {
            if (ferror(in)) {
                perror("fread");
                goto cleanup;
            }
            break; /* end of file */
        }

        /* iconv advances the pointers and decrements the counters it is
         * given, so work on copies and keep in_line/out_line intact. */
        char *inbuf = in_line;
        size_t inleft = left + got;

        while (inleft > 0) {
            char *chunk = inbuf;
            size_t before = inleft;
            char *outbuf = out_line;
            size_t outleft = sizeof out_line;

            size_t res = iconv(icv, &inbuf, &inleft, &outbuf, &outleft);
            int err = (res == (size_t)-1) ? errno : 0;

            /* outleft is the free space remaining, not the output length. */
            size_t consumed = before - inleft;
            size_t olen = sizeof out_line - outleft;

            print_n_str(chunk, (long)consumed);
            print_hex_str(out_line, (long)olen);
            if (fwrite(out_line, 1, olen, out) != olen) {
                perror("fwrite");
                goto cleanup;
            }

            if (err == 0) {
                continue; /* all input consumed; the loop ends */
            }
            if (err == E2BIG && (consumed > 0 || olen > 0)) {
                continue; /* output buffer was full and is now flushed */
            }
            if (err == EINVAL) {
                break; /* incomplete character at the end: carry it over */
            }

            /* EILSEQ (invalid input) or an error with no progress. */
            fprintf(stderr, "iconv: %s\n", strerror(err));
            goto cleanup;
        }

        /* Move the unconverted tail to the front for the next read. */
        memmove(in_line, inbuf, inleft);
        left = inleft;
    }

    if (left > 0) {
        fprintf(stderr, "input ends with an incomplete character (%zu bytes)\n",
                left);
        goto cleanup;
    }
    rc = EXIT_SUCCESS;

cleanup:
    if (in != NULL) {
        fclose(in);
    }
    /* fclose flushes buffered output, so its result matters for `out`. */
    if (out != NULL && fclose(out) != 0) {
        perror("fclose");
        rc = EXIT_FAILURE;
    }
    iconv_close(icv);
    return rc;
}
但實際上,這段代碼有好幾個坑
iconv_t iconv_open(const char *tocode, const char *fromcode);

size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);

int iconv_close(iconv_t cd);
1.函數1(iconv_open),兩個參數的順序是(tocode, fromcode),也就是先目標編碼、后源編碼,很容易無意中寫反了,而且寫反了還不容易發現
2.函數2,后面四個參數都是會變的,不要把原來的變量傻乎乎傳進去到時候就找不回來了
3.函數2,有些時候我們的inbuf里不一定是完整的utf8字符串,可能有一些是被截斷的“半個漢字”,此時iconv()會返回-1,並且把errno設為EINVAL,但是其實在應用層,這未必是錯誤,而是需要處理的情況。此時就需要inbytesleft參數,這個參數存的是剩下還沒處理的字節數,應該把這些字節留到下一次,和新讀入的數據拼在一起再轉換。
4.函數2,outbytesleft指的是outbuf剩余的空閑空間,不要把它當成輸出字符串的長度