{
關於GBK和UTF-8之間的轉換,很多初學者會很迷茫。
一般來說,GBK和UTF-8都是文字的編碼方式,但同一個字符在兩者中對應的內碼並不相同,所以GBK和UTF-8之間的轉換需要先對內碼進行一一映射,然後再進行轉換。
對於一般系統上的工程,一般使用libiconv即可,但是對於嵌入式或手機操作系統,libiconv顯得就有點龐大了。
在這裡提供GBK和UTF-8轉換以及全半角、大小寫轉換等函數,希望對手機開發的同學有所幫助,特別是在iOS上開發的同學。
全半角、大小寫及簡繁體轉換的具體使用方法見以下代碼:
#include "strnormalize.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Line filter: reads text from stdin one chunk at a time, passes each chunk
 * through str_normalize_utf8(), and writes the result to stdout.
 *
 * argv[1], when present, is parsed as a decimal bitmask of SNO_* flags and
 * replaces the default of SNO_TO_LOWER | SNO_TO_HALF.
 *
 * Returns 0 on success, EXIT_FAILURE if the line buffer cannot be allocated.
 */
int main(int argc, char **argv)
{
    /* Capacity of the read buffer; fgets() returns longer lines in pieces. */
    enum { LINE_CAPACITY = 65536 };

    str_normalize_init();

    unsigned options = SNO_TO_LOWER | SNO_TO_HALF;
    if (argc > 1) {
        /* strtoul() has defined behaviour for out-of-range input, unlike atoi(). */
        options = (unsigned)strtoul(argv[1], NULL, 10);
    }

    /* Cast kept so the sample also builds as C++ (the header is extern "C"-aware). */
    char *buffer = (char *)calloc(LINE_CAPACITY, 1);
    if (buffer == NULL) {
        fprintf(stderr, "out of memory\n");
        return EXIT_FAILURE;
    }

    while (fgets(buffer, LINE_CAPACITY, stdin)) {
        str_normalize_utf8(buffer, options);
        fputs(buffer, stdout);
    }

    free(buffer);
    return 0;
}
UTF-8和GBK轉換使用方法如下:
#include "strnormalize.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
/*
 * Demo: converts one UTF-8 sample to GBK with utf8_to_gbk() and one GBK
 * sample to UTF-8 with gbk_to_utf8(), then prints each input next to the
 * converted buffer together with the lengths involved.
 *
 * Returns 0 on success, EXIT_FAILURE if an output buffer cannot be allocated.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    str_normalize_init();

    const char *utf8 = "我是utf-8字符!";
    /*
     * The same sentence with "GBK" in place of "utf-8", encoded in GBK.
     * Written as hex escapes because raw GBK bytes are not valid UTF-8 and
     * get corrupted when the source file is stored as UTF-8.
     */
    const char *gbk = "\xce\xd2\xca\xc7" "GBK" "\xd7\xd6\xb7\xfb\xa3\xa1";

    /* unsigned int matches the length parameters declared in strnormalize.h. */
    unsigned int utf8_len = (unsigned int)strlen(utf8);
    unsigned int gbk_len = (unsigned int)strlen(gbk); /* was strlen(utf8): over-read */

    /*
     * Each output buffer is sized from the input it is converted from, with
     * room for a terminating NUL. The factors are deliberately generous.
     * NOTE(review): confirm the required capacity against the library.
     */
    unsigned int gbkbuffer_len = utf8_len * 2 + 1;
    unsigned int utf8buffer_len = gbk_len * 3 + 1;

    /* Casts kept so the sample also builds as C++ (the header is extern "C"-aware). */
    char *utf8buffer = (char *)calloc(utf8buffer_len, 1);
    char *gbkbuffer = (char *)calloc(gbkbuffer_len, 1);
    if (utf8buffer == NULL || gbkbuffer == NULL) {
        fprintf(stderr, "out of memory\n");
        free(utf8buffer);
        free(gbkbuffer);
        return EXIT_FAILURE;
    }

    /*
     * NOTE(review): both converters return int, but the success/failure
     * convention is not visible here, so the results are left unchecked as in
     * the original sample.
     */
    utf8_to_gbk(utf8, utf8_len, &gbkbuffer, &gbkbuffer_len);
    gbk_to_utf8(gbk, gbk_len, &utf8buffer, &utf8buffer_len);

    /* %u matches the unsigned lengths. */
    printf("utf8: %s<=>%u \t gbkbuffer: %s<=>%u\n", utf8, utf8_len, gbkbuffer, gbkbuffer_len);
    printf("gbk: %s<=>%u \t utf8buffer: %s<=>%u\n", gbk, gbk_len, utf8buffer, utf8buffer_len);

    free(utf8buffer);
    free(gbkbuffer);
    return 0;
}
/**
 * Copyright(c) 2012-2013, All Rights Reserved.
 *
 * @file strnormalize.h
 * @details String normalization and GBK <-> UTF-8 conversion declarations.
 *          To check for a GBK character you could test:
 *          code >= 0x8000 && _pGbk2Utf16[code - 0x8000] != 0
 * @author cnangel
 * @version 1.0.0
 * @date 2012/10/09 11:44:58
 */

#ifndef __STRNORMALIZE_H__
#define __STRNORMALIZE_H__

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Option bits for the `options` argument of str_normalize_gbk() and
 * str_normalize_utf8(); combine them with bitwise OR.
 * NOTE(review): meanings are inferred from the names -- confirm against the
 * implementation.
 */
#define SNO_TO_LOWER        1   /* presumably: convert letters to lower case */
#define SNO_TO_UPPER        2   /* presumably: convert letters to upper case */
#define SNO_TO_HALF         4   /* presumably: full-width to half-width forms */
#define SNO_TO_SIMPLIFIED   8   /* presumably: traditional to simplified Chinese */

/**
 * Initializes the library. The usage samples call this once before any other
 * function declared here.
 */
void str_normalize_init(void);

/**
 * Normalizes GBK-encoded text according to `options`.
 *
 * @param text     NUL-terminated buffer; not const, so presumably rewritten
 *                 in place -- confirm against the implementation.
 * @param options  Bitwise OR of SNO_* flags.
 */
void str_normalize_gbk(char *text, unsigned options);

/**
 * Normalizes UTF-8-encoded text according to `options`.
 *
 * @param text     NUL-terminated buffer; not const, so presumably rewritten
 *                 in place -- confirm against the implementation.
 * @param options  Bitwise OR of SNO_* flags.
 */
void str_normalize_utf8(char *text, unsigned options);

/**
 * Converts GBK bytes to UTF-8.
 *
 * @param from      Input bytes.
 * @param from_len  Number of input bytes.
 * @param to        Address of the output buffer pointer.
 * @param to_len    Address of the output length.
 * @return int status. NOTE(review): the return convention, whether *to may be
 *         replaced, and what *to_len holds on return are not visible from
 *         this header -- confirm against the implementation.
 */
int gbk_to_utf8(const char *from, unsigned int from_len, char **to, unsigned int *to_len);

/**
 * Converts UTF-8 bytes to GBK.
 *
 * @param from      Input bytes.
 * @param from_len  Number of input bytes.
 * @param to        Address of the output buffer pointer.
 * @param to_len    Address of the output length.
 * @return int status. NOTE(review): the return convention, whether *to may be
 *         replaced, and what *to_len holds on return are not visible from
 *         this header -- confirm against the implementation.
 */
int utf8_to_gbk(const char *from, unsigned int from_len, char **to, unsigned int *to_len);

#ifdef __cplusplus
}
#endif

#endif /* __STRNORMALIZE_H__ */
}
