代碼地址
https://github.com/gongluck/Code-snippet/tree/master/cpp/code%20conversion
需求
編碼轉換在實際開發中經常遇到,通常是ANSI、Unicode和UTF-8之間相互轉換。實現也有很多種,有查表法、使用C++11、使用boost、使用系統API。C++11和boost幾乎可以實現一套代碼,在Linux和Windows都能使用,但實際會有很多坑,相當於代碼幾乎不改,但是要改一下系統環境。所以有一種實現就是判斷系統的版本,然後選擇不同的系統API進行編碼轉換。
實現
目前只實現Windows下的編碼轉換,以後需要在Linux下使用編碼轉換再做補充。Windows下的編碼轉換基本圍繞Unicode做處理。例如ANSI->UTF-8,就是先將ANSI->Unicode,再將Unicode->UTF-8。
// convert.h
/*
* @Author: gongluck
* @Date: 2020-03-23 16:06:23
* @Last Modified by: gongluck
* @Last Modified time: 2020-03-23 16:09:30
*/
// Character encoding conversion
#pragma once
#include <string>
namespace gconvert
{
// Conventions shared by every function in this namespace:
//  - the first parameter is the input text, the second receives the result;
//  - a positive return value means the conversion succeeded;
//  - a return value <= 0 means no usable result was produced. Builds without
//    _WIN32 have no backend and the four direct conversions return 0 there.

// ANSI->Unicode
// Converts `ansi` (bytes in the system ANSI code page, CP_ACP) into the wide
// string `uni`.
int ansi2uni(const std::string& ansi, std::wstring& uni);
// Unicode->ANSI
// Converts the wide string `uni` into `ansi` (system ANSI code page, CP_ACP).
int uni2ansi(const std::wstring& uni, std::string& ansi);
// UTF8->Unicode
// Converts the UTF-8 bytes in `utf8` into the wide string `uni`.
int utf82uni(const std::string& utf8, std::wstring& uni);
// Unicode->UTF8
// Converts the wide string `uni` into UTF-8 bytes stored in `utf8`.
int uni2utf8(const std::wstring& uni, std::string& utf8);
// ANSI->UTF8
// Two-step conversion through a temporary wide string (ansi2uni, then
// uni2utf8). Returns -3 when the first step does not succeed, otherwise the
// result of the second step.
int ansi2utf8(const std::string& ansi, std::string& utf8);
// UTF8->ANSI
// Two-step conversion through a temporary wide string (utf82uni, then
// uni2ansi). Returns -3 when the first step does not succeed, otherwise the
// result of the second step.
int utf82ansi(const std::string& utf8, std::string& ansi);
} // namespace gconvert
//convert.cpp
/*
* @Author: gongluck
* @Date: 2020-03-23 16:13:01
* @Last Modified by: gongluck
* @Last Modified time: 2020-03-23 16:34:50
*/
#include "convert.h"
#include <iostream>
#ifdef _WIN32
#include <windows.h>
#endif
namespace gconvert
{
#ifdef _WIN32
// Converts the NUL-terminated multi-byte text in `multi` (code page `code`)
// into the wide string `uni`.
//
// Returns the number of wide characters produced, INCLUDING the terminating
// NUL (so an empty input yields 1), or -1 if either API call fails. `uni` is
// left untouched on failure. Input is passed as a NUL-terminated string, so
// anything after an embedded '\0' in `multi` is ignored.
// Allocation failure is reported by std::bad_alloc, not by a return code.
static int multi2uni(const std::string& multi, std::wstring& uni, UINT code)
{
    // Sizing pass: with cbMultiByte == -1 the reported size counts the NUL.
    const int needed = MultiByteToWideChar(code, 0, multi.c_str(), -1, nullptr, 0);
    if (needed <= 0)
    {
        std::cerr << __FILE__ << " : " << __LINE__ << " : " << GetLastError() << std::endl;
        return -1;
    }

    // std::wstring owns the scratch buffer, so nothing leaks if a later step throws.
    std::wstring converted(static_cast<std::wstring::size_type>(needed), L'\0');
    const int written = MultiByteToWideChar(code, 0, multi.c_str(), -1, &converted[0], needed);
    if (written <= 0)
    {
        // The conversion pass can fail even after a successful sizing pass;
        // never hand an unfilled buffer to the caller.
        std::cerr << __FILE__ << " : " << __LINE__ << " : " << GetLastError() << std::endl;
        return -1;
    }

    // Drop the terminating NUL that the API wrote into the buffer.
    converted.resize(static_cast<std::wstring::size_type>(written) - 1);
    uni.swap(converted);
    return written;
}

// Converts the NUL-terminated wide text in `uni` into multi-byte text in
// `multi`, encoded with code page `code`.
//
// Returns the number of bytes produced, INCLUDING the terminating NUL (so an
// empty input yields 1), or -1 if either API call fails. `multi` is left
// untouched on failure. Anything after an embedded L'\0' in `uni` is ignored.
// Allocation failure is reported by std::bad_alloc, not by a return code.
static int uni2multi(const std::wstring& uni, std::string& multi, UINT code)
{
    // Sizing pass: with cchWideChar == -1 the reported size counts the NUL.
    const int needed = WideCharToMultiByte(code, 0, uni.c_str(), -1, nullptr, 0, nullptr, nullptr);
    if (needed <= 0)
    {
        std::cerr << __FILE__ << " : " << __LINE__ << " : " << GetLastError() << std::endl;
        return -1;
    }

    // std::string owns the scratch buffer, so nothing leaks if a later step throws.
    std::string converted(static_cast<std::string::size_type>(needed), '\0');
    const int written = WideCharToMultiByte(code, 0, uni.c_str(), -1, &converted[0], needed, nullptr, nullptr);
    if (written <= 0)
    {
        // The conversion pass can fail even after a successful sizing pass;
        // never hand an unfilled buffer to the caller.
        std::cerr << __FILE__ << " : " << __LINE__ << " : " << GetLastError() << std::endl;
        return -1;
    }

    // Drop the terminating NUL that the API wrote into the buffer.
    converted.resize(static_cast<std::string::size_type>(written) - 1);
    multi.swap(converted);
    return written;
}
#endif

// ANSI->Unicode
// Windows: converts from the system ANSI code page (CP_ACP); see multi2uni
// for the return value. Other platforms: no backend yet, returns 0 and leaves
// `uni` untouched.
int ansi2uni(const std::string& ansi, std::wstring& uni)
{
#ifdef _WIN32
    return multi2uni(ansi, uni, CP_ACP);
#else
    static_cast<void>(ansi);
    static_cast<void>(uni);
    return 0;
#endif
}

// Unicode->ANSI
// Windows: converts to the system ANSI code page (CP_ACP); see uni2multi for
// the return value. Other platforms: no backend yet, returns 0 and leaves
// `ansi` untouched.
int uni2ansi(const std::wstring& uni, std::string& ansi)
{
#ifdef _WIN32
    return uni2multi(uni, ansi, CP_ACP);
#else
    static_cast<void>(uni);
    static_cast<void>(ansi);
    return 0;
#endif
}

// UTF8->Unicode
// Windows: converts from UTF-8 (CP_UTF8); see multi2uni for the return value.
// Other platforms: no backend yet, returns 0 and leaves `uni` untouched.
int utf82uni(const std::string& utf8, std::wstring& uni)
{
#ifdef _WIN32
    return multi2uni(utf8, uni, CP_UTF8);
#else
    static_cast<void>(utf8);
    static_cast<void>(uni);
    return 0;
#endif
}

// Unicode->UTF8
// Windows: converts to UTF-8 (CP_UTF8); see uni2multi for the return value.
// Other platforms: no backend yet, returns 0 and leaves `utf8` untouched.
int uni2utf8(const std::wstring& uni, std::string& utf8)
{
#ifdef _WIN32
    return uni2multi(uni, utf8, CP_UTF8);
#else
    static_cast<void>(uni);
    static_cast<void>(utf8);
    return 0;
#endif
}

// ANSI->UTF8
// Goes through a temporary wide string: ANSI -> wide -> UTF-8.
// Returns -3 if the first step does not succeed (its result is <= 0),
// otherwise whatever uni2utf8 returns.
int ansi2utf8(const std::string& ansi, std::string& utf8)
{
    std::wstring uni;
    if (ansi2uni(ansi, uni) <= 0)
    {
        return -3;
    }
    return uni2utf8(uni, utf8);
}

// UTF8->ANSI
// Goes through a temporary wide string: UTF-8 -> wide -> ANSI.
// Returns -3 if the first step does not succeed (its result is <= 0),
// otherwise whatever uni2ansi returns.
int utf82ansi(const std::string& utf8, std::string& ansi)
{
    std::wstring uni;
    if (utf82uni(utf8, uni) <= 0)
    {
        return -3;
    }
    return uni2ansi(uni, ansi);
}
} // namespace gconvert
//testcode
#include <iostream>
#include "../code conversion/convert.h"
// Demo driver: runs each gconvert conversion once and reports any step whose
// result is <= 0 (the value the library uses for "no usable result").
int main()
{
    // NOTE(review): this literal is only ANSI-encoded if the source file is
    // saved and compiled in the system ANSI code page — confirm.
    std::string ansi = "你好,世界!";
    std::wstring uni;
    std::string utf8;

    // Prints the step name and its result code when a step did not succeed.
    auto report = [](const char* step, int code) {
        if (code <= 0)
        {
            std::cerr << step << " failed : " << code << std::endl;
        }
    };

    // `ret` was previously assigned without ever being declared.
    int ret = gconvert::ansi2uni(ansi, uni);
    report("ansi2uni", ret);
    ret = gconvert::ansi2utf8(ansi, utf8);
    report("ansi2utf8", ret);
    ret = gconvert::uni2ansi(uni, ansi);
    report("uni2ansi", ret);
    ret = gconvert::uni2utf8(uni, utf8);
    report("uni2utf8", ret);
    ret = gconvert::utf82ansi(utf8, ansi);
    report("utf82ansi", ret);
    ret = gconvert::utf82uni(utf8, uni);
    report("utf82uni", ret);
    return 0;
}