GBK和UTF8之間的轉換(C/C++,Windows API)
阿新 • • 發佈:2019-02-02
GBK和UTF8之間的轉換可以使用MultiByteToWideChar和WideCharToMultiByte兩個Windows API。方法是先用MultiByteToWideChar把來源字串轉換為中間編碼Unicode(在Windows上即UTF-16寬字元),再用WideCharToMultiByte轉換為目標編碼即可。
#include <stdio.h>
#include <windows.h>

/**
 * Converts a NUL-terminated GBK string to UTF-8, going through UTF-16.
 *
 * NOTE(review): the source code page is CP_ACP (the system ANSI code page),
 * which is GBK only on systems whose ANSI code page is 936. Consider passing
 * 936 explicitly if the input is guaranteed to be GBK - confirm with callers.
 *
 * @param lpGBKStr     NUL-terminated input string. NULL yields 0.
 * @param lpUTF8Str    Output buffer, or NULL to only query the required size.
 * @param nUTF8StrLen  Size of lpUTF8Str in bytes (ignored when lpUTF8Str is NULL).
 * @return When lpUTF8Str is NULL: the number of bytes required, including the
 *         terminating NUL. Otherwise: the number of bytes written, including
 *         the terminating NUL. 0 on any failure or if the buffer is too small.
 */
int GBKToUTF8(unsigned char* lpGBKStr, unsigned char* lpUTF8Str, int nUTF8StrLen)
{
    if (!lpGBKStr) // same contract as UTF8ToGBK: a NULL input is an error
        return 0;

    const char* lpSrc = reinterpret_cast<const char*>(lpGBKStr);

    // Pass 1: ask how many wide characters (terminating NUL included) are needed.
    const int nWideLen = ::MultiByteToWideChar(CP_ACP, 0, lpSrc, -1, NULL, 0);
    if (nWideLen <= 0)
        return 0;

    wchar_t* lpUnicodeStr = new wchar_t[nWideLen];

    // Pass 2: perform the actual conversion to UTF-16.
    int nRetLen = ::MultiByteToWideChar(CP_ACP, 0, lpSrc, -1, lpUnicodeStr, nWideLen);
    if (nRetLen > 0)
    {
        // Number of UTF-8 bytes required (terminating NUL included).
        nRetLen = ::WideCharToMultiByte(CP_UTF8, 0, lpUnicodeStr, -1, NULL, 0, NULL, NULL);

        if (lpUTF8Str) // a NULL output buffer means "size query only"
        {
            if (nRetLen <= 0 || nUTF8StrLen < nRetLen)
            {
                nRetLen = 0; // size query failed or caller's buffer is too small
            }
            else
            {
                nRetLen = ::WideCharToMultiByte(CP_UTF8, 0, lpUnicodeStr, -1,
                                                reinterpret_cast<char*>(lpUTF8Str),
                                                nUTF8StrLen, NULL, NULL);
            }
        }
    }

    // Single exit point so the intermediate buffer is released on every path.
    delete[] lpUnicodeStr;
    return nRetLen;
}
// UTF8編碼轉換到GBK編碼int UTF8ToGBK(unsigned char* lpUTF8Str,unsigned char* lpGBKStr,int nGBKStrLen)
{
wchar_t *lpUnicodeStr = NULL;
int nRetLen =0;
if(!lpUTF8Str) //如果UTF8字串為NULL則出錯退出return0;
nRetLen = ::MultiByteToWideChar(CP_UTF8,0,(char*)lpUTF8Str,-1,NULL,NULL); //獲取轉換到Unicode編碼後所需要的字元空間長度 lpUnicodeStr =new WCHAR[nRetLen +1]; //為Unicode字串空間 nRetLen = ::MultiByteToWideChar(CP_UTF8,0,(char*)lpUTF8Str,-1,lpUnicodeStr,nRetLen); //轉換到Unicode編碼if(!nRetLen) //轉換失敗則出錯退出return0;
nRetLen = ::WideCharToMultiByte(CP_ACP,0,lpUnicodeStr,-1,NULL,NULL,NULL,NULL); //獲取轉換到GBK編碼後所需要的字元空間長度if(!lpGBKStr) //輸出緩衝區為空則返回轉換後需要的空間大小 {
if(lpUnicodeStr)
delete []lpUnicodeStr;
return nRetLen;
}
if(nGBKStrLen < nRetLen) //如果輸出緩衝區長度不夠則退出 {
if(lpUnicodeStr)
delete []lpUnicodeStr;
return0;
}
nRetLen = ::WideCharToMultiByte(CP_ACP,0,lpUnicodeStr,-1,(char*)lpGBKStr,nRetLen,NULL,NULL); //轉換到GBK編碼if(lpUnicodeStr)
delete []lpUnicodeStr;
return nRetLen;
}
//使用這兩個函式的例子int main()
{
char cGBKStr[] ="我是中國人!";
char* lpGBKStr = NULL;
char* lpUTF8Str = NULL;
FILE * fp = NULL;
int nRetLen =0;
nRetLen = GBKToUTF8((unsigned char*)cGBKStr,NULL,NULL);
printf("轉換後的字串需要的空間長度為:%d ",nRetLen);
lpUTF8Str =newchar[nRetLen +1];
nRetLen = GBKToUTF8((unsigned char*)cGBKStr,(unsigned char*)lpUTF8Str,nRetLen);
if(nRetLen)
{
printf("GBKToUTF8轉換成功!");
}
else
{
printf("GBKToUTF8轉換失敗!");
goto Ret0;
}
fp = fopen("C:\GBK轉UTF8.txt","wb"); //儲存到文字檔案 fwrite(lpUTF8Str,nRetLen,1,fp);
fclose(fp);
getchar(); //先去開啟那個文字檔案看看,單擊記事本的“檔案”-“另存為”選單,在對話方塊中看到編碼框變為了“UTF-8”說明轉換成功了
nRetLen = UTF8ToGBK((unsigned char*)lpUTF8Str,NULL,NULL); //再轉回來 printf("轉換後的字串需要的空間長度為:%d ",nRetLen);
lpGBKStr =newchar[nRetLen +1];
nRetLen = UTF8ToGBK((unsigned char*)lpUTF8Str,(unsigned char*)lpGBKStr,nRetLen);
if(nRetLen)
{
printf("UTF8ToGBK轉換成功! ");
}
else
{
printf("UTF8ToGBK轉換失敗! ");
goto Ret0;
}
fp = fopen("C:\UTF8轉GBK.txt","wb"); //儲存到文字檔案 fwrite(lpGBKStr,nRetLen,1,fp);
fclose(fp);
getchar(); //再去開啟文字檔案看看,發現編碼框又變為了“ANSI”說明轉換成功了
Ret0:
if(lpGBKStr)
delete []lpGBKStr;
if(lpUTF8Str)
delete []lpUTF8Str;
return0;
}
在網上看到的一些文章說,UTF8轉換為GBK的時候會有問題,特別是當UTF8字串中的漢字數為奇數時。關於這個問題我沒有去驗證過,而且我對UTF8和GB2312的編碼還不是很熟悉,呵呵,等以後有空的時候再去了解一下編碼吧。