MFC/C++:用 char*(BYTE*)讀取 UTF-8 檔案時出現中文亂碼的解碼方法
阿新 • • 發佈:2020-07-15
//utf8Str:以位元組(char*或者Byte*)讀取中文的字串(亂碼) CString UTF8toUnicode(const char* utf8Str) { UINT theLength=strlen(utf8Str); return UTF8toUnicode(utf8Str,theLength); } CString UTF8toUnicode(const char* utf8Str,UINT length) { CString unicodeStr; unicodeStr=_T(""); if (!utf8Str)return unicodeStr; if (length==0) return unicodeStr; WCHAR chr=0;//一箇中文字元 for (UINT i=0;i<length;) { //UTF8的三種中文格式 if ((0x80&utf8Str[i])==0) //只佔用一個位元組 { chr=utf8Str[i]; i++; } else if((0xE0&utf8Str[i])==0xC0) //佔用兩個位元組 { chr =(utf8Str[i+0]&0x3F)<<6; chr|=(utf8Str[i+1]&0x3F); i+=2; } else if((0xF0&utf8Str[i])==0xE0)//佔用三個位元組 { chr =(utf8Str[i+0]&0x1F)<<12; chr|=(utf8Str[i+1]&0x3F)<<6; chr|=(utf8Str[i+2]&0x3F); i+=3; } else { return unicodeStr; } unicodeStr.AppendChar(chr); } return unicodeStr; }
UTF-8 的編碼規則請參考百度百科的「UTF-8」條目,並仔細研究。