Jsoncpp 中文漢字unicode亂碼解決及標點問題解決
阿新 • • 發佈:2019-02-11
由於專案中用到了 JSON 資料結構,客戶端採用 VC 編寫,服務端資料介面採用 PHP 編寫;JSON 中的中文經 Unicode 編碼(\uXXXX 轉義)後,客戶端解析時出現了亂碼的情形。
網上有一個方法比較好用,就是修改json_reader.cpp中的codePointToUTF8函式。
修改為以下:
/// Converts a Unicode code point into the bytes stored in a parsed string.
///
/// Code points in the ranges U+4E00..U+9FA5 and U+F900..U+FA2D are converted
/// to the multibyte encoding of the "chs" locale instead of UTF-8. Every other
/// code point up to U+10FFFF is encoded as UTF-8.
/// NOTE(review): "chs" and wcstombs_s are MSVC CRT facilities; "chs" is
/// presumably the Simplified Chinese locale (code page 936) - confirm.
///
/// If the locale cannot be queried or switched, or the conversion fails, the
/// code point falls back to its regular 3-byte UTF-8 form rather than being
/// dropped.
///
/// \param cp Unicode code point to encode.
/// \return The encoded bytes, or an empty string when cp is above 0x10FFFF.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
  JSONCPP_STRING result;

  // based on description from http://en.wikipedia.org/wiki/UTF-8

  if (cp <= 0x7f) {
    result.resize(1);
    result[0] = static_cast<char>(cp);
  } else if (cp <= 0x7FF) {
    result.resize(2);
    result[1] = static_cast<char>(0x80 | (0x3f & cp));
    result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
  } else if (cp <= 0xFFFF) {
    const bool useLocaleEncoding = (cp >= 0x4E00 && cp <= 0x9FA5) ||
                                   (cp >= 0xF900 && cp <= 0xFA2D);
    if (useLocaleEncoding) {
      wchar_t src[2] = { 0 };
      char dest[5] = { 0 };
      src[0] = static_cast<wchar_t>(cp);

      // setlocale may return NULL; building a std::string from NULL is
      // undefined behaviour, so only proceed when the query succeeded.
      const char* current = setlocale(LC_ALL, NULL);
      if (current != NULL) {
        // Copy the name: the returned buffer may be overwritten by the next
        // setlocale call.
        const std::string curLocale(current);
        if (setlocale(LC_ALL, "chs") != NULL) {
          if (wcstombs_s(NULL, dest, sizeof(dest), src, 2) == 0) {
            result = dest;
          }
          // Restore the caller's locale whether or not the conversion worked.
          setlocale(LC_ALL, curLocale.c_str());
        }
      }
    }
    // Either the code point is outside the special ranges or the locale
    // conversion produced nothing: emit the standard 3-byte UTF-8 sequence.
    if (result.empty()) {
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
    }
  } else if (cp <= 0x10FFFF) {
    result.resize(4);
    result[3] = static_cast<char>(0x80 | (0x3f & cp));
    result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
    result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
    result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
  }

  return result;
}
經過以上修改,中文漢字確實能正常顯示了,但是中文的標點符號仍為亂碼。於是查找了中文標點的 Unicode 編碼,只要在上面處理中文漢字的地方,加入標點編碼的處理即可。
句號 0x3002 。
問號 0xFF1F ?
歎號 0xFF01 !
逗號 0xFF0C ,
頓號 0x3001 、
分號 0xFF1B ;
冒號 0xFF1A :
引號 0x300C 「
0x300D 」
引號 0x300E 『
0x300F 』
引號 0x2018 ‘
0x2019 ’
引號 0x201C “
0x201D ”
括號 0xFF08 (
0xFF09 )
括號 0x3014 〔
0x3015 〕
括號 0x3010 【
0x3011 】
破折號 0x2014 —
省略號 0x2026 …
連線號 0x2013 –
間隔號 0xFF0E .
書名號 0x300A 《
0x300B 》
書名號 0x3008 〈
0x3009 〉
最終修改後的函式為:
/// Converts a Unicode code point into the bytes stored in a parsed string.
///
/// Code points in the ranges U+4E00..U+9FA5 and U+F900..U+FA2D, plus the
/// punctuation code points listed in the switch below, are converted to the
/// multibyte encoding of the "chs" locale instead of UTF-8. Every other code
/// point up to U+10FFFF is encoded as UTF-8.
/// NOTE(review): "chs" and wcstombs_s are MSVC CRT facilities; "chs" is
/// presumably the Simplified Chinese locale (code page 936) - confirm.
///
/// If the locale cannot be queried or switched, or the conversion fails, the
/// code point falls back to its regular 3-byte UTF-8 form rather than being
/// dropped.
///
/// \param cp Unicode code point to encode.
/// \return The encoded bytes, or an empty string when cp is above 0x10FFFF.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
  JSONCPP_STRING result;

  // based on description from http://en.wikipedia.org/wiki/UTF-8

  if (cp <= 0x7f) {
    result.resize(1);
    result[0] = static_cast<char>(cp);
  } else if (cp <= 0x7FF) {
    result.resize(2);
    result[1] = static_cast<char>(0x80 | (0x3f & cp));
    result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
  } else if (cp <= 0xFFFF) {
    bool useLocaleEncoding = (cp >= 0x4E00 && cp <= 0x9FA5) ||
                             (cp >= 0xF900 && cp <= 0xFA2D);
    // Individual punctuation code points that get the same treatment.
    switch (cp) {
    case 0x2013: // en dash
    case 0x2014: // em dash
    case 0x2018: // left single quotation mark
    case 0x2019: // right single quotation mark
    case 0x201C: // left double quotation mark
    case 0x201D: // right double quotation mark
    case 0x2026: // horizontal ellipsis
    case 0x3001: // ideographic comma
    case 0x3002: // ideographic full stop
    case 0x3008: // left angle bracket
    case 0x3009: // right angle bracket
    case 0x300A: // left double angle bracket
    case 0x300B: // right double angle bracket
    case 0x300C: // left corner bracket
    case 0x300D: // right corner bracket
    case 0x300E: // left white corner bracket
    case 0x300F: // right white corner bracket
    case 0x3010: // left black lenticular bracket
    case 0x3011: // right black lenticular bracket
    case 0x3014: // left tortoise shell bracket
    case 0x3015: // right tortoise shell bracket
    case 0xFF01: // fullwidth exclamation mark
    case 0xFF08: // fullwidth left parenthesis
    case 0xFF09: // fullwidth right parenthesis
    case 0xFF0C: // fullwidth comma
    case 0xFF0E: // fullwidth full stop
    case 0xFF1A: // fullwidth colon
    case 0xFF1B: // fullwidth semicolon
    case 0xFF1F: // fullwidth question mark
      useLocaleEncoding = true;
      break;
    default:
      break;
    }

    if (useLocaleEncoding) {
      wchar_t src[2] = { 0 };
      char dest[5] = { 0 };
      src[0] = static_cast<wchar_t>(cp);

      // setlocale may return NULL; building a std::string from NULL is
      // undefined behaviour, so only proceed when the query succeeded.
      const char* current = setlocale(LC_ALL, NULL);
      if (current != NULL) {
        // Copy the name: the returned buffer may be overwritten by the next
        // setlocale call.
        const std::string curLocale(current);
        if (setlocale(LC_ALL, "chs") != NULL) {
          if (wcstombs_s(NULL, dest, sizeof(dest), src, 2) == 0) {
            result = dest;
          }
          // Restore the caller's locale whether or not the conversion worked.
          setlocale(LC_ALL, curLocale.c_str());
        }
      }
    }
    // Either the code point is not one of the special cases or the locale
    // conversion produced nothing: emit the standard 3-byte UTF-8 sequence.
    if (result.empty()) {
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
    }
  } else if (cp <= 0x10FFFF) {
    result.resize(4);
    result[3] = static_cast<char>(0x80 | (0x3f & cp));
    result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
    result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
    result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
  }

  return result;
}
即可完美解決中文亂碼問題。需要注意的是:修改後,上述範圍內的漢字與標點輸出的是「chs」locale 的多位元組編碼(簡體中文本地內碼),而不再是 UTF-8,因此此做法只適用於以本地內碼顯示字串的 VC 客戶端。