1. 程式人生 > >Jsoncpp 中文漢字unicode亂碼解決及標點問題解決

Jsoncpp 中文漢字unicode亂碼解決及標點問題解決

由於專案中用到了JSON資料結構,客戶端採用VC編寫,服務端資料介面採用PHP編寫,遇到了JSON中文Unicode編碼後,客戶端出現了亂碼的情形。

網上有一個比較好用的方法,就是修改 json_reader.cpp 中的 codePointToUTF8 函式:對中文字元不再輸出 UTF-8 位元組,而是先切換到 "chs" 地區設定(locale),再用 wcstombs_s 轉成該地區設定的多位元組編碼。

修改為以下:

// Encodes the Unicode code point `cp` as a byte string.
//
// NOTE: despite its name, the result is not always UTF-8. Code points in
// 0x4E00-0x9FA5 and 0xF900-0xFA2D are converted with wcstombs_s() under the
// "chs" locale, i.e. into that locale's multibyte encoding. This is the
// workaround the surrounding article describes for garbled Chinese text on a
// VC client. Every other code point is encoded as standard UTF-8.
//
// If the locale cannot be queried or switched, or the conversion fails, the
// code point is emitted as regular 3-byte UTF-8 instead of being dropped.
//
// Returns an empty string when `cp` is greater than 0x10FFFF.
//
// The locale path temporarily changes the C locale via setlocale(), which is
// process-wide by default, so it should not race with other locale-dependent
// code.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
  JSONCPP_STRING result;

  // based on description from http://en.wikipedia.org/wiki/UTF-8

  if (cp <= 0x7f) {
    result.resize(1);
    result[0] = static_cast<char>(cp);
  } else if (cp <= 0x7FF) {
    result.resize(2);
    result[1] = static_cast<char>(0x80 | (0x3f & cp));
    result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
  } else if (cp <= 0xFFFF) {
    // Ranges the article routes through the local multibyte encoding.
    const bool wantsLocalEncoding =
        (cp >= 0x4E00 && cp <= 0x9FA5) || (cp >= 0xF900 && cp <= 0xFA2D);
    bool converted = false;

    if (wantsLocalEncoding) {
      // Only LC_CTYPE influences wcstombs_s(), so leave the other locale
      // categories untouched.
      const char* active = setlocale(LC_CTYPE, NULL);
      if (active != NULL) {
        // Copy the name first: the next setlocale() call may invalidate the
        // returned pointer.
        const std::string savedLocale(active);
        if (setlocale(LC_CTYPE, "chs") != NULL) {
          wchar_t src[2] = { 0 };
          char dest[5] = { 0 };
          src[0] = static_cast<wchar_t>(cp);
          // At most 2 bytes are requested, as in the original workaround.
          if (wcstombs_s(NULL, dest, sizeof(dest), src, 2) == 0 &&
              dest[0] != '\0') {
            result = dest;
            converted = true;
          }
          setlocale(LC_CTYPE, savedLocale.c_str());
        }
      }
    }

    if (!converted) {
      // Standard 3-byte UTF-8 sequence.
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
    }
  } else if (cp <= 0x10FFFF) {
    result.resize(4);
    result[3] = static_cast<char>(0x80 | (0x3f & cp));
    result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
    result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
    result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
  }

  return result;
}

經過以上修改,中文確實能顯示了,但是中文的標點符號仍為亂碼。於是查找了中文標點的 Unicode 編碼,只要在上面判斷中文漢字的條件中,加入這些標點編碼即可。

句號    0x3002  。
問號    0xFF1F  ?
歎號    0xFF01  !
逗號    0xFF0C  ,
頓號    0x3001  、
分號    0xFF1B  ;
冒號    0xFF1A  :
引號    0x300C  「
        0x300D  」
引號    0x300E  『
        0x300F  』
引號    0x2018  ‘
        0x2019  ’
引號    0x201C  “
        0x201D  ”
括號    0xFF08  (
        0xFF09  )
括號    0x3014  〔
        0x3015  〕
括號    0x3010  【
        0x3011  】
破折號  0x2014  —
省略號  0x2026  …
連線號  0x2013  –
間隔號  0xFF0E  .
書名號  0x300A  《
        0x300B  》
書名號  0x3008  〈
        0x3009  〉

最終修改後的函式為:

// Encodes the Unicode code point `cp` as a byte string.
//
// NOTE: despite its name, the result is not always UTF-8. Code points in
// 0x4E00-0x9FA5 and 0xF900-0xFA2D, plus the Chinese punctuation marks listed
// in the switch below, are converted with wcstombs_s() under the "chs"
// locale, i.e. into that locale's multibyte encoding. This is the workaround
// the surrounding article describes for garbled Chinese text on a VC client.
// Every other code point is encoded as standard UTF-8.
//
// If the locale cannot be queried or switched, or the conversion fails, the
// code point is emitted as regular 3-byte UTF-8 instead of being dropped.
//
// Returns an empty string when `cp` is greater than 0x10FFFF.
//
// The locale path temporarily changes the C locale via setlocale(), which is
// process-wide by default, so it should not race with other locale-dependent
// code.
static inline JSONCPP_STRING codePointToUTF8(unsigned int cp) {
  JSONCPP_STRING result;

  // based on description from http://en.wikipedia.org/wiki/UTF-8

  if (cp <= 0x7f) {
    result.resize(1);
    result[0] = static_cast<char>(cp);
  } else if (cp <= 0x7FF) {
    result.resize(2);
    result[1] = static_cast<char>(0x80 | (0x3f & cp));
    result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
  } else if (cp <= 0xFFFF) {
    // Ranges the article routes through the local multibyte encoding.
    bool wantsLocalEncoding =
        (cp >= 0x4E00 && cp <= 0x9FA5) || (cp >= 0xF900 && cp <= 0xFA2D);

    if (!wantsLocalEncoding) {
      // Chinese punctuation that gets the same treatment.
      switch (cp) {
      case 0x2013: // en dash
      case 0x2014: // em dash
      case 0x2018: // left single quotation mark
      case 0x2019: // right single quotation mark
      case 0x201C: // left double quotation mark
      case 0x201D: // right double quotation mark
      case 0x2026: // horizontal ellipsis
      case 0x3001: // ideographic comma
      case 0x3002: // ideographic full stop
      case 0x3008: // left angle bracket
      case 0x3009: // right angle bracket
      case 0x300A: // left double angle bracket
      case 0x300B: // right double angle bracket
      case 0x300C: // left corner bracket
      case 0x300D: // right corner bracket
      case 0x300E: // left white corner bracket
      case 0x300F: // right white corner bracket
      case 0x3010: // left black lenticular bracket
      case 0x3011: // right black lenticular bracket
      case 0x3014: // left tortoise shell bracket
      case 0x3015: // right tortoise shell bracket
      case 0xFF01: // fullwidth exclamation mark
      case 0xFF08: // fullwidth left parenthesis
      case 0xFF09: // fullwidth right parenthesis
      case 0xFF0C: // fullwidth comma
      case 0xFF0E: // fullwidth full stop
      case 0xFF1A: // fullwidth colon
      case 0xFF1B: // fullwidth semicolon
      case 0xFF1F: // fullwidth question mark
        wantsLocalEncoding = true;
        break;
      default:
        break;
      }
    }

    bool converted = false;

    if (wantsLocalEncoding) {
      // Only LC_CTYPE influences wcstombs_s(), so leave the other locale
      // categories untouched.
      const char* active = setlocale(LC_CTYPE, NULL);
      if (active != NULL) {
        // Copy the name first: the next setlocale() call may invalidate the
        // returned pointer.
        const std::string savedLocale(active);
        if (setlocale(LC_CTYPE, "chs") != NULL) {
          wchar_t src[2] = { 0 };
          char dest[5] = { 0 };
          src[0] = static_cast<wchar_t>(cp);
          // At most 2 bytes are requested, as in the original workaround.
          if (wcstombs_s(NULL, dest, sizeof(dest), src, 2) == 0 &&
              dest[0] != '\0') {
            result = dest;
            converted = true;
          }
          setlocale(LC_CTYPE, savedLocale.c_str());
        }
      }
    }

    if (!converted) {
      // Standard 3-byte UTF-8 sequence.
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
    }
  } else if (cp <= 0x10FFFF) {
    result.resize(4);
    result[3] = static_cast<char>(0x80 | (0x3f & cp));
    result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
    result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
    result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
  }

  return result;
}


即可完美解決中文亂碼問題。