C++字串處理
阿新 • • 發佈:2019-01-05
標題:常見字串處理程式碼示例
測試環境:boost 1.55、boost 1.57
注意:部份程式碼依賴Windows API
[1]刪除字串開頭與結尾的指定字元
#include <boost/algorithm/string.hpp>
...
boost::trim_if(vecRec[0], boost::is_any_of("\" \n\r\t'"));
[2]分割字串
#include <boost/algorithm/string/split.hpp>
std::vector<std::string> vecRec;
boost::split(vecRec, vecDst[i], boost::is_any_of(":"));
[3]字元替換
將字串 s 中所有的字元 'x' 替換為 'y'。
std::replace( s.begin(), s.end(),'x','y');
標頭檔案
#pragma once
#include <string>
#include <vector>
#include <map>

// String utility helpers: splitting, key/value parsing, code-page conversion
// (UTF-8 / ANSI-GBK / wide), substring replacement and URL encoding.
namespace StringHelper
{
    // Splits `s` at every character contained in `delim` and appends the
    // resulting pieces to `ret` (`ret` is not cleared first).
    void SplitStr(const std::string& s, std::string& delim,std::vector<std::string> &ret);

    // Parses ';'-separated "key=value" entries from `strSrc` into
    // `mapKeyValue`. Entries that do not split into exactly two parts on '='
    // are skipped; an existing key is overwritten.
    void Str2Map(const std::string strSrc, std::map<std::string, std::string> &mapKeyValue);

    // Converts UTF-8 text to a multibyte string via s2ws_UTF8ToGBK() and
    // ws2s_gbk(). Returns an empty string if the conversion throws.
    std::string UTF8ToGBK(const std::string &unicode);

    // Converts text in the system ANSI code page (CP_ACP) to UTF-8.
    std::string GBKToUTF8(const std::string& gbk);

    // Decodes UTF-8 text into a wide string.
    std::wstring s2ws_UTF8ToGBK(std::string sUTF8);

    // Converts text in the system ANSI code page (CP_ACP) to a wide string.
    std::wstring s2ws_gbk(const std::string& s);

    // Converts a wide string to a multibyte string using the "chs" C locale.
    std::string ws2s_gbk(const std::wstring& s);

    // Replaces every occurrence of `strsrc` inside `strBig` with `strdst`,
    // modifying `strBig` in place.
    void string_replace( std::wstring &strBig, const std::wstring &strsrc, const std::wstring &strdst );

    // Returns `strSrc` URL-encoded. The input is treated as CP_ACP text and
    // converted to UTF-8 first; a space becomes '+', and every byte other than
    // letters, digits, '-', '.' and '~' becomes an upper-case %XX escape.
    std::string encodeURI(std::string strSrc);

    // Copies the C string `p` into a std::string, frees `p`, and returns the copy.
    std::string delHeapMemory(char *p);
};
實現檔案
#include "StringHelper.h"

#include <Windows.h>

#include <algorithm>
#include <climits>
#include <clocale>
#include <codecvt>
#include <cstdlib>
#include <cstring>
#include <locale>
#include <new>
#include <sstream>
#include <string>
#include <vector>

#include <boost/algorithm/string.hpp>
#include <boost/regex.hpp>

/*
 * VC++ 2008 SP1 offers a compiler option that makes narrow string literals
 * UTF-8 by default:
 *     #pragma execution_character_set("utf-8")
 * C++11 additionally defines the u8 prefix (analogous to L):
 *     std::string nstr = u8"...non-ASCII text...";
 * but VS2010 SP1 does not support it.
 */

namespace
{
    // Converts the NUL-terminated string `zstr`, encoded in `codePage`, to a
    // wide string. Returns an empty string for empty input or on API failure.
    std::wstring BytesToWide(UINT codePage, const char* zstr)
    {
        // With a length of -1 the API processes the terminator too, so the
        // returned counts include the trailing NUL.
        const int count = MultiByteToWideChar(codePage, 0, zstr, -1, NULL, 0);
        if (count <= 1)
        {
            return std::wstring();   // 0 = failure, 1 = terminator only
        }
        std::vector<wchar_t> buffer(static_cast<std::size_t>(count));
        const int written = MultiByteToWideChar(codePage, 0, zstr, -1, &buffer[0], count);
        if (written <= 0)
        {
            return std::wstring();
        }
        return std::wstring(&buffer[0], static_cast<std::size_t>(written - 1));
    }

    // Converts `wide` (up to its first NUL) to a byte string encoded in
    // `codePage`. Returns an empty string for empty input or on API failure.
    std::string WideToBytes(UINT codePage, const std::wstring& wide)
    {
        const int count = WideCharToMultiByte(codePage, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
        if (count <= 1)
        {
            return std::string();
        }
        std::vector<char> buffer(static_cast<std::size_t>(count));
        const int written = WideCharToMultiByte(codePage, 0, wide.c_str(), -1, &buffer[0], count, NULL, NULL);
        if (written <= 0)
        {
            return std::string();
        }
        return std::string(&buffer[0], static_cast<std::size_t>(written - 1));
    }

    // Switches the C locale (LC_ALL) for the lifetime of the object and
    // restores the previous one on destruction, including on exceptions.
    // NOTE: setlocale is process-wide, so this is not safe to use while other
    // threads perform locale-dependent work.
    class ScopedCLocale
    {
    public:
        explicit ScopedCLocale(const char* name) : hasSaved_(false)
        {
            // setlocale may return NULL; never build a std::string from it.
            const char* current = std::setlocale(LC_ALL, NULL);
            if (current != NULL)
            {
                saved_ = current;
                hasSaved_ = true;
            }
            std::setlocale(LC_ALL, name);
        }

        ~ScopedCLocale()
        {
            if (hasSaved_)
            {
                std::setlocale(LC_ALL, saved_.c_str());
            }
        }

    private:
        ScopedCLocale(const ScopedCLocale&);            // non-copyable
        ScopedCLocale& operator=(const ScopedCLocale&); // non-copyable

        std::string saved_;
        bool hasSaved_;
    };

    // True for the ASCII characters UrlEncode copies through unescaped.
    // Deliberately locale-independent: isalpha()/isdigit() may report bytes
    // >= 0x80 as alphabetic under a non-"C" locale, which would leave UTF-8
    // bytes unescaped.
    bool IsUrlSafeAscii(unsigned char c)
    {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')
            || c == '-' || c == '.' || c == '~';
    }
}

namespace StringHelper
{
    // Splits `s` at every character contained in `delim` and appends the
    // tokens to `ret` (which is not cleared). Empty tokens are kept, and the
    // text after the last delimiter is always appended, so a string without
    // any delimiter yields itself as the single token.
    void SplitStr(const std::string& s, std::string& delim, std::vector<std::string>& ret)
    {
        std::string::size_type tokenStart = 0;
        std::string::size_type delimPos = s.find_first_of(delim, tokenStart);
        while (delimPos != std::string::npos)
        {
            ret.push_back(s.substr(tokenStart, delimPos - tokenStart));
            tokenStart = delimPos + 1;
            delimPos = s.find_first_of(delim, tokenStart);
        }
        // Tail token. This used to live inside the loop, which pushed every
        // non-first token twice and dropped delimiter-free input entirely.
        ret.push_back(s.substr(tokenStart));
    }

    // Parses ';'-separated "key=value" entries from `strSrc` into
    // `mapKeyValue`. Entries that do not split into exactly two parts on '='
    // are skipped. Leading/trailing spaces are stripped from the key only;
    // entries whose key is empty after stripping are skipped. An existing key
    // is overwritten.
    void Str2Map(const std::string strSrc, std::map<std::string, std::string>& mapKeyValue)
    {
        std::vector<std::string> entries;
        boost::split(entries, strSrc, boost::is_any_of(";"));
        for (std::size_t i = 0; i < entries.size(); ++i)
        {
            std::vector<std::string> parts;
            boost::split(parts, entries[i], boost::is_any_of("="));
            if (parts.size() != 2)
            {
                continue;
            }
            // The previous substr(first, last) call passed an end index where
            // a length is expected and threw std::out_of_range for an empty or
            // all-space key.
            boost::trim_if(parts[0], boost::is_any_of(" "));
            if (parts[0].empty())
            {
                continue;
            }
            mapKeyValue[parts[0]] = parts[1];
        }
    }

    // Decodes UTF-8 text into a wide string.
    // Throws std::range_error (from std::wstring_convert) on malformed input.
    // No longer replaces the process-global std::locale as a side effect; the
    // former wstringstream round trip copied the wide string unchanged.
    std::wstring s2ws_UTF8ToGBK(std::string sUTF8)
    {
        std::wstring_convert<std::codecvt_utf8<wchar_t> > converter;
        return converter.from_bytes(sUTF8);
    }

    // Converts UTF-8 text to a multibyte string via s2ws_UTF8ToGBK() and
    // ws2s_gbk(). Best effort: returns an empty string if anything throws.
    std::string UTF8ToGBK(const std::string& strUTF8)
    {
        try
        {
            return ws2s_gbk(s2ws_UTF8ToGBK(strUTF8));
        }
        catch (...)
        {
            // Deliberately swallowed: callers receive "" on any failure.
        }
        return "";
    }

    // Converts text in the system ANSI code page (CP_ACP) to UTF-8.
    // Conversion stops at the first NUL in `gbk`; returns an empty string if
    // the Win32 conversion fails.
    std::string GBKToUTF8(const std::string& gbk)
    {
        return WideToBytes(CP_UTF8, BytesToWide(CP_ACP, gbk.c_str()));
    }

    // Converts text in the system ANSI code page (CP_ACP) to a wide string.
    // Conversion stops at the first NUL in `s`; returns an empty string if the
    // Win32 conversion fails.
    std::wstring s2ws_gbk(const std::string& s)
    {
        return BytesToWide(CP_ACP, s.c_str());
    }

    // Converts a wide string to a multibyte string using the "chs" C locale.
    // The previous C locale is restored before returning. If wcstombs hits an
    // unconvertible character, whatever it managed to write is returned.
    std::string ws2s_gbk(const std::wstring& ws)
    {
        const ScopedCLocale localeGuard("chs");

        // Size the buffer from the locale actually in effect, so that a failed
        // switch to "chs" cannot overrun the former fixed 2-bytes-per-char
        // estimate.
        const std::size_t capacity = ws.size() * static_cast<std::size_t>(MB_CUR_MAX) + 1;
        std::vector<char> buffer(capacity, '\0');

        // Leave the last byte untouched so the buffer is always NUL-terminated.
        std::wcstombs(&buffer[0], ws.c_str(), capacity - 1);
        return std::string(&buffer[0]);
    }

    // Replaces every occurrence of `strsrc` inside `strBig` with `strdst`, in
    // place. Text inserted by a replacement is not rescanned. An empty
    // `strsrc` leaves `strBig` untouched (it used to loop forever).
    void string_replace(std::wstring& strBig, const std::wstring& strsrc, const std::wstring& strdst)
    {
        if (strsrc.empty())
        {
            return;
        }
        std::wstring::size_type pos = 0;
        while ((pos = strBig.find(strsrc, pos)) != std::wstring::npos)
        {
            strBig.replace(pos, strsrc.size(), strdst);
            pos += strdst.size();
        }
    }

    // Copies the C string `p` into a std::string, frees `p`, and returns the
    // copy. A NULL `p` yields an empty string.
    // NOTE(review): assumes `p` was allocated with new char[] (the only way a
    // new-allocated buffer can hold a non-empty C string) -- confirm at call
    // sites.
    std::string delHeapMemory(char* p)
    {
        if (p == NULL)
        {
            return std::string();
        }
        std::string copy(p);
        delete[] p;
        return copy;
    }

    // URL-encodes `szSrc` into `pBuf`.
    //   szSrc      NUL-terminated text in the system ANSI code page (CP_ACP);
    //              it is converted to UTF-8 before escaping.
    //   pBuf       output buffer, always NUL-terminated on TRUE.
    //   cbBufLen   size of `pBuf` in bytes, including the terminator.
    //   bUpperCase TRUE for %XX escapes with A-F, FALSE for a-f.
    // ASCII letters, digits, '-', '.' and '~' are copied through, a space
    // becomes '+', and every other byte becomes a %XX escape. Output that does
    // not fit is silently truncated at an escape boundary.
    // Returns FALSE for invalid arguments or when memory cannot be allocated.
    BOOL UrlEncode(const char* szSrc, char* pBuf, int cbBufLen, BOOL bUpperCase)
    {
        if (szSrc == NULL || pBuf == NULL || cbBufLen <= 0)
        {
            return FALSE;
        }
        if (szSrc[0] == '\0')
        {
            pBuf[0] = '\0';
            return TRUE;
        }

        // Convert to UTF-8 first.
        std::string utf8;
        try
        {
            utf8 = WideToBytes(CP_UTF8, BytesToWide(CP_ACP, szSrc));
        }
        catch (const std::bad_alloc&)
        {
            return FALSE;
        }

        const char* const hexDigits = bUpperCase ? "0123456789ABCDEF" : "0123456789abcdef";

        int cbDest = 0;   // bytes written so far
        for (std::string::size_type i = 0; i < utf8.size() && cbDest < cbBufLen - 1; ++i)
        {
            const unsigned char c = static_cast<unsigned char>(utf8[i]);
            if (IsUrlSafeAscii(c))
            {
                pBuf[cbDest++] = static_cast<char>(c);
            }
            else if (c == ' ')
            {
                pBuf[cbDest++] = '+';
            }
            else
            {
                // Is there room for a full three-character escape?
                if (cbDest + 3 > cbBufLen - 1)
                {
                    break;
                }
                pBuf[cbDest++] = '%';
                pBuf[cbDest++] = hexDigits[c >> 4];
                pBuf[cbDest++] = hexDigits[c & 0xF];
            }
        }

        pBuf[cbDest] = '\0';
        return TRUE;
    }

    // Returns `strSrc` (CP_ACP text) URL-encoded with upper-case escapes; see
    // UrlEncode for the exact rules. The buffer is sized for the worst case,
    // so long input is no longer truncated; the old `char *buf[512]` was an
    // array of pointers whose byte size depended on the architecture.
    std::string encodeURI(std::string strSrc)
    {
        // Worst case: one input byte -> at most one UTF-16 unit -> at most
        // three UTF-8 bytes -> three output characters each.
        const std::size_t maxCapacity = static_cast<std::size_t>(INT_MAX);
        std::size_t capacity = maxCapacity;
        if (strSrc.size() < (maxCapacity - 1) / 9)
        {
            capacity = strSrc.size() * 9 + 1;
        }

        std::vector<char> buffer(capacity, '\0');
        UrlEncode(strSrc.c_str(), &buffer[0], static_cast<int>(capacity), TRUE);
        return std::string(&buffer[0]);   // stays empty if UrlEncode failed
    }
}
字串轉大寫(或小寫)
http://www.cnblogs.com/mmix2009/archive/2013/07/19/3200150.html