1. 程式人生 > >C++字串處理

C++字串處理

標題:常見字串處理程式碼示例

測試環境:boost 1.55、boost 1.57

注意:部分程式碼依賴 Windows API(例如 MultiByteToWideChar、WideCharToMultiByte),這些函式只能在 Windows 上編譯與執行。

[1]刪除字串首尾的指定字元

#include <boost/algorithm/string.hpp>

...

boost::trim_if(vecRec[0], boost::is_any_of("\" \n\r\t'"));

[2]分割字串

#include <boost/algorithm/string/split.hpp>

std::vector<std::string> vecRec;
boost::split(vecRec, vecDst[i], boost::is_any_of(":"));

[3]字元替換

將字串 s 中所有的字元 'x' 替換為 'y'(std::replace 定義於 <algorithm>)。

std::replace( s.begin(), s.end(),'x','y');

標頭檔案

#pragma once

#include <string>
#include <vector>
#include <map>

// Free-function helpers for splitting, replacing and transcoding strings.
// Several of the conversions are implemented with the Windows API.
namespace StringHelper
{
	// Splits s at any character contained in delim and appends the pieces to ret.
	void SplitStr(const std::string& s, std::string& delim,std::vector<std::string> &ret);  
	// Parses "key=value" entries separated by ';' from strSrc into mapKeyValue.
	void Str2Map(const std::string strSrc, std::map<std::string, std::string> &mapKeyValue);
	// Converts UTF-8 encoded bytes to a multibyte string (via s2ws_UTF8ToGBK and
	// ws2s_gbk); returns an empty string if the conversion throws.
	// NOTE: the parameter is named "unicode" here, but the input is UTF-8 bytes.
	std::string UTF8ToGBK(const std::string &unicode);
	// Converts a string in the system ANSI code page (CP_ACP) to UTF-8.
	std::string GBKToUTF8(const std::string& gbk);
	// Decodes UTF-8 bytes into a wide string; also replaces the global C++ locale
	// with "Chinese-simplified" as a side effect.
	std::wstring s2ws_UTF8ToGBK(std::string sUTF8);

	// Widens a string in the system ANSI code page (CP_ACP) to a wide string.
	std::wstring s2ws_gbk(const std::string& s);
	// Narrows a wide string with wcstombs while the C locale is temporarily "chs".
	std::string ws2s_gbk(const std::wstring& s);
	// Replaces every occurrence of strsrc inside strBig with strdst, in place.
	void  string_replace( std::wstring &strBig, const std::wstring &strsrc, const std::wstring &strdst );

	// Returns the URL-encoded form of strSrc: the input is read in the system ANSI
	// code page, converted to UTF-8, and unsafe bytes are written as %XX.
	std::string encodeURI(std::string strSrc);

	// Copies the NUL-terminated string at p into a std::string and releases p.
	std::string delHeapMemory(char *p);
};

實現檔案
#include "StringHelper.h"

#include <Windows.h>

#include <algorithm>
#include <climits>
#include <clocale>
#include <codecvt>
#include <cstdlib>
#include <cstring>
#include <locale>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include <boost/regex.hpp>
#include <boost/algorithm/string.hpp>

using namespace std;

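/*
VC++ 2008 SP1 provides a pragma that makes narrow string literals UTF-8 by default:
#pragma execution_character_set("utf-8")

C++11 also defines the u8 prefix, which is used like the L prefix:
std::string nstr = u8"123,我是誰?";
However, VS2010 SP1 does not support it.
*/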

namespace StringHelper
{
	/// Splits `s` at every character contained in `delim` and appends the
	/// resulting tokens to `ret` (existing elements of `ret` are kept).
	///
	/// @param s      Text to split.
	/// @param delim  Set of single-character delimiters; it is only read
	///               (the non-const reference is kept for interface compatibility).
	/// @param ret    Receives the tokens, in order of appearance.
	///
	/// A token that ends at a delimiter is always appended, even when empty, so
	/// "a,,b" yields "a", "", "b". The text after the last delimiter is appended
	/// only when it is non-empty; an input without any delimiter therefore yields
	/// the whole string as a single token, and an empty input yields nothing.
	void SplitStr(const std::string& s, std::string& delim,std::vector<std::string> &ret)
	{
		std::string::size_type tokenStart = 0;
		std::string::size_type hit = s.find_first_of(delim, tokenStart);
		while (hit != std::string::npos)
		{
			ret.push_back(s.substr(tokenStart, hit - tokenStart));
			tokenStart = hit + 1;
			hit = s.find_first_of(delim, tokenStart);
		}

		// The remainder is emitted once, after the loop. Emitting it inside the
		// loop would append every middle token twice.
		if (tokenStart < s.size())
		{
			ret.push_back(s.substr(tokenStart));
		}
	}//end func

	/// Parses a ';'-separated list of "key=value" entries into `mapKeyValue`.
	///
	/// @param strSrc       Text such as "a=1; b=2".
	/// @param mapKeyValue  Receives the pairs; an existing key is overwritten,
	///                     other existing entries are left untouched.
	///
	/// An entry is accepted only when it contains exactly one '='. Spaces around
	/// the key are removed; the value is stored exactly as written. Entries whose
	/// key is empty or consists only of spaces are skipped.
	void Str2Map( const std::string strSrc, std::map<std::string, std::string> &mapKeyValue )
	{
		const std::string::size_type npos = std::string::npos;

		std::string::size_type entryBegin = 0;
		for (;;)
		{
			const std::string::size_type entryEnd = strSrc.find(';', entryBegin);
			const std::string entry =
				strSrc.substr(entryBegin, entryEnd == npos ? npos : entryEnd - entryBegin);

			const std::string::size_type eq = entry.find('=');
			if (eq != npos && entry.find('=', eq + 1) == npos)
			{
				const std::string rawKey = entry.substr(0, eq);
				const std::string::size_type first = rawKey.find_first_not_of(' ');
				if (first != npos)
				{
					// substr takes a COUNT as its second argument, so convert
					// the index of the last kept character into a length.
					const std::string::size_type last = rawKey.find_last_not_of(' ');
					mapKeyValue[rawKey.substr(first, last - first + 1)] = entry.substr(eq + 1);
				}
			}

			if (entryEnd == npos)
			{
				break;
			}
			entryBegin = entryEnd + 1;
		}
	}

	/// Decodes the UTF-8 bytes in `sUTF8` into a wide string.
	///
	/// Side effect: replaces the global C++ locale with "Chinese-simplified".
	/// Constructing that locale throws if the name is not available, and
	/// from_bytes throws if the input cannot be converted.
	std::wstring s2ws_UTF8ToGBK(std::string sUTF8)
	{
		std::wstring_convert< std::codecvt_utf8<wchar_t> > utf8Decoder;
		const std::wstring decoded = utf8Decoder.from_bytes(sUTF8);

		// Installed process-wide; this happens before the stream below is created.
		std::locale::global(std::locale("Chinese-simplified"));

		std::wostringstream sink;
		sink << decoded;
		return sink.str();
	}

	/// Converts UTF-8 bytes to a multibyte string by decoding them with
	/// s2ws_UTF8ToGBK and narrowing the result with ws2s_gbk.
	///
	/// @return The converted string, or an empty string when either step throws.
	std::string UTF8ToGBK( const std::string &strUTF8 )
	{
		std::string converted;
		try
		{
			converted = ws2s_gbk(s2ws_UTF8ToGBK(strUTF8));
		}
		catch (...)
		{
			// Best effort: any failure is reported to the caller as "".
			return "";
		}
		return converted;
	}

	std::string GBKToUTF8(const std::string& gbk)  
	{  
		std::string strOutUTF8 = "";  
		WCHAR * str1;  
		int n = MultiByteToWideChar(CP_ACP, 0, gbk.c_str(), -1, NULL, 0);  
		str1 = new WCHAR[n];  
		MultiByteToWideChar(CP_ACP, 0, gbk.c_str(), -1, str1, n);  
		n = WideCharToMultiByte(CP_UTF8, 0, str1, -1, NULL, 0, NULL, NULL);  
		char * str2 = new char[n];  
		WideCharToMultiByte(CP_UTF8, 0, str1, -1, str2, n, NULL, NULL);  
		strOutUTF8 = str2;  
		delete[]str1;  
		str1 = NULL;  
		delete[]str2;  
		str2 = NULL;  
		return strOutUTF8;  
	}

	/// Widens a string in the system ANSI code page (CP_ACP) to a wide string.
	///
	/// @param s  Source bytes; the result ends at the first NUL, if any.
	/// @return The wide text, or an empty string if the conversion fails.
	std::wstring s2ws_gbk( const std::string& s )
	{
		// +1 so the terminator is converted along with the text.
		const int srcLen = static_cast<int>(s.length()) + 1;

		const int wideLen = MultiByteToWideChar(CP_ACP, 0, s.c_str(), srcLen, NULL, 0);
		if (wideLen <= 0)
		{
			return std::wstring();
		}

		// One extra zeroed slot guarantees termination whatever the API wrote.
		std::vector<wchar_t> wide(static_cast<size_t>(wideLen) + 1, L'\0');
		if (MultiByteToWideChar(CP_ACP, 0, s.c_str(), srcLen, &wide[0], wideLen) <= 0)
		{
			return std::wstring();
		}

		return std::wstring(&wide[0]);
	}

	/// Narrows a wide string to a multibyte string with wcstombs while the C
	/// locale is temporarily switched to "chs".
	///
	/// @param ws  Wide text; conversion stops at the first L'\0'.
	/// @return The converted bytes. If a character cannot be converted, the
	///         result is whatever wcstombs produced before stopping.
	///
	/// The previous C locale is restored before returning, including when an
	/// allocation throws. If "chs" cannot be selected, the locale stays as it
	/// was and the conversion uses that locale.
	std::string ws2s_gbk(const std::wstring& ws)
	{
		// Restores the saved locale when the function is left by any path.
		struct LocaleGuard
		{
			std::string saved;
			explicit LocaleGuard(const std::string& name) : saved(name) {}
			~LocaleGuard() { setlocale(LC_ALL, saved.c_str()); }
		};

		// Copy the name right away: the returned pointer may be invalidated by
		// the next setlocale call.
		const char* current = setlocale(LC_ALL, NULL);
		const LocaleGuard restore(current ? current : "C");
		setlocale(LC_ALL, "chs");

		// MB_CUR_MAX is the longest multibyte character of the locale now in
		// effect, so the buffer is large enough even if "chs" was not selected.
		const size_t capacity = ws.size() * static_cast<size_t>(MB_CUR_MAX) + 1;
		std::vector<char> bytes(capacity, '\0');

		// Convert into all but the last byte; that byte stays zero, so the
		// buffer is always a terminated C string.
		wcstombs(&bytes[0], ws.c_str(), capacity - 1);

		return std::string(&bytes[0]);
	}

	/// Replaces every non-overlapping occurrence of `strsrc` in `strBig` with
	/// `strdst`, scanning left to right.
	///
	/// @param strBig  Text modified in place.
	/// @param strsrc  Text to search for; when empty, `strBig` is left unchanged.
	/// @param strdst  Replacement text (may be empty to delete the matches).
	///
	/// Inserted text is never rescanned, so a replacement that contains the
	/// search text does not recurse.
	void  string_replace( std::wstring &strBig, const std::wstring &strsrc, const std::wstring &strdst )
	{
		// An empty pattern matches at every position, so the loop below would
		// never finish.
		if (strsrc.empty())
		{
			return;
		}

		const std::wstring::size_type srclen = strsrc.size();
		const std::wstring::size_type dstlen = strdst.size();

		std::wstring::size_type pos = strBig.find(strsrc);
		while (pos != std::wstring::npos)
		{
			strBig.replace(pos, srclen, strdst);
			// Resume after the inserted text.
			pos = strBig.find(strsrc, pos + dstlen);
		}
	}

	/// Copies the NUL-terminated string at `p` into a std::string and releases
	/// the buffer.
	///
	/// @param p  Buffer allocated with `new char[...]`, or a null pointer.
	///           Ownership passes to this function.
	/// @return A copy of the text; an empty string when `p` is null.
	///
	/// The buffer is released with `delete[]` because a C string is an array
	/// allocation; it is released even if copying throws.
	std::string delHeapMemory( char *p )
	{
		if (p == nullptr)
		{
			return std::string();
		}

		const std::unique_ptr<char[]> owner(p);
		return std::string(owner.get());
	}

	/// URL-encodes a C string into a caller-supplied buffer.
	///
	/// The input is read in the system ANSI code page (CP_ACP) and converted to
	/// UTF-8. ASCII letters, digits, '-', '.' and '~' are then copied as they
	/// are, a space becomes '+', and every other byte is written as "%XX".
	///
	/// @param szSrc       NUL-terminated source text.
	/// @param pBuf        Destination buffer; NUL-terminated on return whenever
	///                    the arguments are valid.
	/// @param cbBufLen    Size of pBuf in bytes, including the terminator.
	/// @param bUpperCase  Non-zero for upper-case hex digits, zero for lower-case.
	/// @return TRUE when the whole input was encoded. FALSE when an argument is
	///         invalid, a code-page conversion fails, or the output does not
	///         fit; on truncation pBuf holds the part that did fit.
	BOOL UrlEncode(const char* szSrc, char* pBuf, int cbBufLen, BOOL bUpperCase)
	{
		if (szSrc == NULL || pBuf == NULL || cbBufLen <= 0)
			return FALSE;

		// Leave a valid empty string behind on every early exit below.
		pBuf[0] = '\0';

		const size_t cbSrc = strlen(szSrc);
		if (cbSrc == 0)
			return TRUE;
		if (cbSrc > static_cast<size_t>(INT_MAX))
			return FALSE;	// the Win32 calls take an int length
		const int cbSrcInt = static_cast<int>(cbSrc);

		// Step 1: ANSI code page -> UTF-16.
		const int cchWide = MultiByteToWideChar(CP_ACP, 0, szSrc, cbSrcInt, NULL, 0);
		if (cchWide <= 0)
			return FALSE;
		std::vector<WCHAR> wide(cchWide);
		if (MultiByteToWideChar(CP_ACP, 0, szSrc, cbSrcInt, &wide[0], cchWide) <= 0)
			return FALSE;

		// Step 2: UTF-16 -> UTF-8.
		const int cbUTF8 = WideCharToMultiByte(CP_UTF8, 0, &wide[0], cchWide, NULL, 0, NULL, NULL);
		if (cbUTF8 <= 0)
			return FALSE;
		std::vector<char> utf8(cbUTF8);
		if (WideCharToMultiByte(CP_UTF8, 0, &wide[0], cchWide, &utf8[0], cbUTF8, NULL, NULL) <= 0)
			return FALSE;

		// Step 3: escape the UTF-8 bytes.
		const char* const hexDigits = bUpperCase ? "0123456789ABCDEF" : "0123456789abcdef";
		const int cbUsable = cbBufLen - 1;	// keep one byte for the terminator
		int cbDest = 0;
		bool complete = true;

		for (int i = 0; i < cbUTF8; ++i)
		{
			const unsigned char c = static_cast<unsigned char>(utf8[i]);

			// Explicit ASCII ranges: isalpha/isdigit depend on the C locale,
			// which other functions in this file change.
			const bool passThrough =
				(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
				(c >= '0' && c <= '9') || c == '-' || c == '.' || c == '~';

			if (passThrough || c == ' ')
			{
				if (cbDest + 1 > cbUsable)
				{
					complete = false;
					break;
				}
				pBuf[cbDest++] = (c == ' ') ? '+' : static_cast<char>(c);
			}
			else
			{
				// Never emit a partial "%XX" triple.
				if (cbDest + 3 > cbUsable)
				{
					complete = false;
					break;
				}
				pBuf[cbDest++] = '%';
				pBuf[cbDest++] = hexDigits[c >> 4];
				pBuf[cbDest++] = hexDigits[c & 0x0F];
			}
		}

		pBuf[cbDest] = '\0';
		return complete ? TRUE : FALSE;
	}  

	string encodeURI(string strSrc)  
	{  
		char *buf[512];  
		memset(buf, 0, sizeof(buf));  
		UrlEncode(strSrc.c_str(), (char*)(buf), sizeof(buf), TRUE);  

		string dst((char *)buf);  
		return dst;  
	}  
};

字串轉大寫(或小寫)

http://www.cnblogs.com/mmix2009/archive/2013/07/19/3200150.html