Linux 下 C/C++ 例項之五:正則表示式字串匹配
阿新 • • 發佈:2019-02-07
一、簡介
標準的 C(以及 C++11 之前的 C++)不內建正則表示式支援,但 POSIX 提供的 <regex.h> 函式庫可以輔助 C/C++ 程式設計師完成這一功能。正則表示式常用函式:編譯正則表示式 regcomp()、匹配正則表示式 regexec()、釋放正則表示式 regfree()。
二、詳解
1、程式碼
regcomp.cpp:
/*
 * regcomp.cpp - small demonstrations of POSIX <regex.h> matching:
 * first match in a string, all matches in a string, and all matches
 * in every line of a text file.
 */
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <regex.h>
#include <assert.h>
#include <string.h>

using namespace std;

/* Width of one row of the table filled by find_all(), terminator included. */
enum { RESULT_WIDTH = 20 };

/*
 * Flags for regexec() when a scan resumes at `pos` inside a text that
 * begins at `start`. Unless `pos` really is the start of a line, '^' must
 * not be allowed to match there, so REG_NOTBOL is requested.
 */
static int resume_flags(const char *start, const char *pos)
{
    return (pos > start && pos[-1] != '\n') ? REG_NOTBOL : 0;
}

/*
 * Finds the first match of `pattern` (POSIX extended syntax) in `input`.
 *
 * out: receives the matched text, or "" when nothing matched.
 *
 * Returns the offset just past the match, input.length() when there is no
 * match, -1 when the pattern does not compile, -2 on any other regexec()
 * failure.
 */
int find_first(string input, string pattern, string &out)
{
    regex_t reg;
    regmatch_t pm[1];

    out = "";

    /* Compile: extended syntax, newline-sensitive anchors. */
    if (regcomp(&reg, pattern.c_str(), REG_EXTENDED | REG_NEWLINE) != 0) {
        return -1;
    }

    const int rc = regexec(&reg, input.c_str(), 1, pm, 0);
    /* The offsets in pm stay valid after the compiled pattern is released,
     * so free it once here instead of on every exit path. */
    regfree(&reg);

    if (rc == REG_NOMATCH) {
        return static_cast<int>(input.length());
    }
    if (rc != 0) {
        return -2;
    }

    out = input.substr(pm[0].rm_so, pm[0].rm_eo - pm[0].rm_so);
    return pm[0].rm_eo;
}

/*
 * Finds the first match of `pattern` in the C string `buff` and copies it,
 * NUL-terminated, into `outdata`.
 *
 * The caller must supply an `outdata` buffer large enough for the match plus
 * the terminator; its size is not known here. `outdata` is left untouched
 * when nothing matches.
 *
 * Returns 0 on a match, -1 when the pattern does not compile or nothing
 * matched (in which case "no match!" is printed), -2 on any other regexec()
 * failure.
 */
int find_first(char *buff, char *pattern, char *outdata)
{
    regex_t reg;
    regmatch_t pm[1];

    /* Compile: extended syntax, newline-sensitive anchors. */
    if (regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE) != 0) {
        return -1;
    }

    const int rc = regexec(&reg, buff, 1, pm, 0);
    regfree(&reg);

    if (rc == REG_NOMATCH) {
        printf("no match!\n");
        return -1;
    }
    if (rc != 0) {
        return -2;
    }

    /* Terminate at the match length, not at the source offset. */
    const size_t len = static_cast<size_t>(pm[0].rm_eo - pm[0].rm_so);
    memcpy(outdata, buff + pm[0].rm_so, len);
    outdata[len] = '\0';
    return 0;
}

/*
 * Collects successive non-overlapping matches of `pattern` in `buff`.
 *
 * result:      table of RESULT_WIDTH-byte rows; each match is stored as a
 *              NUL-terminated string, truncated to RESULT_WIDTH - 1
 *              characters if it is longer.
 * max_results: number of rows available in `result`; a negative value (the
 *              default) means "no limit", in which case the caller must
 *              guarantee the table is large enough.
 *
 * Returns the number of matches stored, or -1 when the pattern does not
 * compile.
 */
int find_all(char *buff, char *pattern, char result[][RESULT_WIDTH],
             int max_results = -1)
{
    regex_t reg;
    regmatch_t pm[1];

    /* Compile: extended syntax, newline-sensitive anchors. */
    if (regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE) != 0) {
        return -1;
    }

    const size_t max_len = RESULT_WIDTH - 1;
    const char *cursor = buff;
    int count = 0;

    while ((max_results < 0 || count < max_results) &&
           regexec(&reg, cursor, 1, pm, resume_flags(buff, cursor)) == 0) {
        size_t len = static_cast<size_t>(pm[0].rm_eo - pm[0].rm_so);
        if (len > max_len) {
            len = max_len; /* never write past the end of the row */
        }
        memcpy(result[count], cursor + pm[0].rm_so, len);
        result[count][len] = '\0';
        count++;

        cursor += pm[0].rm_eo;
        if (pm[0].rm_eo == pm[0].rm_so) {
            /* Empty match: step one character so the scan always advances. */
            if (*cursor == '\0') {
                break;
            }
            cursor++;
        }
        if (*cursor == '\0') {
            break;
        }
    }

    regfree(&reg);
    return count;
}

/*
 * Prints every match of `pattern` found in the text file `file_name`,
 * scanning it line by line.
 *
 * Lines are read in chunks of at most 1023 characters, so a match that
 * straddles a chunk boundary of an over-long line is not found.
 *
 * Returns the number of matches printed, or -1 when the file cannot be
 * opened or the pattern does not compile.
 */
int print_file(const char *file_name, const char *pattern)
{
    /* Read-only access is all that is needed. */
    FILE *fp = fopen(file_name, "r");
    if (fp == NULL) {
        perror(file_name);
        return -1;
    }

    regex_t reg;
    /* Compile: extended syntax, newline-sensitive anchors. */
    if (regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE) != 0) {
        fclose(fp);
        return -1;
    }

    regmatch_t pm[1];
    char buff[1024] = {0};
    int count = 0;

    while (fgets(buff, sizeof(buff), fp)) { /* one line (or chunk) at a time */
        const char *cursor = buff;
        while (regexec(&reg, cursor, 1, pm, resume_flags(buff, cursor)) == 0) {
            count++;
            /* Build the text from an explicit length so nothing left over
             * from an earlier, longer match can be printed. */
            const string matched(cursor + pm[0].rm_so,
                                 static_cast<size_t>(pm[0].rm_eo - pm[0].rm_so));
            cout << "匹配:" << matched << endl;

            cursor += pm[0].rm_eo;
            if (pm[0].rm_eo == pm[0].rm_so) {
                /* Empty match: step one character so the scan always advances. */
                if (*cursor == '\0') {
                    break;
                }
                cursor++;
            }
        }
    }

    regfree(&reg);
    fclose(fp);
    return count;
}

/* Runs the three demonstrations on a fixed string and on test.txt. */
int main()
{
    char result[20][RESULT_WIDTH] = {{0}};
    char buf[] = "1231a4568b789c234";
    char pattern[] = "[0-9]{3}";
    char resultfirst[20] = {0};

    find_first(buf, pattern, resultfirst);
    cout << strlen(resultfirst) << ":" << resultfirst << endl;
    cout << "***************************" << endl;

    /* Tell find_all how many rows the table has. */
    int count = find_all(buf, pattern, result, 20);
    for (int i = 0; i < count; i++) {
        cout << "result:" << "i=" << i + 1 << "----" << result[i] << endl;
    }
    cout << "***************************" << endl;

    count = print_file("test.txt", "[0-9]{5}");
    if (count < 0) {
        return EXIT_FAILURE; /* file missing or pattern invalid */
    }
    cout << "匹配的個數:" << count << endl;
    return 0;
}
2、編譯執行
g++ -o regcomp regcomp.cpp
./regcomp
當前目錄下的測試檔案test.txt的內容:abc12345678
ddd55555555hhh
123456