linux下使用pcre库进行正则表达式

Posted 2022-12-03 sunny_ss12

tags:

篇首语：本文由小常识网(cha138.com)小编为大家整理，主要介绍了linux下使用pcre库进行正则表达式相关的知识，希望对你有一定的参考价值。

//test.cpp 
#include <stdio.h>

#include <string>
#include <vector>

#include <ace/Log_Msg.h>
#include <ace/OS.h>

// pcre.h is a C header: include it inside an extern "C" block.
extern "C" {
#include "pcre.h"
}

using namespace std;

// Compile a regular expression into a PCRE pattern object.
//
// pegexPcre  [out] receives the result of pcre_compile(); NULL on failure.
//            Per the PCRE API the caller releases it with pcre_free().
// regex      pattern text to compile (default options, default tables).
//
// Returns 0 on success, -1 if the pattern could not be compiled; in that
// case the pattern, the error offset and PCRE's message are logged.
int MatchRegexes::CompilePcre(pcre *&pegexPcre, string &regex)
{
    const char *error = NULL;
    int erroffset = 0;

    pegexPcre = pcre_compile(
        regex.c_str(),  /* the pattern */
        0,              /* default options */
        &error,         /* for error message */
        &erroffset,     /* for error offset */
        NULL);          /* use default character tables */

    if (pegexPcre == NULL)
    {
        // Log the pattern that was actually passed in.
        ACE_ERROR ((LM_INFO, "%D %M [MatchRegexes.%l] (%t), Method=CompilePcre, regex = %s ,PCRE compilation failed at offset %d: %s.\n", regex.c_str(), erroffset, error));
        return -1;
    }

    return 0;
}



// Parse one log line: run a compiled pattern against it and collect the
// text captured by each group.
//
// pegexPcre   compiled pattern; NULL is rejected with -1.
// log         subject text to match.
// regexStrVT  [out] one string is appended per capture group 1..rc-1, in
//             group order. Group 0 (the whole match) is skipped. A group
//             that did not take part in the match yields an empty string,
//             so positions in the vector still line up with group numbers.
//
// Returns 0 when the pattern matched, -1 on a NULL pattern, a
// pcre_fullinfo() failure, or when pcre_exec() reports no match / an error.
int MatchRegexes::ResolvePcre(pcre *pegexPcre, string log, vector<string> &regexStrVT)
{
    if (pegexPcre == NULL)
    {
        return -1;
    }

    // Ask PCRE how many capturing parentheses the pattern contains.
    int DataCount = 0;
    int info = pcre_fullinfo(pegexPcre, NULL, PCRE_INFO_CAPTURECOUNT, &DataCount);
    if (info != 0)
    {
        ACE_ERROR ((LM_ERROR, "%D %M [MatchRegexes.%l] (%t), Method=ResolvePcre, Error (info = %d) from pcre_fullinfo(%d).\n ", info, DataCount));
        return -1;
    }

    // Sized generously so the offsets cannot overflow the buffer
    // (see "man pcre" for the exact formula). The vector frees itself on
    // every return path, so no manual delete[] is needed.
    const int sub_len = (DataCount + 2) * 3;
    vector<int> ovector(sub_len);

    int rc = pcre_exec(
        pegexPcre,                       /* the compiled pattern */
        NULL,                            /* no extra data - we didn't study the pattern */
        log.c_str(),                     /* the subject string */
        static_cast<int>(log.length()),  /* the length of the subject */
        0,                               /* start at offset 0 in the subject */
        0,                               /* default options */
        &ovector[0],                     /* output vector for substring information */
        sub_len);                        /* number of elements in the output vector */

    if (rc < 0)
    {
        // No match, or an error. Deliberately not logged: on 2016-03-11 a
        // deployment with a large volume of non-matching data produced so
        // many log lines here that it risked crashing the program.
        //ACE_ERROR ((LM_ERROR, "%D %M [MatchRegexes.%l] (%t), Method=ResolvePcre, pcre_exec  failed.\n"));
        return -1;
    }

    // Entry 0 is the whole matched text, so start at group 1.
    for (int i = 1; i < rc; i++)
    {
        const int start = ovector[2 * i];
        const int end = ovector[2 * i + 1];
        if (start < 0 || end < start)
        {
            // Unset group (PCRE reports -1/-1): never index before the buffer.
            regexStrVT.push_back(string());
            continue;
        }
        regexStrVT.push_back(log.substr(start, end - start));
    }

    return 0;
}



int main(int argc,char* argv[])

     pcre* RegexPcre;
     std::vector<std::string> RegexPcreStrVT;
     std::string regex = "(([^\\\\s]*)\\\\s*([^\\\\s]3\\\\s*\\\\d*\\\\s*\\\\d2\\\\:\\\\d2\\\\:\\\\d2)\\\\s*(([\\\\d]*)[^\\\\s]*)\\\\s*([^\\\\s]*)\\\\s*:\\\\s*SerialNum=([^\\\\s]*)\\\\s*GenTime=\\"([^\\"]*)\\"\\\\s*Mac=([^\\\\s]*)\\\\s*Tunn
    elIP=([^\\\\s]*)\\\\s*)";    
     std::string log = "<17> Sep 29 18:00:29 103053_VERACRUZ_DE_IGNACIO_DE_LA_LLAVE_VERACRUZ DEV_REPORT: SerialNum=6002F-0117K-10001-09SQZ-0K2J6 GenTime=\\"2015-09-29 18:00:29\\" Mac=68-ed-a4-06-7b-b3 TunnelIP=17.16.1.16";
     CompilePcre(&RegexPcre,regex);
     ResolvePcre(RegexPcre,log,RegexPcreStrVT);
     
     int i = 0;
     for(i = 0;i<RegexPcreStrVT.size();i++)
    
         printf("%s\\n",RegexPcreStrVT.at(i).c_str());

编译命令（ACE的库文件名为libACE，链接时区分大小写）：g++ -o main test.cpp -lpcre -lACE

则运行后的结果显示：

<17> Sep 29 18:00:29 103053_VERACRUZ_DE_IGNACIO_DE_LA_LLAVE_VERACRUZ DEV_REPORT: SerialNum=6002F-0117K-10001-09SQZ-0K2J6 GenTime="2015-09-29 18:00:29" Mac=68-ed-a4-06-7b-b3 TunnelIP=17.16.1.16
<17>
Sep 29 18:00:29
103053_VERACRUZ_DE_IGNACIO_DE_LA_LLAVE_VERACRUZ
103053
DEV_REPORT
6002F-0117K-10001-09SQZ-0K2J6
2015-09-29 18:00:29
68-ed-a4-06-7b-b3
17.16.1.16

其中，正则表达式中的每一对括号对应一个被捕获的子串；最外层的括号捕获的是整个匹配到的字符串，因此输出的第一行就是完整的日志内容。

参考：http://blog.chinaunix.net/uid-26575352-id-3517146.html 《在C语言中利用PCRE实现正则表达式》

如果需要让libpcre库支持UTF-8，则在编译libpcre时需要使用 ./configure --enable-utf8 --enable-unicode-properties 进行配置。

并且在调用pcre_compile时需要传入PCRE_UTF8选项，例如：

// Compile the pattern with UTF-8 handling enabled.
pegexPcre = pcre_compile(
ReplaceRegex.c_str(), /* the pattern */
PCRE_UTF8, /* treat the pattern and subject as UTF-8 (not the default options) */
&error, /* for error message */
&erroffset, /* for error offset */
NULL); /* no custom character tables */

// Match without re-validating the subject as UTF-8.
// Per the PCRE documentation, behaviour is undefined if the subject is not
// valid UTF-8 when this check is skipped - only use it on trusted input.
int rc = pcre_exec(
pegexPcre, /* the compiled pattern */
NULL, /* no extra data - we didn't study the pattern */
Replacelog.c_str(), /* the subject string */
Replacelog.length(), /* the length of the subject */
0, /* start at offset 0 in the subject */
PCRE_NO_UTF8_CHECK, /* skip the UTF-8 validity check on the subject */
ovector, /* output vector for substring information */
sub_len); /* number of elements in the output vector */

PCRE_NO_UTF8_CHECK：表示pcre_exec不再对待匹配的字符串进行UTF-8合法性检查。只有在能够确保输入是合法UTF-8时才应使用该选项，否则行为未定义。

以上是关于linux下使用pcre库进行正则表达式的主要内容，如果未能解决你的问题，请参考以下文章