程序師世界是廣大編程愛好者互助、分享、學習的平台,程序師世界有你更精彩!
首頁
編程語言
C語言|JAVA編程
Python編程
網頁編程
ASP編程|PHP編程
JSP編程
數據庫知識
MYSQL數據庫|SqlServer數據庫
Oracle數據庫|DB2數據庫
 程式師世界 >> 編程語言 >> C語言 >> C++ >> 關於C++ >> C++使用libpcre捕獲多行數據

C++使用libpcre捕獲多行數據

編輯:關於C++

由於初次嘗試用C++寫個簡單的爬蟲系統,不免用到正則。正則有多行匹配等特性,以前一直用PHP開發,preg_match加上正則修正符(m:多行匹配;i:忽略大小寫;s:讓「.」匹配包含換行在內的任意字符)非常方便,所以在改用C++的時候遇到了困惑。

實際上pcre匹配數據是多次去匹配的:
先匹配到一個數據,然後對源數據進行偏移,再匹配下一個,如此重複,直到最後一個。

#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>

#include "pcre.h"

// Number of ints in the offset vector handed to pcre_exec() by tpcre().
#define OVECCOUNT 256
using namespace std;
// Forward declarations of the demo routines defined below main().
int tpcre();
int tmysql();

/**
 * Program entry point: runs the PCRE demo.
 *
 * The command-line arguments are not used.
 *
 * @return the status reported by tpcre(), so a failure there is visible
 *         to the caller as a non-zero exit code.
 */
int main(int argc, char ** argv)
{
    (void)argc;
    (void)argv;
    return tpcre();
}
// Empty stub: performs no work and always reports success (0).
int tmysql()
{
    return 0;
}

int tpcre(){

     char pText[1024] = "\"21,537511285427,50005701,,shopsearch,1,shopcon,2950270077,,\"                               href=\"//detail.tmall.com/item.htm?id=537511285427&rn=54eb0efc1a7a49f5b93ed2051aa4fe9c&abbucket=0\" target=\"_blank\"  data-gold-url=\"/inshopse\"    href=\"//detail.tmall.com/item.htm?id=537511285421&rn=54eb0efc1a7a49f5b93ed2051aa4fe9c&abbucket=0\"href=\"//detail.tmall.com/item.htm?id=537511285422&rn=54eb0efc1a7a49f5b93ed2051aa4fe9c&abbucket=0\"href=\"//detail.tmall.com/item.htm?id=537511285423&rn=54eb0efc1a7a49f5b93ed2051aa4fe9c&abbucket=0\"href=\"//detail.tmall.com/item.htm?id=537511285424&rn=54eb0efc1a7a49f5b93ed2051aa4fe9c&abbucket=0\" ";
     /*
    string filename = "/Users/kang/Library/Developer/Xcode/DerivedData/TmailSpider-batmyukengwdwjcsejwqossttbhu/Build/Products/Debug/2.txt";
    fstream fp;
    fp.open(filename);
    char buf[256];
    string html;
    while (!fp.eof()) {
        fp.read(buf, 200);
        html.append(buf);
    }
    fp.close();
    char *pText = (char*)html.c_str();
    */
    std::cout << pText;
    //const char * pPattern = "(\\d+)\\w+";
    //const char *pPattern="href=\\\"(//detail.tmall.com/item.htm[^\\]+)\\";
    //const char * pPattern = "(//detail.tmall.com/item.htm[^\\\\]+)";
    const char * pPattern = "(//detail.tmall.com/item.htm\\?id=\\d+)";
    const char * pErrMsg = NULL;
    pcre * pPcre = NULL;
    int nOffset = -1;

    //PCRE_MULTILINE|PCRE_UTF8|PCRE_NO_AUTO_CAPTURE
    //pPcre = pcre_compile(pPattern, PCRE_DOTALL|PCRE_CASELESS|PCRE_MULTILINE, &pErrMsg, &nOffset, NULL);
    pPcre = pcre_compile(pPattern, PCRE_DOTALL|PCRE_CASELESS|PCRE_MULTILINE, &pErrMsg, &nOffset, NULL);

    if(pPcre == NULL){
        printf("pcre match error\n");
        return 1;
    }
    int ovector[OVECCOUNT];
    int matchFlag;

    int exec_offset = 0;
    int count = 0;
    do{
        matchFlag = (int)pcre_exec(pPcre, NULL, pText, (int)strlen(pText),exec_offset,0, ovector, OVECCOUNT);
        if(matchFlag > 0){
            ++count;
            printf("\nOK, has matched ...\n\n");
            for(int i=0;i<matchflag;i++){ char="" *strstart="pText+ovector[2*i];" int="" substrlen="ovector[2*i+1]" -="" ovector[2*i];="" matched[1024];="" memset(matched,="" 0,="" 1024);="" strncpy(matched,="" strstart,="" substrlen);="" printf(="" "match:$%d="%s\n&quot;,i,matched" );="" }="" exec_offset="ovector[1];" }while="" (matchflag=""> 0);
    cout << "count="<
        
   
  1. 上一頁:
  2. 下一頁:
Copyright © 程式師世界 All Rights Reserved