程序師世界是廣大編程愛好者互助、分享、學習的平台,程序師世界有你更精彩!
首頁
編程語言
C語言|JAVA編程
Python編程
網頁編程
ASP編程|PHP編程
JSP編程
數據庫知識
MYSQL數據庫|SqlServer數據庫
Oracle數據庫|DB2數據庫
 程式師世界 >> 編程語言 >> C語言 >> C++ >> 關於C++ >> c++爬蟲大眾點評數據

c++爬蟲大眾點評數據

編輯:關於C++
#include <curl/curl.h>
#include <pcre.h>

#include <stdio.h>
#include <string.h>

#include <iostream>
#include <string>

/* Number of ints in the PCRE output vector; should be a multiple of 3. */
#define OVECCOUNT 30

/* Buffer-size constants; not referenced by the code shown in this file. */
#define EBUFLEN 128

#define BUFLEN 10240

using namespace std;

/**
 * libcurl write callback: appends one received chunk to a std::string.
 *
 * @param buffer received bytes (not NUL-terminated)
 * @param size   size of one element in bytes
 * @param nmemb  number of elements in buffer
 * @param str    destination; must point to a std::string
 * @return the number of bytes consumed (size * nmemb), or (size_t)-1 when
 *         either pointer is null. libcurl treats a return value that differs
 *         from the number of bytes it passed in as a write error.
 */
size_t onWriteData(void *buffer, size_t size, size_t nmemb, void *str) {
    if (!str || !buffer) {
        return static_cast<size_t>(-1);
    }

    const size_t bytes = size * nmemb;

    std::string *result = static_cast<std::string *>(str);
    result->append(static_cast<const char *>(buffer), bytes);

    /* Report bytes, not element count, so the contract holds for any size. */
    return bytes;
}

//獲取頁面
int getWeb(string url, string & result)

{

    long code = 0;

    string htmlpage;

    CURL * curl = curl_easy_init();

    curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); //設置url
    curl_easy_setopt(curl, CURLOPT_POST, 0); //設置請求方法
    curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5."); //偽裝客戶端
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &htmlpage); //設置接受返回結果字符串
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, onWriteData); //設置處理方法
    curl_easy_perform(curl); //請求
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &code);

    if (code == 200)

    {

        cout << "request success" << endl;

        result = htmlpage;

        //cout<<htmlpage<<endl;

    }

    curl_easy_cleanup(curl);

    return code;

}

int main(int argc, char * *argv)

{

    pcre * re;

    constchar * error;

    int erroffset;

    int ovector[OVECCOUNT];

    int rc,
    i;

    string url = "http://www.dianping.com/search/category/212/10/g103";

    string html;

    getWeb(url, html);

    //char src[] =   " ";
    //char pattern[] = "(<a>.+?</a>)";

    constchar * src = html.c_str();

    char pattern[] = "(<li class=\"\"[\\s\\S]*?</li>)";

    printf("String : %s\n", src);

    printf("Pattern: \"%s\"\n", pattern);

    re = pcre_compile(pattern, 0, &error, &erroffset, NULL);

    if (re == NULL) {

        printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);

        return1;

    }

    char * p = (char * ) src;

    while ((rc = pcre_exec(re, NULL, p, strlen(p), 0, 0, ovector, OVECCOUNT)) != PCRE_ERROR_NOMATCH)

    {

        printf("\nOK, %d matched ...\n\n", rc);

        for (i = 0; i < rc - 1; i++)

        {

            char * substring_start = p + ovector[2 * i];

            int substring_length = ovector[2 * i + 1] - ovector[2 * i];

            char matched[10240];

            memset(matched, 0, 10240);

            strncpy(matched, substring_start, substring_length);

            printf("match:%s\n", matched);

        }

        p += ovector[1];

        if (!p)

        {

            break;

        }

    }

    pcre_free(re);

    return0;

}
  1. 上一頁:
  2. 下一頁:
Copyright © 程式師世界 All Rights Reserved