程序師世界是廣大編程愛好者互助、分享、學習的平台,程序師世界有你更精彩!
首頁
編程語言
C語言|JAVA編程
Python編程
網頁編程
ASP編程|PHP編程
JSP編程
數據庫知識
MYSQL數據庫|SqlServer數據庫
Oracle數據庫|DB2數據庫
 程式師世界 >> 編程語言 >> 網頁編程 >> PHP編程 >> 關於PHP編程 >> curl采集 根據關鍵詞 獲取雅虎競價排名

curl采集 根據關鍵詞 獲取雅虎競價排名

編輯:關於PHP編程

之前寫過curl批處理采集數據,這裡貼上完整版本,代碼很簡單,廢話不說,上代碼,新手歡迎指教!!!

代碼只寫到獲取鏈接為止;至於排名,返回數組的鍵(從 0 開始)加 1 就是該鏈接的排名。

  1 <?php
  2 /**
  3  * Based on yahoo access to data
  4  *
  5  * @author chujiu <[email protected]>
  6  * @copyright 2014.04.26 By chujiu
  7  * @version 0.2.1 2014.04.26
  8  */
  9 
 10 class DataCollectionRank {
 11 
 12     const   PAGE = 10;
 13     public  $path = '';
 14     public  $main = 91;
 15     
 16     // 添加curl句柄 返回資源
 17     private function _gather_data($keyword) {
 18         if(empty($keyword)) {
 19             return '';
 20         }
 21         $chs = array(); // 句柄
 22         $mh = curl_multi_init();
 23         for( $i=1; $i<=$this->main; $i+=self::PAGE ) {
 24             $url = 'http://search.yahoo.co.jp/search?p='.urlencode($keyword).'&tid=top_ga1_sa&ei=UTF-8&aq=-1&oq='.urlencode($keyword).'&pstart=1&fr=top_ga1_sa&b='.$i;
 25             $ch = curl_init();
 26             //設置選項
 27             curl_setopt_array($ch, array(
 28                 CURLOPT_URL => $url,
 29                 CURLOPT_HEADER => false,
 30                 CURLOPT_SSL_VERIFYPEER => false,
 31                 CURLOPT_RETURNTRANSFER => true,
 32                 CURLOPT_TIMEOUT => 30,
 33                 CURLOPT_AUTOREFERER => true
 34                 )
 35             );
 36             curl_multi_add_handle($mh, $ch); // 添加批處理句柄
 37             $chs['handle'][$i]['ch'] = $ch;
 38             $chs['handle'][$i]['url'] = $url;
 39         }
 40         $chs['mh'] = $mh;
 41         return $chs;
 42     }
 43     
 44     // 處理CURL請求
 45     public function exec_curl_get_data($keyword, $path) {
 46         $error = '';
 47         $this->path = $path;
 48         $chs = $this->_gather_data($keyword);
 49         if(empty($chs)) return ''; 
 50          // 執行批處理句柄
 51         $active = null;
 52         do {
 53            $mrc = curl_multi_exec($chs['mh'],$active);
 54            //$info = curl_multi_info_read($chs['mh']);
 55         } while ($active > 0);
 56         // 獲取數據
 57         $responses = array();
 58         foreach($chs['handle'] as $k=>$ch){ 
 59             if(curl_error($ch['ch'])){
 60                 $error .= "\n".'error提示:'.curl_error($ch['ch']).'-------URL:'.$ch['url'].'--------時間:'.date('Y-d-m H:i:s',time())."\n";
 61             } else {
 62                 $responses[$k]['data'] = curl_multi_getcontent( $ch['ch'] );
 63             }
 64             
 65             //curl_multi_info_read($mh);
 66             // close current handler 
 67             curl_multi_remove_handle($chs['mh'], $ch['ch']); 
 68             curl_close($ch['ch']);
 69         }
 70         //關閉curl 批處理
 71         curl_multi_close($chs['mh']);
 72         $str = '';
 73         if($error != '') {
 74             $this->_writeFile('get_rank_log.txt', $error, 'ab+');
 75         }
 76         foreach ($responses as $val) {
 77             if(!empty($val['data'])) {
 78                 $str.= $this->_get_keyword_link_preg($val['data']);
 79             }
 80         }
 81         $str = substr($str, 0 ,-1);
 82         $contents = explode('|', $str);
 83         return $contents;
 84     }
 85 
 86     // 過濾數據 獲取鏈接
 87     private function _get_keyword_link_preg ($str) {
 88         $res = '';
 89         if(empty($str)) {
 90             return '';
 91         }
 92         $arr = explode('<div id="web">', $str);
 93         $arr1 = explode('<div id="posS" class="spns">', $arr[1]);
 94         $arr2 = preg_replace('#<div id=\"pg\">[\s\S]+#', '', $arr1[0]);
 95         $arr3 = preg_replace('#<div id=\"rel\">[\s\S]+#', '', $arr2);
 96         $arr4 = preg_replace('#<em>[\s\S]+?</em>#', '', $arr3);
 97         if(preg_match_all('#href=\"(.*?)\">#',$arr4,$arr5) !== false) {
 98             foreach($arr5[1] as $val) {
 99                 $res.= urldecode($val).'|';
100             }
101         }
102         return $res;
103     }
104 
105     // 寫入文件
106     public function _writeFile($fileName, $data, $method="rb+", $iflock=1, $check=1, $chmod=1){
107         $check && @strpos($this->path.'/'.$fileName, '..')!==false && exit('403 Forbidden!');
108         @touch($this->path.'/'.$fileName);
109         $handle = @fopen($this->path.'/'.$fileName, $method);
110         if($iflock) {
111             @flock($handle,LOCK_EX);
112         }
113         $fw = @fwrite($handle,$data);
114         if($method == "rb+") ftruncate($handle, strlen($data));
115         fclose($handle);
116         $chmod && @chmod($this->path.'/'.$fileName,0777);
117     }
118 }
119 ?>

 

 1 function array_unique_fb($array){
 2     $temp = array();
 3     $data = array();
 4     foreach ($array as $value){
 5         $value = join(",",$value); //降維,也可以用implode,將一維數組轉換為用逗號連接的字符串
 6         $temp[] = $value;
 7     }
 8         $temp = array_flip(array_flip($temp));    //去掉重復的字符串,也就是重復的一維數組
 9     foreach ($temp as $k => $value){
10         $temp[$k] = explode(",",$value);   //再將拆開的數組重新組裝
11     }
12     foreach ($temp as $key => $value) {
13         $data[$key]['keyword'] = $value[0];
14         $data[$key]['domain'] = $value[1];
15     }
16     return $data;
17 }

 

 

  1. 上一頁:
  2. 下一頁:
Copyright © 程式師世界 All Rights Reserved