PHP：使用登入後的 Cookie 與使用者代理（User-Agent），透過 cURL 取得網頁內容
阿新 • • 發佈:2018-12-15
/**
 * Fetch a URL with an HTTP GET request through cURL and return the response body.
 *
 * @param string $url        Target URL.
 * @param array  $data       Optional query parameters. When non-empty they are
 *                           URL-encoded and appended to the query string of $url.
 * @param array  $header     Raw request header lines, each in "Name: value" form.
 * @param int    $timeout    Maximum time in seconds the whole transfer may take.
 * @param bool   $verify_ssl Verify the server certificate and host name. Pass false
 *                           only for hosts whose certificate cannot be validated;
 *                           doing so exposes the request (including any cookie
 *                           header) to man-in-the-middle interception.
 *
 * @return string The response body.
 *
 * @throws RuntimeException When the cURL handle cannot be created or the transfer fails.
 */
function curl_get_https($url, $data = array(), $header = array(), $timeout = 30, $verify_ssl = true)
{
    // Append the query parameters instead of silently dropping them.
    if (is_array($data) && $data) {
        $query = http_build_query($data);
        if ($query !== '') {
            $url .= (strpos($url, '?') === false ? '?' : '&') . $query;
        }
    }

    $ch = curl_init();
    if ($ch === false) {
        throw new RuntimeException('Unable to initialise a cURL handle');
    }

    // Certificate and host-name checks are on unless the caller opts out.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, (bool) $verify_ssl);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, $verify_ssl ? 2 : 0);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    curl_setopt($ch, CURLOPT_HTTPGET, true);
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);

    $response = curl_exec($ch);

    // Read the error details before the handle is released.
    $errno = curl_errno($ch);
    $error = curl_error($ch);

    // curl_close() has no effect on PHP 8+, so it is only called on older versions.
    if (PHP_VERSION_ID < 80000) {
        curl_close($ch);
    }

    if ($response === false) {
        // Throw rather than die() so the caller can decide how to recover.
        throw new RuntimeException('cURL request failed: ' . $error, $errno);
    }

    return $response;
}
// $list is a two-dimensional array of rows obtained from a database query
// (not shown here); each row is read for its 'company' and 'ok' keys.

// The request headers are the same for every company, so build them once.
// Both strings are placeholders to be replaced by the user: the first stands
// for the cookie copied after logging in to Tianyancha, the second for the
// copied User-Agent.
// NOTE(review): cURL expects each entry as a complete "Name: value" header
// line - confirm the pasted values include the header name and colon.
$header = array(
    "這裡是登入天眼查之後複製下來的cookie",
    "User-Agent這裡是複製下來的使用者代理",
);

// Matches the span in the search-result markup whose text is extracted.
$regex = "/<span class=\"overflow-width over-hide vertical-bottom in-block\" style=\"max-width:500px;\">.*?<\/span>/im";

// Collected results; one entry is appended per processed row.
if (!isset($phones)) {
    $phones = array();
}

foreach ($list as $key => $val) {
    // Only rows whose 'ok' flag equals 0 are looked up. The comparison is
    // deliberately loose: the value presumably comes straight from a database
    // row and may be a numeric string - verify against the query.
    if ($val['ok'] != 0) {
        continue;
    }

    $url = "https://www.tianyancha.com/search?key=" . urlencode($val['company']);
    $html = curl_get_https($url, array(), $header);

    // Use the first matching span only. When the page has no such span
    // (no search hit, changed markup, blocked request) record an empty
    // string instead of reading an undefined offset.
    $phone = '';
    if (is_string($html) && preg_match($regex, $html, $match) === 1) {
        $phone = strip_tags($match[0]);
    }
    $phones[] = $phone;
}