Curl 模擬登陸網站得到返回資料
阿新 • • 發佈:2018-12-20
之前有一個佈置的任務是要用程式碼模擬登陸 https://www.biaojiquxiao.com/ 網站得到返回的資訊
首先要輸入一個手機號碼:如果該手機號已經查詢過,網站會直接返回資料;如果該手機號還沒有查詢過,則需要先通過一個點選文字的圖片驗證:
將點選的位置傳送回去進行驗證
當我進行抓取圖片的時候發現這張圖片是後臺用程式碼隨機生成的jfif格式的圖片
使用curl對其進行抓取後存入本地的圖片檔案中
再上傳到騰訊api進行圖文識別
這裡還有一處沒有辦法解決的問題:
生成的圖片文字是隨機的,所以可能會有多個文字重疊的情況,這樣的話,是無法識別出該文字的
所以當我們沒有匹配到相關文字時,再次爬取新的圖片再做驗證就可以了
匹配到相關文字位置資訊後傳送給後臺
但是第一次傳送後,後臺返回的資料是 0(驗證失敗)
所以應該有相關Cookie和Session進行記錄用來驗證資訊
之後我在登陸的時候把相關 Cookie(JSESSIONID)儲存下來,傳送驗證資料時一併帶上,返回的就是 1 了
該手機號通過驗證之後,再次訪問該網站就可以得到相關資訊了
上程式碼:
<?php

namespace Home\Controller;

use Think\Controller;
use GuzzleHttp\Client;
use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\Exception\RequestException;

/**
 * Simulates a lookup on www.biaojiquxiao.com for a phone number.
 *
 * Flow implemented by index():
 *   1. GET /checkCode/{number} to obtain the JSESSIONID session cookie.
 *   2. GET /code to download the click-captcha image.
 *   3. Send the image to the Tencent handwriting OCR endpoint and locate the
 *      requested character.
 *   4. POST the click position to /checkCodeExc (the site answers "1" on
 *      success and "0" on failure).
 *   5. GET /query/{number}, then poll /status/{number} for the result.
 */
class IndexController extends Controller
{
    /** Scheme and host of the target site. */
    const SITE_URL = 'https://www.biaojiquxiao.com';

    /** Browser User-Agent sent with every request to the target site. */
    const USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36';

    /** Upper bound on captcha download/recognition attempts per request. */
    const MAX_CAPTCHA_ATTEMPTS = 10;

    /** Upper bound on /status polls while the result is still pending. */
    const MAX_STATUS_POLLS = 20;

    /** Pause between two /status polls, in microseconds. */
    const STATUS_POLL_DELAY_US = 500000;

    /** @var Client Guzzle client with a cookie jar enabled. */
    protected $client;

    /** @var string|null Value of the JSESSIONID cookie for the current session. */
    protected $JSESSIONID;

    /** @var string Phone number being looked up (from the `number` GET parameter). */
    protected $number;

    /** @var int|null Unix timestamp of the last successful checkCode() request. */
    protected $connTime;

    /**
     * Controller initialisation hook: loads the vendor autoloader and builds
     * the shared Guzzle client.
     */
    public function _initialize()
    {
        Vendor('autoload');
        $this->client = new Client([
            // Every request below uses an absolute URL; base_uri is kept
            // consistent with the Host header instead of an unrelated site.
            'base_uri' => self::SITE_URL,
            'headers' => [
                'User-Agent' => self::USER_AGENT,
                'Host' => 'www.biaojiquxiao.com',
            ],
            'cookies' => true,
            'http_errors' => true,
        ]);
    }

    /**
     * Entry point: runs the whole lookup for `?number=...` and terminates the
     * request with a JSON document of the form
     * {"code": 0|1, "message": "...", "data": ...}.
     *
     * `code` is 1 only when a non-pending status response was obtained.
     */
    public function index()
    {
        $this->number = I('get.number');

        $code = 0;
        $res = "圖片驗證失敗";
        $data = false;

        try {
            // checkCode() already returns the cookie VALUE (a string); it must
            // not be indexed again with ['Value'].
            $cookie = $this->checkCode();
            $this->JSESSIONID = $cookie;

            if ($cookie === null) {
                $res = "JSESSIONID Cookie 獲取失敗";
            } else {
                for ($attempt = 0; $attempt < self::MAX_CAPTCHA_ATTEMPTS; $attempt++) {
                    $img = $this->getImg();
                    if ($img === false) {
                        continue;
                    }

                    $data = $this->getPoint($img);
                    if (is_array($data)) {
                        // Coordinates found.
                        break;
                    }
                    if (is_string($data)) {
                        // The OCR service itself reported an error; retrying
                        // with another image will not help, so surface it.
                        $res = $data;
                        break;
                    }
                    // false: characters overlapped / not recognised, so fetch
                    // a fresh image and try again.
                }
            }

            if (is_array($data)) {
                $data['number'] = $this->number;

                // The site answers "1" when the click was accepted, "0" otherwise.
                if (trim((string) $this->checkCodeExc($cookie, $data)) === '1') {
                    $queried = $this->query($cookie);
                    if ($queried === true) {
                        $res = $this->pollStatus($cookie);
                        $code = $this->isStatusPending($res) ? 0 : 1;
                    } else {
                        // query() returns its cURL error text on failure.
                        $res = $queried;
                    }
                }
            }
        } catch (RequestException $e) {
            // Guzzle is configured with http_errors => true.
            $res = $e->getMessage();
        }

        $sent = is_array($data) ? $data : array('number' => $this->number);
        $message = '傳送的資料x,y,number 分別是' . implode(',', $sent);

        // Final result returned to the caller.
        $result = array(
            'code' => $code,
            'message' => $message,
            'data' => $res,
        );
        exit(json_encode($result, JSON_UNESCAPED_UNICODE));
    }

    /**
     * Triggers the lookup via GET /query/{number}.
     *
     * @param string $cookie JSESSIONID value.
     * @return bool|string true on success, or "cURL Error #:..." on a transport error.
     */
    public function query($cookie)
    {
        list(, $err) = $this->sendCurl(
            self::SITE_URL . "/query/" . rawurlencode((string) $this->number),
            "GET",
            $this->sessionHeaders($cookie)
        );

        if ($err) {
            return "cURL Error #:" . $err;
        }

        return true;
    }

    /**
     * Fetches the lookup status via GET /status/{number}.
     *
     * @param string $cookie JSESSIONID value.
     * @return string Raw response body, or "cURL Error #:..." on a transport error.
     */
    public function status($cookie)
    {
        list($response, $err) = $this->sendCurl(
            self::SITE_URL . "/status/" . rawurlencode((string) $this->number),
            "GET",
            $this->sessionHeaders($cookie)
        );

        if ($err) {
            return "cURL Error #:" . $err;
        }

        return (string) $response;
    }

    /**
     * Submits the captcha click position via POST /checkCodeExc.
     *
     * @param string $cookie JSESSIONID value.
     * @param array  $data   Must contain the keys 'x', 'y' and 'number'.
     * @return string|bool Raw response body, or "cURL Error #:..." on a transport error.
     */
    public function checkCodeExc($cookie, $data)
    {
        $headers = array_merge($this->sessionHeaders($cookie), array(
            "Host: www.biaojiquxiao.com",
            "Origin: https://www.biaojiquxiao.com",
            // Referer follows the number being verified rather than a
            // hard-coded one.
            "Referer: https://www.biaojiquxiao.com/checkCode/" . rawurlencode((string) $data['number']),
            "User-Agent: " . self::USER_AGENT,
        ));

        // http_build_query() URL-encodes the values.
        $body = http_build_query(array(
            'x' => $data['x'],
            'y' => $data['y'],
            'number' => $data['number'],
        ));

        list($response, $err) = $this->sendCurl(
            self::SITE_URL . "/checkCodeExc",
            "POST",
            $headers,
            $body
        );

        if ($err) {
            return "cURL Error #:" . $err;
        }

        return $response;
    }

    /**
     * Opens the captcha page (GET /checkCode/{number}) so the server issues a
     * session cookie, and returns that cookie's value.
     *
     * @return string|null The JSESSIONID value (or the first cookie's value if
     *                     no cookie of that name exists); null when the jar is empty.
     */
    public function checkCode()
    {
        $url = self::SITE_URL . '/checkCode/' . rawurlencode((string) $this->number);
        $this->client->request('GET', $url);
        $this->connTime = time();

        $config = $this->client->getConfig();
        $jar = isset($config['cookies']) ? $config['cookies'] : null;
        if (!$jar instanceof CookieJar) {
            return null;
        }

        $cookies = $jar->toArray();
        foreach ($cookies as $entry) {
            if (isset($entry['Name'], $entry['Value']) && $entry['Name'] === 'JSESSIONID') {
                return $entry['Value'];
            }
        }

        // Fall back to the first cookie, as the original implementation did.
        return isset($cookies[0]['Value']) ? $cookies[0]['Value'] : null;
    }

    /**
     * Downloads the captcha image (GET /code) into PUBLIC/images/tempImg.
     *
     * @return string|false Path of the saved image, or false if it could not be written.
     */
    public function getImg()
    {
        $url = self::SITE_URL . "/code";
        $dir = constant('PUBLIC') . 'images/tempImg';
        // Create exactly the directory that is checked and written to.
        if (!is_dir($dir)) {
            mkdir($dir, 0777, true);
        }

        // uniqid() avoids collisions between concurrent requests.
        $path = $dir . '/' . uniqid('captcha_', true) . '.jpg';

        $response = $this->client->get($url, array(
            'headers' => [
                'Referer' => self::SITE_URL . '/checkCode/' . rawurlencode((string) $this->number),
                'Host' => 'www.biaojiquxiao.com',
                'Accept' => 'image/webp,image/apng,image/*,*/*;q=0.8',
                'Accept-Encoding' => ' gzip, deflate, br',
                'Cookie' => "JSESSIONID=" . $this->JSESSIONID,
            ],
        ));

        // Cast the PSR-7 stream to a string to get the raw image bytes.
        $content = (string) $response->getBody();

        if (file_put_contents($path, $content) === false) {
            return false;
        }

        return file_exists($path) ? $path : false;
    }

    /**
     * Sends the captcha image to the Tencent handwriting OCR endpoint and
     * returns the position to click.
     *
     * The code reads the target character from the 5th word (index 4) of the
     * first recognised item, then looks for it among items 1..4.
     * NOTE(review): this presumably mirrors the captcha layout (a prompt line
     * followed by four candidate characters) - confirm against a real response.
     *
     * The temporary image is deleted on every path once it has been read.
     *
     * @param string $path Path of the captcha image on disk.
     * @return array|string|false ['x' => ..., 'y' => ...] on success; the
     *         service's error message when it reports a non-zero code;
     *         false when the character could not be located.
     */
    public function getPoint($path)
    {
        $raw = file_get_contents($path);
        if (file_exists($path)) {
            unlink($path); // remove the temporary image
        }
        if ($raw === false) {
            return false;
        }

        $data = array(
            'appid' => "",
            'bucket' => '',
            'image' => base64_encode($raw),
        );
        $content = json_encode($data);
        $url = 'http://recognition.image.myqcloud.com/ocr/handwriting';
        $authorization = $this->getAuthorization();
        $response = $this->posturl($url, $content, $authorization);

        if (!is_array($response)) {
            // Transport failure or a non-JSON answer.
            return false;
        }

        if (isset($response['code']) && $response['code'] != 0) {
            // The Tencent API rejected the request.
            return isset($response['message']) ? $response['message'] : false;
        }

        $items = (isset($response['data']['items']) && is_array($response['data']['items']))
            ? $response['data']['items']
            : array();

        if (!isset($items[0]['words'][4]['confidence'], $items[0]['words'][4]['character'])) {
            return false;
        }

        $target = $items[0]['words'][4];
        if ($target['confidence'] < 0.5) {
            // Recognition of the requested character is too uncertain.
            return false;
        }

        for ($i = 1; $i <= 4; $i++) {
            if (!isset($items[$i]['itemstring'], $items[$i]['itemcoord']['x'], $items[$i]['itemcoord']['y'])) {
                continue;
            }
            if ((string) $items[$i]['itemstring'] === (string) $target['character']) {
                // +15 shifts the click away from the reported coordinate.
                // NOTE(review): presumably towards the glyph centre - confirm.
                return array(
                    'x' => $items[$i]['itemcoord']['x'] + 15,
                    'y' => $items[$i]['itemcoord']['y'] + 15,
                );
            }
        }

        return false;
    }

    /**
     * Returns a valid Tencent Cloud signature string, reusing the copy cached
     * in PUBLIC/temp/authorization.xml while it has not expired.
     *
     * @return string Base64 signature for the Authorization header.
     */
    public function getAuthorization()
    {
        $path = constant('PUBLIC') . 'temp/authorization.xml';

        // Reuse the cached signature only while it is still valid.
        if (is_file($path)) {
            $xml = simplexml_load_file($path);
            if ($xml !== false
                && (int) $xml->overtime > time()
                && (string) $xml->content !== ''
            ) {
                return (string) $xml->content;
            }
        }

        $appid = "";
        $bucket = "";
        $secret_id = "";
        $secret_key = "";
        $expired = time() + 2592000;
        $current = time();
        $rdm = rand();
        $srcStr = 'a=' . $appid . '&b=' . $bucket . '&k=' . $secret_id . '&e=' . $expired
            . '&t=' . $current . '&r=' . $rdm . '&f=';
        $signStr = base64_encode(hash_hmac('SHA1', $srcStr, $secret_key, true) . $srcStr);

        // Cache the signature; treat it as stale slightly before it really expires.
        $overtime = $expired - 2000;
        $str = "<?xml version=\"1.0\" encoding=\"utf8\"?><authorization><overtime>$overtime</overtime><content>$signStr</content></authorization>";

        $dir = dirname($path);
        if (!is_dir($dir)) {
            mkdir($dir, 0777, true);
        }
        file_put_contents($path, $str);

        return $signStr;
    }

    /**
     * Calls a Tencent API endpoint with a JSON body.
     *
     * @param string $url           Endpoint URL.
     * @param string $data          JSON-encoded request body.
     * @param string $authorization Signature for the Authorization header.
     * @return array|null Decoded JSON response, or null on a transport error
     *                    or an undecodable body.
     */
    public function posturl($url, $data, $authorization)
    {
        // cURL derives the Host header from $url, so none is set by hand.
        $headerArray = array(
            "Content-Type: application/json; charset=utf-8",
            "Accept: application/json",
            "Authorization: " . $authorization,
        );

        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, $url);
        // NOTE(review): TLS verification is disabled, as in the original code.
        // The only caller in this class uses a plain http:// URL, so it has no
        // effect there; enable verification before using this with https.
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
        curl_setopt($curl, CURLOPT_POST, 1);
        curl_setopt($curl, CURLOPT_POSTFIELDS, $data);
        curl_setopt($curl, CURLOPT_HTTPHEADER, $headerArray);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl, CURLOPT_TIMEOUT, 30);
        $output = curl_exec($curl);
        curl_close($curl);

        if ($output === false) {
            return null;
        }

        return json_decode($output, true);
    }

    /**
     * Polls /status/{number} until the response is no longer pending or the
     * poll limit is reached.
     *
     * @param string $cookie JSESSIONID value.
     * @return string The last response returned by status().
     */
    private function pollStatus($cookie)
    {
        $res = $this->status($cookie);
        for ($poll = 1; $poll < self::MAX_STATUS_POLLS && $this->isStatusPending($res); $poll++) {
            usleep(self::STATUS_POLL_DELAY_US);
            $res = $this->status($cookie);
        }

        return $res;
    }

    /**
     * Tells whether a /status response is a JSON object whose `status` is 0,
     * which the polling logic treats as "no result yet".
     *
     * @param string $res Raw response from status().
     * @return bool
     */
    private function isStatusPending($res)
    {
        $decoded = json_decode($res);

        return is_object($decoded) && isset($decoded->status) && $decoded->status == '0';
    }

    /**
     * Builds the headers shared by all cURL requests to the target site.
     *
     * @param string $cookie JSESSIONID value.
     * @return array List of raw header lines.
     */
    private function sessionHeaders($cookie)
    {
        return array(
            "Content-Type: application/x-www-form-urlencoded",
            "Cookie: JSESSIONID=" . $cookie,
            "cache-control: no-cache",
        );
    }

    /**
     * Performs one cURL request with the options common to query(), status()
     * and checkCodeExc().
     *
     * @param string      $url     Absolute URL.
     * @param string      $method  HTTP method ("GET" or "POST").
     * @param array       $headers Raw header lines.
     * @param string|null $body    Request body, or null for none.
     * @return array [response body or false, cURL error text ('' if none)].
     */
    private function sendCurl($url, $method, array $headers, $body = null)
    {
        $options = array(
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_ENCODING => "",
            CURLOPT_MAXREDIRS => 10,
            CURLOPT_TIMEOUT => 30,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_CUSTOMREQUEST => $method,
            CURLOPT_HTTPHEADER => $headers,
        );
        if ($body !== null) {
            $options[CURLOPT_POSTFIELDS] = $body;
        }

        $curl = curl_init();
        curl_setopt_array($curl, $options);
        $response = curl_exec($curl);
        $err = curl_error($curl);
        curl_close($curl);

        return array($response, $err);
    }
}