1. 程式人生 > Curl 模擬登陸網站得到返回資料

Curl 模擬登陸網站得到返回資料

之前有一個佈置的任務是要用程式碼模擬登陸 https://www.biaojiquxiao.com/ 網站得到返回的資訊

首先要輸入一個手機號碼:如果這個號碼之前已經查詢過,網站會直接返回資料;如果還沒有查詢過,則要先通過一個點選文字的驗證:

將點選的位置傳送回去進行驗證

當我進行抓取圖片的時候發現這張圖片是後臺用程式碼隨機生成的jfif格式的圖片

使用curl對其進行抓取後存入本地的圖片檔案中

再上傳到騰訊api進行圖文識別

這裡還有一處沒有辦法解決的問題:

生成的圖片文字是隨機的,所以可能會出現多個文字重疊的情況,這時就無法識別出該文字。

所以當我們沒有匹配到相關文字時,再次爬取新的圖片再做驗證就可以了

匹配到相關文字位置資訊後傳送給後臺

但這時返回的資料是 0(驗證失敗)

所以應該有相關Cookie和Session進行記錄用來驗證資訊

之後我在登陸的時候就把相關 Cookie 儲存下來,驗證時一併傳送,返回的結果就是 1(驗證成功)了

該手機號通過驗證後,再次訪問該網站就可以得到相關資訊了

上程式碼:

<?php

namespace Home\Controller;
use Think\Controller;
use GuzzleHttp\Client;
use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\Exception\RequestException;
/**
 * Drives the click-captcha flow of www.biaojiquxiao.com for one phone number
 * and prints the outcome as JSON.
 *
 * Flow implemented by index():
 *   1. GET /checkCode/{number} to obtain the session cookie.
 *   2. GET /code to download the captcha image, then send it to the Tencent
 *      OCR endpoint to locate the character that has to be clicked.
 *   3. POST the click coordinates to /checkCodeExc ("1" = accepted).
 *   4. GET /query/{number}, then poll /status/{number} until a result exists.
 *
 * Every method that was implicitly public stays public so existing routes and
 * callers keep working; the new helpers are private.
 */
class IndexController extends Controller
{
    /** Root URL of the site being queried. */
    const SITE_URL = 'https://www.biaojiquxiao.com';

    /** Maximum number of captcha images fetched and recognised per request. */
    const MAX_CAPTCHA_ATTEMPTS = 10;

    /** Maximum number of /status polls before giving up. */
    const MAX_STATUS_POLLS = 20;

    /** Pause between two /status polls, in microseconds. */
    const STATUS_POLL_DELAY_US = 500000;

    /** @var Client Guzzle client with a shared cookie jar. */
    protected $client;

    /** @var string|null Session cookie value returned by checkCode(). */
    protected $JSESSIONID;

    /** @var string Phone number taken from the "number" GET parameter. */
    protected $number;

    /** @var int|null Unix time at which the session was opened. */
    protected $connTime;

    /**
     * Builds the Guzzle client used by checkCode() and getImg().
     */
    public function _initialize()
    {
        Vendor('autoload');
        $this->client = new Client([
            // Every request uses an absolute URL; the base URI is kept on the
            // same site as the Host header so the two no longer disagree.
            'base_uri' => self::SITE_URL,
            'headers' => [
                'User-Agent'      => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36',
                'Host'            => 'www.biaojiquxiao.com',
            ],
            'cookies' => true,
            'http_errors' => true,
        ]);
    }

    /**
     * Entry point: runs the whole flow for ?number=... and exits with a JSON
     * document {code, message, data}. code is 1 only when a status result was
     * obtained, otherwise 0.
     */
    public function index()
    {
        $number = I('get.number');
        if (!is_scalar($number) || trim((string) $number) === '') {
            $this->respond(0, '缺少手機號碼參數 number', null);
            return;
        }
        $this->number = trim((string) $number);

        // http_errors is enabled, so Guzzle throws on 4xx/5xx and on
        // connection failures; report that instead of dying with a trace.
        try {
            $cookie = $this->checkCode();
        } catch (\GuzzleHttp\Exception\TransferException $e) {
            $this->respond(0, '無法開啟驗證頁面:' . $e->getMessage(), null);
            return;
        }
        if ($cookie === null || $cookie === '') {
            $this->respond(0, '未取得 Session Cookie', null);
            return;
        }
        // checkCode() already returns the cookie VALUE, so store it directly.
        $this->JSESSIONID = $cookie;

        // Overlapping characters can make a captcha unreadable, so fetch a new
        // image on a miss, but only a bounded number of times.
        $data = false;
        for ($attempt = 0; $attempt < self::MAX_CAPTCHA_ATTEMPTS; $attempt++) {
            $img = $this->getImg();
            if ($img === false) {
                continue;
            }
            $data = $this->getPoint($img);
            if ($data !== false) {
                // array = coordinates found; string = OCR API error message,
                // which is not worth retrying with another image.
                break;
            }
        }
        if (!is_array($data)) {
            $reason = is_string($data) ? $data : '無法識別驗證圖片';
            $this->respond(0, $reason, null);
            return;
        }
        $data['number'] = $this->number;

        $code = 0;
        $res = null;
        $verdict = $this->checkCodeExc($cookie, $data);    // "1" = captcha accepted, "0" = rejected
        if (trim((string) $verdict) === '1') {
            $queried = $this->query($cookie);
            if ($queried === true) {
                // status "0" means the result is not available yet: poll again.
                for ($poll = 0; $poll < self::MAX_STATUS_POLLS; $poll++) {
                    $res = $this->status($cookie);
                    $decoded = json_decode($res);
                    $isJson = $decoded !== null;
                    $pending = is_object($decoded)
                        && isset($decoded->status)
                        && is_scalar($decoded->status)
                        && (string) $decoded->status === '0';
                    if ($isJson && !$pending) {
                        $code = 1;
                        break;
                    }
                    usleep(self::STATUS_POLL_DELAY_US);
                }
            } else {
                // query() hands back the cURL error text on failure.
                $res = $queried;
            }
        } else {
            $res = "圖片驗證失敗";
        }

        $message = '傳送的資料x,y,number 分別是' . implode(',', $data);
        $this->respond($code, $message, $res);
    }

    /**
     * Triggers the lookup for the current number.
     *
     * @param string $cookie JSESSIONID value.
     * @return bool|string true on success, "cURL Error #:..." on transport failure.
     */
    public function query($cookie)
    {
        list(, $err) = $this->curlRequest(
            self::SITE_URL . '/query/' . rawurlencode($this->number),
            $this->sessionHeaders($cookie)
        );
        if ($err) {
            return "cURL Error #:" . $err;
        }
        return true;
    }

    /**
     * Fetches the lookup status for the current number.
     *
     * @param string $cookie JSESSIONID value.
     * @return string Raw response body, or "cURL Error #:..." on transport failure.
     */
    public function status($cookie)
    {
        list($response, $err) = $this->curlRequest(
            self::SITE_URL . '/status/' . rawurlencode($this->number),
            $this->sessionHeaders($cookie)
        );
        if ($err) {
            return "cURL Error #:" . $err;
        }
        return (string) $response;
    }

    /**
     * Submits the captcha click position.
     *
     * @param string $cookie JSESSIONID value.
     * @param array  $data   Expects keys 'x', 'y' and 'number'.
     * @return string|bool Raw response body ("1" accepted / "0" rejected per
     *                     the caller), or "cURL Error #:..." on transport failure.
     */
    public function checkCodeExc($cookie, $data)
    {
        $number = isset($data['number']) ? $data['number'] : $this->number;
        $fields = [
            'x'      => isset($data['x']) ? $data['x'] : '',
            'y'      => isset($data['y']) ? $data['y'] : '',
            'number' => $number,
        ];
        $headers = [
            "Content-Type: application/x-www-form-urlencoded",
            "Cookie: JSESSIONID=" . $cookie,
            "Host: www.biaojiquxiao.com",
            "Origin: https://www.biaojiquxiao.com",
            // The Referer must name the number being verified, not a fixed one.
            "Referer: " . self::SITE_URL . "/checkCode/" . rawurlencode($number),
            "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36",
            "cache-control: no-cache",
        ];
        list($response, $err) = $this->curlRequest(
            self::SITE_URL . '/checkCodeExc',
            $headers,
            'POST',
            http_build_query($fields, '', '&')
        );
        if ($err) {
            return "cURL Error #:" . $err;
        }
        return $response;
    }

    /**
     * Opens the captcha page for the current number and returns the session
     * cookie value set by the site.
     *
     * @return string|null Value of the JSESSIONID cookie (falls back to the
     *                     first cookie in the jar), or null when none was set.
     * @throws \GuzzleHttp\Exception\TransferException On HTTP or connection errors.
     */
    public function checkCode()
    {
        $url = self::SITE_URL . '/checkCode/' . rawurlencode($this->number);
        $this->client->request('GET', $url);
        $this->connTime = time();

        $cookies = $this->client->getConfig('cookies')->toArray();
        foreach ($cookies as $cookie) {
            if (isset($cookie['Name']) && $cookie['Name'] === 'JSESSIONID') {
                return $cookie['Value'];
            }
        }
        // No cookie of that name: keep the previous behaviour of using the first one.
        return isset($cookies[0]['Value']) ? $cookies[0]['Value'] : null;
    }

    /**
     * Downloads one captcha image into PUBLIC/images/tempImg.
     *
     * @return string|false Path of the saved image, or false on any failure.
     */
    public function getImg()
    {
        $url = self::SITE_URL . "/code";
        $dir = constant('PUBLIC') . 'images/tempImg';
        // Check and create the SAME path that the file is written to below.
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            return false;
        }
        $path = $dir . '/' . uniqid('', true) . '.jpg';

        try {
            $response = $this->client->get($url, [
                'headers' => [
                    'Referer' => self::SITE_URL . '/checkCode/' . rawurlencode($this->number),
                    'Host'    => 'www.biaojiquxiao.com',
                    'Accept'  => 'image/webp,image/apng,image/*,*/*;q=0.8',
                    'Accept-Encoding' => ' gzip, deflate, br',
                    'Cookie'          => "JSESSIONID=" . $this->JSESSIONID,
                ],
            ]);
        } catch (\GuzzleHttp\Exception\TransferException $e) {
            return false;
        }

        // Cast the PSR-7 stream to a string to get the raw image bytes.
        $content = (string) $response->getBody();
        if ($content === '' || file_put_contents($path, $content) === false) {
            return false;
        }
        return $path;
    }

    /**
     * Sends a captcha image to the OCR service and works out where to click.
     *
     * The character to click is read from the 5th word of the first OCR item;
     * OCR items 1..4 are then searched for that character. The temporary image
     * is deleted once the OCR call has returned.
     *
     * @param string $path Path of the captcha image.
     * @return array|string|false ['x' => int, 'y' => int] on success, the API's
     *                            error message when it reports a non-zero code,
     *                            false when nothing usable was recognised.
     */
    public function getPoint($path)
    {
        $raw = (is_string($path) && is_file($path)) ? file_get_contents($path) : false;
        if ($raw === false) {
            return false;
        }

        $content = json_encode([
            'appid' => "",
            'bucket' => '',
            'image' => base64_encode($raw),
        ]);
        $url = 'http://recognition.image.myqcloud.com/ocr/handwriting';
        $response = $this->posturl($url, $content, $this->getAuthorization());

        // The image is no longer needed whatever the OCR outcome was.
        if (file_exists($path)) {
            unlink($path);
        }

        if (!is_array($response)) {
            return false;
        }
        if (isset($response['code']) && $response['code'] != 0) {    // OCR API reported a failure
            return isset($response['message']) ? $response['message'] : false;
        }

        $items = (isset($response['data']['items']) && is_array($response['data']['items']))
            ? $response['data']['items']
            : [];
        if (!isset($items[0]['words'][4]['confidence'], $items[0]['words'][4]['character'])) {
            return false;
        }
        $target = $items[0]['words'][4];
        if ($target['confidence'] < 0.5) {
            return false;
        }

        $aimWord = (string) $target['character'];
        for ($i = 1; $i <= 4; $i++) {
            if (!isset($items[$i]['itemstring'], $items[$i]['itemcoord']['x'], $items[$i]['itemcoord']['y'])) {
                continue;
            }
            if ((string) $items[$i]['itemstring'] === $aimWord) {
                // +15 shifts the click away from the box origin
                // (presumably towards the middle of the glyph; confirm).
                return [
                    'x' => $items[$i]['itemcoord']['x'] + 15,
                    'y' => $items[$i]['itemcoord']['y'] + 15,
                ];
            }
        }
        return false;
    }

    /**
     * Returns a signature for the OCR API, cached in PUBLIC/temp/authorization.xml.
     *
     * @return string Base64 signature string.
     */
    public function getAuthorization()
    {
        $path = constant('PUBLIC') . 'temp/authorization.xml';

        // Reuse the cached signature only while it has NOT expired yet.
        if (is_file($path)) {
            $previous = libxml_use_internal_errors(true);
            $xml = simplexml_load_file($path);
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
            if ($xml !== false
                && (int) $xml->overtime > time()
                && (string) $xml->content !== ''
            ) {
                return (string) $xml->content;
            }
        }

        $appid = "";
        $bucket = "";
        $secret_id = "";
        $secret_key = "";
        $current = time();
        $expired = $current + 2592000;
        $rdm = rand();
        $srcStr = 'a=' . $appid . '&b=' . $bucket . '&k=' . $secret_id . '&e=' . $expired . '&t=' . $current . '&r=' . $rdm . '&f=';
        $signStr = base64_encode(hash_hmac('SHA1', $srcStr, $secret_key, true) . $srcStr);

        // Cache the signature; treat it as stale 2000 s before it really expires.
        $overtime = $expired - 2000;
        $str = "<?xml version=\"1.0\" encoding=\"utf8\"?><authorization><overtime>$overtime</overtime><content>$signStr</content></authorization>";
        $dir = dirname($path);
        if (is_dir($dir) || mkdir($dir, 0777, true) || is_dir($dir)) {
            file_put_contents($path, $str, LOCK_EX);
        }
        return $signStr;
    }

    /**
     * POSTs a JSON document to the OCR API and decodes the JSON reply.
     *
     * @param string $url           Endpoint URL.
     * @param string $data          JSON request body.
     * @param string $authorization Signature from getAuthorization().
     * @return array Decoded response; on transport or decoding failure an
     *               array of the form ['code' => -1, 'message' => string].
     */
    public function posturl($url, $data, $authorization)
    {
        // cURL derives the Host header from the URL, so it is not set by hand.
        $headerArray = [
            "Content-Type: application/json",
            "Accept: application/json",
            "Authorization: " . $authorization,
        ];
        $curl = curl_init();
        curl_setopt_array($curl, [
            CURLOPT_URL => $url,
            CURLOPT_POST => 1,
            CURLOPT_POSTFIELDS => $data,
            CURLOPT_HTTPHEADER => $headerArray,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_CONNECTTIMEOUT => 10,
            CURLOPT_TIMEOUT => 30,
            // TLS peer/host verification is left at cURL's secure defaults.
        ]);
        $output = curl_exec($curl);
        $err = curl_error($curl);
        curl_close($curl);

        if ($output === false) {
            return ['code' => -1, 'message' => "cURL Error #:" . $err];
        }
        $decoded = json_decode($output, true);
        if (!is_array($decoded)) {
            return ['code' => -1, 'message' => 'Invalid JSON response'];
        }
        return $decoded;
    }

    /**
     * Prints the final JSON document and terminates the request.
     *
     * @param int    $code    1 on success, 0 otherwise.
     * @param string $message Human-readable summary.
     * @param mixed  $data    Result payload.
     */
    private function respond($code, $message, $data)
    {
        $result = [
            'code'    => $code,
            'message' => $message,
            'data'    => $data,
        ];
        exit(json_encode($result, JSON_UNESCAPED_UNICODE));
    }

    /**
     * Header set shared by the GET requests that carry the session cookie.
     *
     * @param string $cookie JSESSIONID value.
     * @return array
     */
    private function sessionHeaders($cookie)
    {
        return [
            "Content-Type: application/x-www-form-urlencoded",
            "Cookie: JSESSIONID=" . $cookie,
            "cache-control: no-cache",
        ];
    }

    /**
     * Performs one cURL request with the options shared by query(), status()
     * and checkCodeExc().
     *
     * @param string      $url     Absolute URL.
     * @param array       $headers Raw header lines.
     * @param string      $method  HTTP method.
     * @param string|null $body    Request body, or null for none.
     * @return array [response body or false, cURL error text ('' when none)].
     */
    private function curlRequest($url, array $headers, $method = 'GET', $body = null)
    {
        $options = [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_ENCODING => "",
            CURLOPT_MAXREDIRS => 10,
            CURLOPT_TIMEOUT => 30,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_CUSTOMREQUEST => $method,
            CURLOPT_HTTPHEADER => $headers,
        ];
        if ($body !== null) {
            $options[CURLOPT_POSTFIELDS] = $body;
        }

        $curl = curl_init();
        curl_setopt_array($curl, $options);
        $response = curl_exec($curl);
        $err = curl_error($curl);
        curl_close($curl);

        return [$response, $err];
    }
}