1. 程式人生 > >爬蟲:php實現 百度首頁書籤的獲取,以及百度首頁書籤的遷移

爬蟲:php實現 百度首頁書籤的獲取,以及百度首頁書籤的遷移

1,獲取舊賬戶的cookie,token等資料,從瀏覽器審查元素即可

點選「新增分類」,即可獲取到 cookie、token 等資料

2,利用正則獲取書籤資訊資料(獲取舊賬戶的書籤資訊,並為新賬戶建立書籤分類,同時儲存分類 id;curl 為自己實現的類)

/**
 * Scrapes the bookmark categories of the old Baidu account from the home
 * page, recreates each category on the new account, and stores the outcome
 * in Redis.
 *
 * Redis keys written:
 *  - `dir_create`: JSON map of category name => list of "title;href" strings,
 *    plus a `dir_id` entry for every category that was created successfully.
 *  - `dir_error`: JSON map of the categories whose creation failed (written
 *    only when at least one creation failed).
 *
 * @return void
 */
public function actionCreatedir(){
        $redis=Yii::$app->redis;
        $curl=new curl();
        // Cookie of the old account.
        $curl->setOption(CURLOPT_COOKIE,'BxxxxxxxxxxxxxxxAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJcur1uXLq9bW; BD_HOME=1; H_PS_PSSID=1423_21088_18559_26350; BD_UPN=12314353');
        $curl->setOption(CURLOPT_REFERER,'https://www.baidu.com/');
        $curl->close=false;
        $curl->setUrl('http://www.baidu.com/');
        $data=$curl->get();

        // Group 1 captures the category name, group 2 the HTML fragment that
        // holds the category's links.
        $pattern="/<span class=name-text.*>((?!<).*)<\/span>.*(<div.*dir-content.*del-dir)/isU";
        preg_match_all($pattern,$data,$matchs);

        // Group 1 captures the link title, group 2 its href.
        $pattern="/<a.*title=\"(.*)\".*href=\"(.*)\"/isU";
        $result=[];
        foreach ($matchs[2] as $key=>$fragment) {
            preg_match_all($pattern,$fragment,$links);
            $need=[];
            foreach ($links[1] as $i_key=>$title){
                $need[]=$title.";".$links[2][$i_key];
            }
            $result[$matchs[1][$key]]=$need;
        }

        $create_cate_url='https://www.baidu.com/home/subscribe/submit/manoperation';
        // Cookie of the new account.
        $newcookie='BAIDUID=F6xxxxxxxxxxxxxxxxxx';
        // 320 in the original: keep slashes and non-ASCII characters readable.
        $json_flags=JSON_UNESCAPED_SLASHES|JSON_UNESCAPED_UNICODE;

        $error=[];
        // Iterate by value and write through $result[$key]; a by-reference
        // loop variable would stay bound to the last element after the loop.
        foreach ($result as $key=>$item) {
            // Re-applied before every request, as in the original code.
            // NOTE(review): may be redundant if curl::post() keeps its options.
            $curl->setOption(CURLOPT_COOKIE,$newcookie);
            $curl->setUrl($create_cate_url);
            $response=$curl->post([
                'cmd'=>'add_dir',
                'dirName'=>$key,
                'tabid'=>1,
                'indextype'=>'manht',
                'bsToken'=>'53887830be8b71f61233282aaff9a7d4bfb',
                '_req_seqid'=>'0xc9e6ddb612300040933',
                'sid'=>'1433_21079123_26350_20928',
            ],'build');
            $return=is_string($response)?json_decode($response,true):null;

            // A response that is not valid JSON decodes to null; without the
            // is_array() check it would pass the errNo test and be recorded
            // as a success with an empty dir_id.
            $failed=!is_array($return)
                ||($return['errNo']??0)!=0
                ||!isset($return['data']['dirId']);
            if($failed){
                $error[$key]=$item;
                continue;
            }
            $result[$key]['dir_id']=$return['data']['dirId'];
        }

        if($error!==[]){
            $redis->set('dir_error',json_encode($error,$json_flags));
        }
        $redis->set('dir_create',json_encode($result,$json_flags));
    }

3,建立具體的書籤(上一步已取得並儲存書籤目錄,此時遍歷建立即可)


    /**
     * Creates the individual bookmarks on the new Baidu account.
     *
     * Reads the category map stored under the Redis key `dir_create`; for
     * every category that carries a `dir_id`, each numerically keyed entry
     * ("title;href") is posted as one `add` request through create_item().
     *
     * Optional POST fields `cookie`, `qid`, `token` and `sid` override the
     * built-in defaults.
     *
     * @return void
     */
    public  function actionCreateitem()
    {
        // Read the supported overrides explicitly; extract() on request data
        // would let a caller overwrite arbitrary local variables.
        $post=(array)Yii::$app->request->post();
        // Default: cookie of the new account.
        $cookie=$post['cookie']??'BAIDUID=F6D138xxxxxxxxxxxxxS_PSSID=1433_21079_26350_20928; BD_UPN=12314353';
        $qid=$post['qid']??'0x9d28b81xxxxxxxxx00011e6a';
        $token=$post['token']??'53887830bxxxxxxxxx282aaff9a7d4bfb';
        $sid=$post['sid']??'1433_xxxxxxx0_20928';

        $redis = Yii::$app->redis;
        $bookmark = json_decode((string)$redis->get('dir_create'), true);
        if (!is_array($bookmark)) {
            // Nothing stored yet, or the stored value is not valid JSON.
            return;
        }

        $curl=new curl();
        $curl->setOption(CURLOPT_COOKIE,$cookie);
        $curl->close=false;

        foreach ($bookmark as $item) {
            // Categories without a dir_id were not created on the new account.
            if (!is_array($item) || !isset($item['dir_id'])) {
                continue;
            }
            foreach ($item as $key => $i) {
                // Numeric keys are bookmark records; 'dir_id' is metadata.
                if (!is_numeric($key) || !is_string($i)) {
                    continue;
                }
                // Split on the first ';' only, so a URL that itself contains
                // ';' (for example an HTML-escaped '&amp;') is not truncated.
                $i_array = explode(';', $i, 2);
                if (count($i_array) < 2) {
                    continue;
                }
                // Fresh payload per request: only the API fields are sent,
                // not the raw POST input.
                $data = [
                    'cmd' => 'add',
                    'from' => 'u_layer',
                    'name' => $i_array[0],
                    'url' => $i_array[1],
                    'customDirId' => $item['dir_id'],
                    'tabid' => '1',
                    'indextype' => 'manht',
                    '_req_seqid' => $qid,
                    'bsToken' => $token,
                    'sid' => $sid,
                ];
                self::create_item($data, $curl);
            }
        }
    }

    /**
     * Posts one bookmark operation to Baidu's `manoperation` endpoint.
     *
     * @param array $data form fields of the request
     * @param curl  $curl HTTP client used to send the request
     * @return mixed the value returned by $curl->post(), so callers can
     *               inspect the response instead of it being discarded
     */
    public static function create_item($data,curl $curl){
        $curl->setUrl('https://www.baidu.com/home/subscribe/submit/manoperation');
        return $curl->post($data,'build');
    }

效果圖:

舊賬戶的書籤圖

舊賬戶圖片

新賬戶書籤圖:(未分類的書籤沒有獲取到資料)

新賬戶效果圖