爬蟲:php實現 百度首頁書籤的獲取,以及百度首頁書籤的遷移
阿新 • • 發佈:2018-12-12
1,獲取舊賬戶的cookie,token等資料,從瀏覽器審查元素即可
點選「新增分類」,即可獲取到 cookie、token 等資料
2,利用正則獲取書籤資訊資料(獲取舊賬戶的書籤資訊,並為新賬戶建立書籤分類,同時儲存分類 id;curl 為自己實現的類)
/**
 * Step 2: read the old account's bookmark folders and recreate them for the new account.
 *
 * Fetches the Baidu home page with the old account's cookie, extracts every
 * bookmark folder and the bookmarks inside it with regular expressions, then
 * posts cmd=add_dir once per folder using the new account's cookie.
 *
 * Redis keys written:
 *  - dir_create: JSON map folderName => ["name;url", ..., "dir_id" => new folder id].
 *                "dir_id" is only present for folders that were created successfully.
 *  - dir_error:  JSON map of the folders whose creation failed (written only on failure).
 *
 * NOTE(review): bookmarks are stored as "name;url" taken verbatim from the HTML;
 * a title containing ';' (e.g. an HTML entity) would be ambiguous for the reader
 * of dir_create — confirm whether that can occur.
 *
 * @throws \RuntimeException when the home page could not be fetched.
 */
public function actionCreatedir()
{
    $redis = Yii::$app->redis;
    // Same value as the former magic number 320.
    $jsonFlags = JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE;

    // --- 1. Download the home page as the OLD account -------------------
    $curl = new curl();
    // Cookie of the old account.
    $curl->setOption(CURLOPT_COOKIE, 'BxxxxxxxxxxxxxxxAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJcur1uXLq9bW; BD_HOME=1; H_PS_PSSID=1423_21088_18559_26350; BD_UPN=12314353');
    $curl->setOption(CURLOPT_REFERER, 'https://www.baidu.com/');
    // presumably keeps the handle open so it can be reused below — confirm against the curl class
    $curl->close = false;
    $curl->setUrl('http://www.baidu.com/');
    $html = $curl->get();

    // Without this guard a failed download would overwrite dir_create with "[]".
    if (!is_string($html) || $html === '') {
        throw new \RuntimeException('Could not fetch the Baidu home page for the old account.');
    }

    // --- 2. Extract folders and their bookmarks -------------------------
    // Group 1: folder name, group 2: HTML fragment holding that folder's bookmarks.
    $dirPattern = "/<span class=name-text.*>((?!<).*)<\/span>.*(<div.*dir-content.*del-dir)/isU";
    // Group 1: bookmark title, group 2: bookmark URL.
    $linkPattern = "/<a.*title=\"(.*)\".*href=\"(.*)\"/isU";

    preg_match_all($dirPattern, $html, $dirs);

    $result = [];
    foreach ($dirs[2] as $index => $fragment) {
        preg_match_all($linkPattern, $fragment, $links);

        $bookmarks = [];
        foreach ($links[1] as $linkIndex => $title) {
            $bookmarks[] = $title . ";" . $links[2][$linkIndex];
        }
        $result[$dirs[1][$index]] = $bookmarks;
    }

    // --- 3. Create every folder for the NEW account ---------------------
    $createDirUrl = 'https://www.baidu.com/home/subscribe/submit/manoperation';
    // Cookie of the new account.
    $newCookie = 'BAIDUID=F6xxxxxxxxxxxxxxxxxx';

    $errors = [];
    foreach ($result as $dirName => &$entry) {
        $curl->setOption(CURLOPT_COOKIE, $newCookie);
        $curl->setUrl($createDirUrl);

        $raw = $curl->post([
            'cmd'        => 'add_dir',
            'dirName'    => $dirName,
            'tabid'      => 1,
            'indextype'  => 'manht',
            'bsToken'    => '53887830be8b71f61233282aaff9a7d4bfb',
            '_req_seqid' => '0xc9e6ddb612300040933',
            'sid'        => '1433_21079123_26350_20928',
        ], 'build');
        $response = is_string($raw) ? json_decode($raw, true) : null;

        // A missing or malformed response used to pass the `errNo != 0` check
        // (null != 0 is false) and stored a null dir_id; treat it as a failure.
        $created = is_array($response)
            && isset($response['errNo'], $response['data']['dirId'])
            && (int) $response['errNo'] === 0;

        if ($created) {
            $entry['dir_id'] = $response['data']['dirId'];
        } else {
            $errors[$dirName] = $entry;
            // Persisted immediately so the failures survive an aborted run.
            $redis->set('dir_error', json_encode($errors, $jsonFlags));
        }
    }
    // Break the reference so later writes to $entry cannot corrupt $result.
    unset($entry);

    $redis->set('dir_create', json_encode($result, $jsonFlags));
}
3,建立具體的書籤(上一步儲存的結果已包含書籤目錄,此時遍歷建立即可)
/**
 * Step 3: create the individual bookmarks for the new account.
 *
 * Reads the folder map stored under the Redis key "dir_create" and posts one
 * cmd=add request per bookmark into the folder identified by its "dir_id".
 * Folders without a "dir_id" are skipped.
 *
 * Optional POST parameters (each falls back to a hard-coded default):
 *  - cookie: cookie of the new account
 *  - qid:    value sent as _req_seqid
 *  - token:  value sent as bsToken
 *  - sid:    value sent as sid
 */
public function actionCreateitem()
{
    // Read the expected fields explicitly instead of extract()-ing the request:
    // extract() lets a request define arbitrary local variables.
    $post = Yii::$app->request->post();
    if (!is_array($post)) {
        $post = [];
    }
    // Cookie of the new account.
    $cookie = $post['cookie'] ?? 'BAIDUID=F6D138xxxxxxxxxxxxxS_PSSID=1433_21079_26350_20928; BD_UPN=12314353';
    $seqId  = $post['qid'] ?? '0x9d28b81xxxxxxxxx00011e6a';
    $token  = $post['token'] ?? '53887830bxxxxxxxxx282aaff9a7d4bfb';
    $sid    = $post['sid'] ?? '1433_xxxxxxx0_20928';

    $redis = Yii::$app->redis;
    $stored = $redis->get('dir_create');
    $folders = is_string($stored) ? json_decode($stored, true) : null;
    if (!is_array($folders)) {
        // Nothing stored yet, or the stored value is not valid JSON.
        return;
    }

    $curl = new curl();
    $curl->setOption(CURLOPT_COOKIE, $cookie);
    // presumably keeps the handle open between requests — confirm against the curl class
    $curl->close = false;

    foreach ($folders as $folder) {
        if (!is_array($folder) || !isset($folder['dir_id'])) {
            continue;
        }

        foreach ($folder as $key => $entry) {
            // Numeric keys hold "name;url" bookmark entries; "dir_id" is metadata.
            if (!is_numeric($key) || !is_string($entry)) {
                continue;
            }

            // Limit of 2 keeps URLs that themselves contain ';' intact.
            $parts = explode(';', $entry, 2);
            if (count($parts) < 2) {
                continue;
            }

            // Build a fresh payload: reusing the POST array would forward the
            // caller's cookie/token fields to Baidu in the form body.
            $payload = [
                'cmd'         => 'add',
                'from'        => 'u_layer',
                'name'        => $parts[0],
                'url'         => $parts[1],
                'customDirId' => $folder['dir_id'],
                'tabid'       => '1',
                'indextype'   => 'manht',
                '_req_seqid'  => $seqId,
                'bsToken'     => $token,
                'sid'         => $sid,
            ];

            self::create_item($payload, $curl);
        }
    }
}

/**
 * Posts a single "add bookmark" request.
 *
 * @param array $data Form fields of the request (cmd, name, url, customDirId, ...).
 * @param curl  $curl Client already configured with the new account's cookie.
 *
 * @return mixed Whatever curl::post() returned; earlier versions discarded it.
 */
public static function create_item($data, curl $curl)
{
    $curl->setUrl('https://www.baidu.com/home/subscribe/submit/manoperation');

    return $curl->post($data, 'build');
}
效果圖:
舊賬戶的書籤圖
新賬戶書籤圖:(未分類的書籤沒有獲取到資料)