php curl 抓頁面 imgs
阿新 • • 發佈:2022-04-10
宣告
本分享純屬為了技術分享,禁止商用!!!禁止商用!!!禁止商用!!!
未經本人允許,如有發現,違者必究!!!
呼叫
// img
// Scrape the page, decode the JSON list of image URLs and download the first one.
$url = 'http://xxx.cn/';
$re = json_decode(curl_img($url), true);

// curl_img() returns a plain error message (not JSON) on failure, which decodes to null.
if ($re == null || count($re) == 0) {
    echo '獲取錯誤';
    exit;
}

// Keep only the first two images.
$re = array_slice($re, 0, 2);

$img_dir = dirname(__FILE__) . DIRECTORY_SEPARATOR . 'imgs' . DIRECTORY_SEPARATOR;

// Single file.
echo curl_downimg($re[0], $img_dir);

// Alternative: download every file one after another.
//foreach ($re as $v) {
//    echo curl_downimg($v, $img_dir, 'GET', '1');
//}

// Alternative: download all files concurrently.
//curl_downimg_multi($re, $img_dir);
方法
// ============ imgs ============

/**
 * Fetch a page and collect the image URLs found in its data-src attributes.
 *
 * Protocol-relative URLs ("//host/path") receive the page's scheme and
 * root-relative URLs ("/path") receive the page's scheme, host and port.
 * Empty values and duplicates are dropped.
 *
 * NOTE: TLS peer/host verification is switched off for this request.
 *
 * @param string $url Page to scrape.
 * @return string JSON-encoded list of image URLs on success. When the
 *                transfer fails, the raw cURL error message is returned
 *                instead (it is not valid JSON, so json_decode() yields null).
 */
function curl_img($url='')
{
    $ch = curl_init();
    curl_setopt_array($ch, [
        CURLOPT_URL => $url,
        CURLOPT_ENCODING => 'gzip,deflate',
        CURLOPT_SSL_VERIFYPEER => 0,
        CURLOPT_SSL_VERIFYHOST => 0,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_HTTPHEADER => [
            'pragma: no-cache',
            'cache-control: no-cache',
            'sec-ch-ua: " Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
            'accept: application/json, text/plain, */*',
            'content-type: application/json',
            'sec-ch-ua-mobile: ?0',
            'user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36',
            'sec-ch-ua-platform: "Windows"',
            'sec-fetch-site: same-origin',
            'sec-fetch-mode: cors',
            'sec-fetch-dest: empty',
            'accept-language: zh-CN,zh;q=0.9',
        ],
    ]);

    $output = curl_exec($ch);
    $error = curl_errno($ch) ? curl_error($ch) : null;
    // Release the handle on every path, including the error path.
    curl_close($ch);
    if ($error !== null) {
        return $error;
    }

    // The rule is site specific; adjust the pattern to the target markup.
    // [^"]* stops at the closing quote so following attributes are not swallowed.
    preg_match_all('/data-src="([^"]*)"/', (string)$output, $matches);
    $found = isset($matches[1]) ? $matches[1] : [];

    $parts = parse_url($url);
    if (!is_array($parts)) {
        $parts = [];
    }
    $scheme = isset($parts['scheme']) ? $parts['scheme'] : 'http';
    $origin = $scheme . '://'
        . (isset($parts['host']) ? $parts['host'] : '')
        . (isset($parts['port']) ? ':' . $parts['port'] : '');

    $items = array_map(function ($item) use ($scheme, $origin) {
        if (strncmp($item, '//', 2) === 0) {
            // Protocol-relative: inherit the scheme of the scraped page.
            return $scheme . ':' . $item;
        }
        if (strncmp($item, '/', 1) === 0) {
            // Root-relative: resolve against the page origin.
            return $origin . $item;
        }
        return $item;
    }, $found);

    // array_filter/array_unique keep the original keys; reindex so the
    // result is always encoded as a JSON list rather than an object.
    $items = array_values(array_unique(array_filter($items)));

    return json_encode($items, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
}

/**
 * Create the download directory when needed and return it with a trailing
 * directory separator. An empty $dir is returned unchanged.
 *
 * @param string $dir
 * @return string
 */
function _curl_downimg_prepare_dir($dir)
{
    if ($dir === '') {
        return '';
    }
    if (!is_dir($dir)) {
        mkdir($dir, 0777, true);
    }
    return rtrim($dir, '/\\') . DIRECTORY_SEPARATOR;
}

/**
 * Build the local file path an image URL is saved to.
 *
 * Only the path component of the URL is inspected, so a query string never
 * ends up in the file name or extension. The extension defaults to "jpg".
 *
 * @param string $url  Image URL.
 * @param string $dir  Target directory including its trailing separator.
 * @param string $type Naming rule: 1 = original file name, 2 = random name.
 * @return string
 */
function _curl_downimg_target_path($url, $dir, $type)
{
    $path = parse_url($url, PHP_URL_PATH);
    $info = pathinfo(is_string($path) ? $path : '');
    $ext = !empty($info['extension']) ? $info['extension'] : 'jpg';

    if ($type == '1' && isset($info['filename']) && $info['filename'] !== '') {
        $name = $info['filename'];
    } else {
        // Random name; also the fallback when the URL carries no file name.
        $name = sha1(md5(microtime(true).mt_rand(1,100000).mt_rand(1,100000)));
    }

    return $dir . $name . '.' . $ext;
}

/**
 * Download a single image to $dir.
 *
 * The partially written file is removed when the transfer fails, the
 * response status is not 200, or the size on disk differs from the number
 * of bytes cURL reports as downloaded.
 *
 * NOTE: TLS peer/host verification is switched off for this request.
 *
 * @param string $url    Image URL.
 * @param string $dir    Target directory (created when missing).
 * @param string $method HTTP method.
 * @param string $type   Naming rule: 1 = original file name, 2 = random name.
 * @return string 'ok' on success, otherwise an error message.
 */
function curl_downimg($url='',$dir='',$method='GET',$type='1')
{
    $dir = _curl_downimg_prepare_dir($dir);
    $file_path = _curl_downimg_target_path($url, $dir, $type);

    $fp = fopen($file_path, 'w');
    if ($fp === false) {
        return '無法寫入檔案: ' . $file_path;
    }

    $ch = curl_init();
    curl_setopt_array($ch, [
        CURLOPT_URL => $url,
        CURLOPT_CUSTOMREQUEST => strtoupper($method),
        CURLOPT_HEADER => 0,
        CURLOPT_SSL_VERIFYPEER => 0,
        CURLOPT_SSL_VERIFYHOST => 0,
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_FILE => $fp,
    ]);
    curl_exec($ch);

    $error = curl_errno($ch) ? curl_error($ch) : null;
    $info = curl_getinfo($ch);

    // Close the transfer and the stream before measuring the file, so all
    // buffered data has reached the disk and nothing is leaked.
    curl_close($ch);
    fclose($fp);

    if ($error !== null) {
        unlink($file_path);
        return $error;
    }

    clearstatcache(true, $file_path);
    $size = filesize($file_path);
    if ($info['http_code'] != 200 || $size != $info['size_download']) {
        unlink($file_path);
        return '資料不完整';
    }

    return 'ok';
}

/**
 * Download several images concurrently to $dir.
 *
 * Each finished transfer is matched to its own file through the cURL handle,
 * independent of completion order. Files belonging to failed transfers or
 * non-200 responses are removed. Prints "ok: N", where N is the number of
 * images saved successfully by this call.
 *
 * NOTE: TLS peer/host verification is switched off for these requests.
 *
 * @param array  $arrs   Image URLs.
 * @param string $dir    Target directory (created when missing).
 * @param string $method HTTP method.
 * @param string $type   Naming rule: 1 = original file name, 2 = random name.
 * @return void
 */
function curl_downimg_multi($arrs=array(),$dir='',$method='GET',$type='1')
{
    $dir = _curl_downimg_prepare_dir($dir);

    $conn = [];
    $file_path = [];
    $fp = [];
    $mh = curl_multi_init();

    foreach ($arrs as $k => $v) {
        $target = _curl_downimg_target_path($v, $dir, $type);
        $stream = fopen($target, 'w');
        if ($stream === false) {
            // Cannot write this file; skip the URL instead of aborting the batch.
            continue;
        }
        $file_path[$k] = $target;
        $fp[$k] = $stream;
        $conn[$k] = curl_init();
        curl_setopt_array($conn[$k], [
            CURLOPT_URL => $v,
            CURLOPT_CUSTOMREQUEST => strtoupper($method),
            CURLOPT_HEADER => 0,
            CURLOPT_SSL_VERIFYPEER => 0,
            CURLOPT_SSL_VERIFYHOST => 0,
            CURLOPT_FOLLOWLOCATION => 1,
            CURLOPT_CONNECTTIMEOUT => 60,
            CURLOPT_TIMEOUT => 60,
            CURLOPT_FILE => $stream,
        ]);
        curl_multi_add_handle($mh, $conn[$k]);
    }

    $ok = 0;
    $active = null;
    do {
        $status = curl_multi_exec($mh, $active);

        while ($done = curl_multi_info_read($mh)) {
            $handle = $done['handle'];
            // Identify which download finished; completion order is arbitrary.
            $k = array_search($handle, $conn, true);
            $info = curl_getinfo($handle);

            curl_multi_remove_handle($mh, $handle);
            curl_close($handle);
            if ($k === false) {
                continue;
            }

            fclose($fp[$k]);
            if ($done['result'] !== CURLE_OK || $info['http_code'] != 200) {
                unlink($file_path[$k]);
            } else {
                ++$ok;
            }
            unset($conn[$k], $fp[$k]);
        }

        if ($active > 0 && $status === CURLM_OK) {
            // Block until there is activity instead of spinning the CPU.
            if (curl_multi_select($mh, 1.0) === -1) {
                usleep(1000);
            }
        }
    } while ($active > 0 && $status === CURLM_OK);

    // Clean up anything left behind if the multi loop stopped early.
    foreach ($conn as $k => $handle) {
        curl_multi_remove_handle($mh, $handle);
        curl_close($handle);
        fclose($fp[$k]);
        unlink($file_path[$k]);
    }

    echo 'ok: '.$ok;
    curl_multi_close($mh);
}