PHP 匹配下載網路資源
阿新 • • 發佈:2018-12-19
<?php
/**
 * Match and download resources (images) referenced by a web page.
 *
 * The page is fetched, every `<img src="http(s)://..." alt="...">` tag is
 * collected, and each image is saved under
 * `<root_dir>/<scheme>_<host_and_path>/img/<alt>.<ext>`; one log line per
 * image is appended to `<root_dir>/<scheme>_<host_and_path>/log.txt`.
 */
header("Content-type: text/html;charset=utf-8");
error_reporting(E_ALL ^E_NOTICE^E_WARNING);

class DownloadFileFromWebsite{
    /** Upper-case image extensions that are accepted for download. */
    private $img_ext_arr=array('WEBP','BMP','JPG','GIF','JPEG','PSD','EPS','PNG','RAW','EMF','ICO');

    /** Target directory for this page, or false when it could not be created. */
    public $file_dir;

    /** preg_match_all() result: [0] full tags, [1] image URLs, [2] alt texts (UTF-8). */
    public $matches_x;

    /**
     * Prepare the output directory and collect the image tags of the page.
     *
     * @param string $web_url  Absolute http(s) URL of the page to scan.
     * @param string $root_dir Directory under which the per-page folder is created.
     */
    public function __construct($web_url,$root_dir){
        if(empty($web_url) || empty($root_dir)){
            exit('引數錯誤!');
        }
        $this->file_dir=$this->creatDirByWebUrl($web_url,$root_dir);
        $this->matches_x=$this->pregMatchUrl($web_url);
    }

    /**
     * Fetch the body of a URL.
     *
     * @param string $url       Address to fetch.
     * @param array  $param_arr Query parameters appended to the URL.
     * @return string|false Response body, or false when the request failed.
     */
    protected function getContentByUrl($url,$param_arr)
    {
        $param_string=http_build_query($param_arr);
        if($param_string!==''){
            // Join with '?' or '&' so the query string does not fuse with the path.
            $url.=(strpos($url,'?')===false ? '?' : '&').$param_string;
        }
        return file_get_contents($url);
    }

    /**
     * Download one file, then print and log the outcome.
     *
     * @param string $file_url   Remote address of the file.
     * @param string $save_to    Local path to write (in filesystem encoding).
     * @param string $logDir     Path of the log file that receives one line per download.
     * @param string $encodeName Base file name in filesystem encoding; converted
     *                           back to UTF-8 for the log line.
     */
    protected function dowmload_file($file_url, $save_to,$logDir,$encodeName)
    {
        $file_name=basename($file_url);
        $content=file_get_contents($file_url);

        $result=0;
        // Write only when the fetch produced data, so a failed request never
        // leaves an empty file behind (and never clobbers an earlier good copy).
        if($content!==false && $content!==''){
            $written=file_put_contents($save_to,$content);
            if($written){
                $result=$written;
            }elseif(is_file($save_to)){
                // Remove the empty/partial file that the failed write created.
                unlink($save_to);
            }
        }

        if($result>0){
            $status='下載成功!';
            $state='';
        }else{
            $status='下載失敗!!!';
            $state='[✘]';
        }

        $filename=iconv($this->fsEncoding(),'UTF-8',$encodeName);
        if($filename===false){
            $filename=$encodeName;
        }
        // pathinfo() picks the last extension, so names containing dots are handled.
        $ext=pathinfo($save_to,PATHINFO_EXTENSION);

        $log_record=$state.'圖片檔案:'.$file_name.' ------ ['.$filename.'.'.$ext.'] ------ '.date('Y-m-d H:i:s').' ------ 大小:'.round($result/1024,2).'kb ------'.$status.PHP_EOL;
        // The record contains text scraped from a remote page: escape it for the HTML response.
        echo htmlspecialchars($log_record,ENT_QUOTES,'UTF-8').'<br>';
        file_put_contents($logDir,$log_record,FILE_APPEND);
    }

    /**
     * Create the per-page directory `<root_dir>/<scheme>_<rest-of-url>/img`.
     *
     * @param string $url      Absolute http(s) URL of the page.
     * @param string $root_dir Parent directory.
     * @return string|false The per-page directory, or false when the URL is not
     *                      http(s) or the directory could not be created.
     */
    private function creatDirByWebUrl($url,$root_dir){
        // `(https?)` is a real alternation; the path part after the host is optional.
        if(!preg_match('#^(https?)://+(.+)$#i',$url,$parts)){
            echo htmlspecialchars($url,ENT_QUOTES,'UTF-8').'目錄建立失敗!<br/>';
            return false;
        }
        // Replace every character that is a path separator or illegal in Windows file names.
        $dir_path=preg_replace('#[\\\\/:*?"<>|]#','_',$parts[2]);
        $dir=$root_dir.'/'.strtolower($parts[1]).'_'.$dir_path;
        if(!is_dir($dir.'/img')){
            if(!mkdir($dir.'/img',0777,true)){
                echo htmlspecialchars($dir_path,ENT_QUOTES,'UTF-8').'目錄建立失敗!<br/>';
                return false;
            }
        }
        return $dir;
    }

    /**
     * Fetch a page and collect the `<img src=... alt=...>` tags to download.
     *
     * @param string $url Page address.
     * @return array preg_match_all() result: [0] full tags, [1] absolute http(s)
     *               image URLs, [2] alt texts. All three lists are empty when
     *               the page could not be fetched or contains no match.
     */
    public function pregMatchUrl($url){
        $string_html=$this->getContentByUrl($url,array());
        if($string_html===false){
            return array(array(),array(),array());
        }
        preg_match_all(
            '#<img\s+src=["\'](https?://[^"\']+)["\']\s+alt=["\']([^"\']*)["\'][^>]*>#i',
            $string_html,
            $matches_x
        );
        return $matches_x;
    }

    /**
     * Work out each image's extension and local name, then download it.
     *
     * Images whose extension cannot be mapped to a known image type are skipped.
     */
    public function renameDownloadFiles(){
        // Nothing to do when the directory could not be created or no tag matched.
        if(empty($this->file_dir) || empty($this->matches_x[1])){
            return;
        }
        $log_path=$this->file_dir.'/log.txt';
        $used=array();
        foreach($this->matches_x[1] as $k=>$v){
            $ext=$this->resolveImageExt($v);
            if($ext===null){
                continue;
            }
            $alt=isset($this->matches_x[2][$k]) ? $this->matches_x[2][$k] : '';
            // $this->matches_x is left untouched so calling this method twice
            // does not convert the names twice.
            $local_name=$this->toFsEncoding($this->sanitizeFileName($alt,$v));
            $ext_lower=strtolower($ext);

            // Several images may share one alt text: keep them apart by index.
            $key=strtolower($local_name.'.'.$ext_lower);
            if(isset($used[$key])){
                $local_name.='_'.$k;
                $key=strtolower($local_name.'.'.$ext_lower);
            }
            $used[$key]=true;

            $this->dowmload_file($v,$this->file_dir.'/img/'.$local_name.'.'.$ext_lower,$log_path,$local_name);
        }
    }

    /**
     * Encoding used for file names on the current system.
     *
     * Only the first three letters of PHP_OS are compared, because a plain
     * substring search for "WIN" also matches "Darwin" (macOS).
     *
     * @return string 'GBK' on Windows, 'UTF-8' elsewhere.
     */
    private function fsEncoding(){
        return strtoupper(substr(PHP_OS,0,3))==='WIN' ? 'GBK' : 'UTF-8';
    }

    /**
     * Convert a UTF-8 file name to the filesystem encoding.
     *
     * @param string $name UTF-8 file name.
     * @return string Converted name, or the input when conversion is not needed or fails.
     */
    private function toFsEncoding($name){
        $encode=$this->fsEncoding();
        if($encode==='UTF-8'){
            return $name;
        }
        $converted=iconv('UTF-8',$encode,$name);
        return $converted===false ? $name : $converted;
    }

    /**
     * Determine the image extension of a URL.
     *
     * Handles suffixes decorated by the serving site, such as `jpg!small` or
     * `jpg@2x`, by accepting a known extension that is followed by a
     * non-alphanumeric character.
     *
     * @param string $url Image URL.
     * @return string|null Upper-case extension from the accepted list, or null.
     */
    private function resolveImageExt($url){
        // Use the path component only, so a query string is not mistaken for the extension.
        $path=parse_url($url,PHP_URL_PATH);
        $base=basename(is_string($path) && $path!=='' ? $path : $url);
        $dot=strrpos($base,'.');
        if($dot===false){
            return null;
        }
        $raw=strtoupper(substr($base,$dot+1));
        if(in_array($raw,$this->img_ext_arr,true)){
            return $raw;
        }
        // Whole-word alternation (not a character class); the list holds plain letters only.
        if(preg_match('/^(?:'.implode('|',$this->img_ext_arr).')(?![A-Z0-9])/',$raw,$m)){
            return $m[0];
        }
        return null;
    }

    /**
     * Turn scraped alt text into a safe base file name.
     *
     * @param string $name Alt text taken from the page (untrusted).
     * @param string $url  Image URL, used as the fallback when the alt text is empty.
     * @return string Non-empty name without path separators or reserved characters.
     */
    private function sanitizeFileName($name,$url){
        $reserved='#[\\\\/:*?"<>|\x00-\x1F]+#';
        $name=trim(preg_replace($reserved,'_',$name)," \t\n\r\0\x0B.");
        if($name===''){
            $path=parse_url($url,PHP_URL_PATH);
            $base=basename(is_string($path) && $path!=='' ? $path : $url);
            $dot=strrpos($base,'.');
            $stem=($dot===false || $dot===0) ? $base : substr($base,0,$dot);
            $name=trim(preg_replace($reserved,'_',$stem)," \t\n\r\0\x0B.");
        }
        return $name==='' ? 'image' : $name;
    }
}

//$web_url='http://www.duok******.com/';
$pageCount=20;
$root_dir='./DuoKan_DownloadFile';
for($a=1;$a<=$pageCount;$a++){
    $web_url='http://www.duok*****.com/list/1-'.$a;
    $obj=new DownloadFileFromWebsite($web_url,$root_dir);
    // The method takes no arguments; it works on the object's own state.
    $obj->renameDownloadFiles();
}