原生態php通過dom獲取div/table裡面的內容,不用正則!
阿新 • • 發佈:2019-01-06
原生態php獲取網頁標籤裡面的內容,不用外掛!內容的提取不靠正則(正則只用來預處理<head>裡的編碼宣告),直接用DOM/XPath一把摳出來!
// Demo script: download a page, parse it with DOMDocument, and print the text
// content of the first <div class="contents"> element inside a <pre> block.
error_reporting(E_ALL);

// _getUrl() hands back curl_exec()'s result, which is false when the transfer fails.
$out = _getUrl('http://www.gdczepb.gov.cn/detail/23328');

// Text that will be printed; stays empty if any step below fails.
$str = '';

if (!is_string($out) || $out === '') {
    // Nothing to parse: feeding false/'' into loadHTML() would warn (or throw on PHP 8).
    trigger_error('Download failed or returned an empty body.', E_USER_WARNING);
} else {
    // Swap the whole <head> for a single UTF-8 charset declaration so DOMDocument
    // has an explicit encoding hint, and put a <br> in front of every <p>.
    $prepared = preg_replace(
        ['/<head>([\s\S]+?)<\/head>/i', '/<p>/i'],
        ['<head><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head>', '<br><p>'],
        $out
    );
    // preg_replace() yields null on a PCRE error (e.g. backtrack limit);
    // fall back to the untouched markup instead of parsing null.
    if ($prepared !== null) {
        $out = $prepared;
    }
    // echo $out;

    $dom = new DOMDocument();
    // Collect libxml's markup warnings internally instead of silencing every
    // possible error with the @ operator.
    $previousLibxmlSetting = libxml_use_internal_errors(true);
    $loaded = $dom->loadHTML($out);
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlSetting);

    if (!$loaded) {
        trigger_error('The downloaded HTML could not be parsed.', E_USER_WARNING);
    } else {
        $xpath = new DOMXPath($dom);
        $url = $xpath->query("//div[@class='contents']");

        // Only dereference item(0) when the query actually matched something.
        if ($url instanceof DOMNodeList && $url->length > 0) {
            $str = $url->item(0)->nodeValue;
        } else {
            trigger_error('No <div class="contents"> element was found in the page.', E_USER_WARNING);
        }
    }
}

echo '<pre>';
print_r($str);
// Alternative that escapes HTML special characters before printing:
// print_r(htmlspecialchars($str));
echo '</pre>';
/**
 * Fetch a URL with cURL and return the response body.
 *
 * All transfer options are collected in one array and applied in a single
 * curl_setopt_array() call. The request sends a desktop Chrome User-Agent
 * string and allows up to 60 seconds for the connection to be established.
 *
 * @param string $url Address to request.
 * @return string|bool The value of curl_exec(): the response body on success,
 *                     false on failure.
 */
function _getUrl($url)
{
    $handle = curl_init();

    curl_setopt_array($handle, [
        CURLOPT_URL => $url,
        // Return the body from curl_exec() instead of writing it to output.
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36',
        CURLOPT_CONNECTTIMEOUT => 60,
    ]);

    $response = curl_exec($handle);
    curl_close($handle);

    return $response;
}