PHP-文章簡單採集
阿新 • • 發佈:2019-01-05
以下示範在 WAMP 環境下,PHP 如何利用檔案操作函式(fopen / file_get_contents)讀取 URL 的頁面內容,再以正則表示式擷取所需片段,達到簡單的文章採集效果。
<?php
/*
 * Simple article scraper.
 *
 * Downloads the huanqiu.com front page plus three article pages, then pulls
 * the headline list and the article bodies out of the HTML with regular
 * expressions.
 *
 * Variables left in scope for the HTML template that follows:
 *   $arrT[0]  - array of "<h4>...</h4>" headline fragments (empty array if none)
 *   $arr1[0], $arr2[0], $arr3[0]
 *             - article body HTML fragments ('' when the page could not be
 *               downloaded or the pattern did not match)
 */

// Scraping approach 1 (alternative, kept disabled): read the page line by
// line through a stream handle.
/* $res = fopen("http://www.huanqiu.com/","r");
$data ='';
while($strcon = fgets($res)){
$data .= $strcon;
}
fclose($res);*/

// Scraping approach 2: fetch each page in a single call.
// file_get_contents() returns false on failure; normalise that to '' so the
// regex functions below always receive a string.
$fetch_page = function ($url) {
    $html = file_get_contents($url);
    return $html === false ? '' : $html;
};

$data  = $fetch_page("http://www.huanqiu.com/");
$data1 = $fetch_page("http://china.huanqiu.com/article/2017-02/10136020.html");
$data2 = $fetch_page("http://opinion.huanqiu.com/editorial/2017-02/10135124.html");
$data3 = $fetch_page("http://world.huanqiu.com/exclusive/2017-02/10137395.html");

// Headline patterns: the front-page block that holds the headlines, and the
// individual <h4> entries inside it.
$div_preg = '/<div id="block_id_42876" class="admin_block" blockid="42876">[\s\S]+?<\/div>/';
$title_preg = '/<h4>[\s\S]+?<\/h4>/';
// Article body pattern: from the content container up to the end-of-column
// marker comment.
// NOTE(review): the marker literal must match the page bytes exactly; if the
// site uses simplified characters for this comment the pattern will never
// match - verify against the live page.
$con_preg = '/<div class="conText">[\s\S]+?<!-- 左側 end -->/';

// Run a pattern and return the match array. When nothing matches (or PCRE
// reports an error) return array('') so that index 0 always exists and later
// reads of [0] cannot hit an undefined offset.
$first_match = function ($pattern, $html) {
    $found = array();
    if (preg_match($pattern, $html, $found) === 1) {
        return $found;
    }
    return array('');
};

// Collect the matched fragments.
$arr  = $first_match($div_preg, $data);
$arr1 = $first_match($con_preg, $data1);
$arr2 = $first_match($con_preg, $data2);
$arr3 = $first_match($con_preg, $data3);

// Extract every headline tag from the front-page block.
preg_match_all($title_preg, $arr[0], $arrT);
// Keep the shape the template iterates over even if the call above failed.
if (!isset($arrT[0]) || !is_array($arrT[0])) {
    $arrT = array(array());
}
?>
<?php
/*
 * Template: renders the scraped headline list followed by the three article
 * bodies in side-by-side scrollable panels.
 *
 * Reads $arrT[0] (headline fragments) and $arr1[0], $arr2[0], $arr3[0]
 * (article body fragments). Every read is guarded with isset() so a failed
 * download or a non-matching pattern yields an empty list / empty panel
 * instead of an undefined-offset error in the page.
 *
 * NOTE(review): the fragments are remote HTML and are echoed unescaped so
 * that their markup renders. This trusts the scraped site's content; sanitise
 * it if this page is ever exposed beyond local experimentation.
 */
?>
<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="UTF-8">
<title>文章採集</title>
<style type="text/css">
ul,li{
margin: 0;
padding: 0;
list-style:none;
}
.con{
width: 420px;
height: 460px;
float: left;
margin-right: 20px;
overflow: auto;
border: 2px solid #ccc;
border-radius: 10px;
box-shadow: 4px 5px 3px #aaa;
}
</style>
</head>
<body>
<ul>
<?php if (isset($arrT[0]) && is_array($arrT[0])): ?>
<?php foreach ($arrT[0] as $value): ?>
<li><?php echo $value;?></li>
<?php endforeach;?>
<?php endif;?>
</ul>
<div>
<div class='con'><?php echo isset($arr1[0]) ? $arr1[0] : '';?></div>
<div class='con'><?php echo isset($arr2[0]) ? $arr2[0] : '';?></div>
<div class='con'><?php echo isset($arr3[0]) ? $arr3[0] : '';?></div>
</div>
</body>
</html>