採集金山詞霸每日一句一言Api
阿新 • • 發佈:2018-12-16
Api官方介面
每日:http://open.iciba.com/dsapi/
查指定時間:http://sentence.iciba.com/index.php?c=dailysentence&m=getdetail&title=2018-11-06&_=1541655200812
楊小杰Apis:https://wiki.yum6.cn/docs/apis/wiki
搭建採集介面
<?php
/**
 * One-off back-fill collector for the iCIBA "daily sentence" API.
 *
 * Requests the sentence for today and for each preceding day ($days requests
 * in total) and appends the selected fields, one per line, to iciba.txt in
 * the same directory as this script.
 */
header("Content-type: text/html; charset=utf-8");

$t1 = microtime(true);
$translation = '0'; // translation text: '0' = skip, '1' = collect
$content = '1';     // English sentence: '0' = skip, '1' = collect
$days = 100;        // how many days to fetch, counting back from today

/**
 * Fetch a URL with cURL and return the response body.
 *
 * @param string $url Address to request.
 * @return string|false Response body, or false when the transfer failed.
 */
function httpGet($url)
{
    $curl = curl_init();
    if ($curl === false) {
        return false;
    }
    $httpheader = array(
        "Accept:*/*",
        "Accept-Language:zh-CN,zh;q=0.8",
        "Connection:close",
    );
    curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1");
    curl_setopt($curl, CURLOPT_HTTPHEADER, $httpheader);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_TIMEOUT, 3);
    // NOTE(review): certificate checks are switched off. That has no effect on
    // the plain-http endpoint used below, but is unsafe if this helper is ever
    // pointed at an https URL.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($curl, CURLOPT_URL, $url);
    $res = curl_exec($curl);
    curl_close($curl);
    return $res;
}

// Store the sentences next to this script so the result does not depend on
// the current working directory.
$myfile = fopen(dirname(__FILE__) . "/iciba.txt", "a+");
if ($myfile === false) {
    http_response_code(500);
    exit('error: cannot open iciba.txt for writing');
}

// Fix the reference time once so a run that crosses midnight neither skips
// nor repeats a date.
$start = time();
for ($i = 0; $i < $days; $i++) {
    $utime = date("Y-m-d", strtotime("-" . $i . " day", $start));

    // The trailing "_" parameter is a cache buster.
    $json_string = httpGet('http://sentence.iciba.com/index.php?c=dailysentence&m=getdetail&title=' . $utime . '&_=' . time());
    $data = is_string($json_string) ? json_decode($json_string, true) : null;
    if (!is_array($data)) {
        // Timeout, HTTP failure or a non-JSON body: skip this day.
        continue;
    }

    if (isset($data['note'])) {
        fwrite($myfile, $data['note'] . "\n");
    }
    if (isset($data['translation']) && (int) $translation === 1) {
        // NOTE(review): the upstream API presumably returns the prefix in
        // Simplified Chinese; both script variants are stripped - confirm
        // against a live response.
        fwrite($myfile, str_replace(array('小編的話:', '小编的话:'), '', $data['translation']) . "\n");
    }
    if (isset($data['content']) && (int) $content === 1) {
        fwrite($myfile, $data['content'] . "\n");
    }
}
fclose($myfile);

$t2 = microtime(true);
echo 'ok,耗時' . round($t2 - $t1, 3) . '秒';
?>
執行這個介面即可採集最近約 100 天的每日一句,並追加寫入同目錄下的 iciba.txt。
一言介面
<?php
// Random-sentence endpoint: returns one random line of iciba.txt.
//
// Query parameters:
//   charset=gbk  serve the plain-text response in GBK instead of UTF-8
//   encode=js    wrap the sentence in a JavaScript function iciba() that
//                document.write()s it
//
// If you mind other people downloading the sentence file directly, rename it
// or block access to it in the web server (e.g. Nginx).
$path = dirname(__FILE__);
$source = $path . "/iciba.txt";

// Skip blank lines and fail cleanly when the file is missing or empty;
// count()/mt_rand() would otherwise error out.
$file = is_readable($source)
    ? file($source, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
    : false;
if ($file === false || count($file) === 0) {
    http_response_code(503);
    header('Content-type: text/plain; charset=utf-8');
    exit('no sentences available');
}

// Pick one random line.
$arr = mt_rand(0, count($file) - 1);
$content = trim($file[$arr]);

// Only "gbk" (case-insensitive) is honoured; anything else falls back to
// utf-8 so arbitrary user input never reaches the response header.
$charset = 'utf-8';
if (isset($_GET['charset']) && is_string($_GET['charset']) && strcasecmp($_GET['charset'], "gbk") === 0) {
    $charset = 'gbk';
}

// Output format: JavaScript wrapper or plain text.
if (isset($_GET['encode']) && $_GET['encode'] === 'js') {
    header('Content-type: text/javascript;charset=utf-8');
    // json_encode() produces a correctly escaped, ASCII-only string literal,
    // so apostrophes, backslashes or "</script>" in a sentence cannot break
    // the script, and the output is valid under either charset.
    // NOTE(review): document.write() still interprets the sentence as HTML.
    $literal = json_encode($content, JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT);
    if ($literal === false) {
        // The line was not valid UTF-8; emit an empty string rather than broken JS.
        $literal = '""';
    }
    echo "function iciba(){document.write(" . $literal . ");}";
} else {
    // Declare the charset that is actually sent.
    header('Content-type: text/html; charset=' . $charset);
    echo $charset === 'gbk' ? mb_convert_encoding($content, 'gbk', 'utf-8') : $content;
}
每日採集介面
<?php
/**
 * Daily collector for the iCIBA "daily sentence" API.
 *
 * Intended to be requested once a day with ?p=<key>. Appends today's
 * sentence to iciba.txt and records the date in data.txt so that further
 * requests on the same day are no-ops. The date marker is written only after
 * the sentence was actually saved, so a failed fetch is retried on the next
 * request.
 */
header("Content-type: text/html; charset=utf-8");

$utime = date("Y-m-d");
$file_data = dirname(__FILE__) . '/data.txt'; // holds the date of the last successful run
$key = 'iciba';     // access key expected in ?p=...; change the default
$translation = '0'; // translation text: '0' = skip, '1' = collect
$content = '1';     // English sentence: '0' = skip, '1' = collect

/**
 * Fetch a URL with cURL and return the response body.
 *
 * @param string $url Address to request.
 * @return string|false Response body, or false when the transfer failed.
 */
function httpGet($url)
{
    $curl = curl_init();
    if ($curl === false) {
        return false;
    }
    $httpheader = array(
        "Accept:*/*",
        "Accept-Language:zh-CN,zh;q=0.8",
        "Connection:close",
    );
    curl_setopt($curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1");
    curl_setopt($curl, CURLOPT_HTTPHEADER, $httpheader);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_TIMEOUT, 3);
    // NOTE(review): certificate checks are switched off. That has no effect on
    // the plain-http endpoint used below, but is unsafe if this helper is ever
    // pointed at an https URL.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($curl, CURLOPT_URL, $url);
    $res = curl_exec($curl);
    curl_close($curl);
    return $res;
}

/**
 * Append text to iciba.txt (located next to this script).
 *
 * @param string $txt Text to append.
 * @return bool True when the text was written.
 */
function myfile($txt)
{
    return file_put_contents(dirname(__FILE__) . "/iciba.txt", $txt, FILE_APPEND | LOCK_EX) !== false;
}

if (!isset($_GET['p']) || $_GET['p'] !== $key) {
    echo "老鐵 搞事情嗎";
} else {
    // A missing, empty or unparsable marker counts as "never collected".
    $str = is_file($file_data) ? trim((string) file_get_contents($file_data)) : '';
    $last = $str === '' ? false : strtotime($str);

    if ($last !== false && strtotime($utime) <= $last) {
        // Already collected today.
        echo "已爬";
    } else {
        $json_string = httpGet('http://open.iciba.com/dsapi/');
        $data = is_string($json_string) ? json_decode($json_string, true) : null;

        // Build all lines first so they are appended in a single write.
        $lines = '';
        if (is_array($data)) {
            if (isset($data['note'])) {
                $lines .= $data['note'] . "\n";
            }
            if (isset($data['translation']) && (int) $translation === 1) {
                // NOTE(review): the upstream API presumably returns the prefix
                // in Simplified Chinese; both script variants are stripped -
                // confirm against a live response.
                $lines .= str_replace(array('小編的話:', '小编的话:'), '', $data['translation']) . "\n";
            }
            if (isset($data['content']) && (int) $content === 1) {
                $lines .= $data['content'] . "\n";
            }
        }

        // Mark the day as done only once the sentence is safely on disk.
        if ($lines !== '' && myfile($lines) && file_put_contents($file_data, $utime, LOCK_EX) !== false) {
            echo "ok";
        } else {
            http_response_code(502);
            echo "fail";
        }
    }
}
?>
之後把這個每日採集介面加入寶塔的計劃任務,設定為每天訪問一次即可(訪問的 URL 需帶上參數 ?p=iciba)。
ps:多的圖文介紹也就不多說了,檢視採集毒雞湯的文章即可。