PHP 從爬蟲爬取的 txt 檔案按行讀取並寫入儲存到 Excel、CSV 中
阿新 • • 發佈:2019-01-24
需求:採集攜程網酒店資訊
步驟:
- 使用火車頭編寫採集規則並儲存到txt檔案
- 編寫php指令碼讀取txt檔案並按照規則儲存到csv
程式碼:
<?php
/**
 * Read a scraped txt file line by line and store the records in a CSV file.
 *
 * Each input line is cleaned and split into fields by filter_my(); the
 * collected rows are then written out by put_csv_my().
 */

$file_name = 'hotel_2018-3-8.txt';

$file = fopen($file_name, 'r');
if ($file === false) {
    // A failed fopen() would otherwise make the read loop below spin forever
    // (PHP 7) or crash with a TypeError (PHP 8).
    echo "Cannot open input file: {$file_name}\n";
    exit(1);
}

$data = [];
// Loop on the fgets() result instead of feof(): feof() only turns true after a
// read has already failed, which used to append one bogus row at end of file.
while (($str = fgets($file)) !== false) {
    if (trim($str) === '') {
        // Blank lines carry no record; do not emit empty CSV rows for them.
        continue;
    }
    $data[] = filter_my($str);
}
fclose($file);

put_csv_my($data);
echo 'OK';

/**
 * Clean one raw line and split it into its fields.
 *
 * Adapt the replacement rules and the separator to your own scrape format.
 *
 * @param string $str One raw line as read from the txt file.
 * @return array The fields of the line, split on the ',,' separator.
 */
function filter_my($str)
{
    // Drop the line terminator so it does not end up inside the last field.
    $str = rtrim($str, "\r\n");

    // Needles are applied in order: spaces first, then the '!!' and '[]'
    // markers (so a '[]' that only appears after '!!' is removed is stripped too).
    $str = str_replace([' ', '!!', '[]'], '', $str);

    return explode(',,', $str);
}

/**
 * Convert a UTF-8 string to the encoding used for the CSV file.
 *
 * GBK is used instead of GB2312: it is a superset of GB2312, so every string
 * that converted before yields the same bytes, but it can also represent
 * Traditional Chinese characters (such as the ones in the header row) that
 * GB2312 lacks and that were therefore dropped by //IGNORE.
 *
 * @param mixed $text Value to convert; it is cast to string first.
 * @return string The converted text, or '' when iconv() reports a failure.
 */
function csv_encode_my($text)
{
    $converted = iconv('UTF-8', 'GBK//IGNORE', (string) $text);

    // iconv() returns false on failure; write an empty cell rather than
    // handing a boolean to fputcsv().
    return $converted === false ? '' : $converted;
}

/**
 * Write the collected rows to a CSV file, preceded by a header row.
 *
 * Only as many fields as there are header columns are written per row;
 * missing fields are written as empty cells.
 *
 * @param array  $dataList List of rows, each row being a list of field strings.
 * @param string $csv_file Path of the CSV file to create (overwritten if present).
 * @return void
 * @throws RuntimeException When the CSV file cannot be opened for writing.
 */
function put_csv_my($dataList, $csv_file = 'hotel_2018-3-8.csv')
{
    $fp = fopen($csv_file, 'w');
    if ($fp === false) {
        throw new RuntimeException("Cannot open CSV file for writing: {$csv_file}");
    }

    $header = ['酒店名稱', '地址', '房間數', '開業時間', '聯絡電話'];
    $column_count = count($header);

    fputcsv($fp, array_map('csv_encode_my', $header));

    foreach ($dataList as $data) {
        $row = [];
        for ($i = 0; $i < $column_count; $i++) {
            $row[] = csv_encode_my(isset($data[$i]) ? $data[$i] : '');
        }
        fputcsv($fp, $row);
    }

    fclose($fp);
}