1. 程式人生 > jsoup解析檔案存入本地mongodb資料庫

jsoup解析檔案存入本地mongodb資料庫

package Tomongo;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import com.mongodb.BasicDBObject;
import com.mongodb
.DB; import com.mongodb.DBCollection; import com.mongodb.MongoClient; public class MonGoTest { public static void main(String[] args)throws IOException { try { //連結momgodb資料庫 MongoClient client = new MongoClient("127.0.0.1",27017); //查詢所有的資料庫名 @SuppressWarnings("deprecation"
) List<String> databaseNames = client.getDatabaseNames(); for(String name:databaseNames){ System.out.println(name); } //獲得一個數據庫連線 @SuppressWarnings("deprecation") DB db = client.getDB("baidushujuku"); //查詢該資料庫所有的集合名 Set
<String> collectionNames = db.getCollectionNames(); for(String name:collectionNames){ System.out.println(name); } DBCollection teacher_collection = db.getCollection("teacher3"); //迴圈讀取本地檔案 String filepath="F:/webmagic/zhidao.baidu.com"; File file=new File(filepath); if(file.isDirectory()){ String[] filelist=file.list(); for(int i=0;i<filelist.length;i++){ File input=new File(filepath+"\\"+filelist[i]); // File input = new File("F:/webmagic/zhidao.baidu.com/new.txt"); //單個文件輸入時候的測試用這一句 Document doc = Jsoup.parse(input,"UTF-8","http://zhidao.baidu.com/"); Elements one = doc.getElementsByClass("tableone"); String a=one.text();//map直接儲存不了Elements ,所以轉換為String Elements two = doc.getElementsByClass("tabletwo"); String b=two.text(); Elements three = doc.getElementsByClass("tablethree"); String c=three.text(); Elements four = doc.getElementsByClass("tablefour"); String d=four.text(); //使用Map物件 Map<String, Object> map1 =new HashMap<String, Object>(); map1.put("one", a); map1.put("two", b); map1.put("three", c); map1.put("four", d); teacher_collection.insert(new BasicDBObject(map1)); System.out.println("完成!"); } } } catch (Exception e) { e.printStackTrace(); } } }