人生第一個java指令碼-jsoup例項
阿新 • • 發佈:2018-12-05
目的:
獲取如下資訊;
製作流程圖
該方法的缺點:同一個貨號可能會被重複獲取多次。
解決方法:匯出成 Excel 表格 → 選擇「貨號」列 → 刪除重複值。
程式碼結構如下:
ToMain.java
package com.lnthz.main; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import org.jsoup.Connection;import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.lnthz.cookie.CookieUtil; import com.lnthz.jdbc.JdbcMain; import com.lnthz.pojo.ItemCas; import com.lnthz.pojo.TargetData; import com.lnthz.pojo.XDocDataPojo;/** * @Desc 主類 * @author lnthz * @param * */ public class ToMain { public static void main(String[] args) throws Exception{ ToMain.JueDDZ(441, 1000); //這兩個引數是為了方便除錯,有少量目標網頁規則不一樣,也可以用作開執行緒 } //此方法為了找到 每個貨號對應的絕對地址 public static voidJueDDZ(int aa,int bb) throws Exception{ ItemCas itemCas=new ItemCas(); int HH=100001; String aUrl="https://www.xfnano.com/Product/?1=1&key="; //找到規則迴圈貨期地址 for (int i = aa; i<bb; ++i) { //空Url String nullUrl="https://www.xfnano.com/Product/comment.aspx?fk=0&kind=0&width=520&height=350&TB_iniframe=true&KeepThis=true&TB_iframe=true&modal=false"; int aHH=HH+i; //拼接字串 String bUrl=aUrl+aHH; //得到整個目標頁面原始碼 Document doc = Jsoup.connect(bUrl).get(); //得到貨號所在的 div Element clasDoc=doc.select("div.pro_list_container").first(); //得到貨號地址 Elements links = clasDoc.select("a[href]"); String casName = clasDoc.select("a[href]").text(); //得到絕對地址 刪選出來空地址 String absHref = links.attr("abs:href");// if(absHref.equals(nullUrl)){ continue; }else{ /*ToMain.JueDDZ(absHref);*/ /* System.out.println(absHref);*/ System.out.println("當前i值:"+i+"當前地址:"+absHref); itemCas.setItem(aHH); itemCas.setCasName(casName); itemCas.setMaincasurl(absHref); JdbcMain.addItemCas(itemCas); ToMain.xTableData(absHref); ToMain.xDocData(aHH,absHref); } } System.out.println("最後"); JdbcMain.jdbcClose(); } /** * 此方法用於獲取貨號對應的詳細介紹 * @param absHref */ private static void xDocData(int aHH,String absHref) throws Exception{ // TODO Auto-generated method stub XDocDataPojo xd=new XDocDataPojo(); String url=absHref; Connection conn=Jsoup.connect(url); conn.cookies(CookieUtil.getCookies()); 
Document doc_x=conn.get(); // Element doc_d=doc_x.getElementById("conn"); // System.out.println(doc_d.val()); // if (doc_d.val() != null) { // String x2doc=doc_d.select("div.other_r div.pro_detail").html(); // xd.setItem(aHH); // xd.setXdoc(x2doc); // JdbcMain.addXDocDataPojo(xd); // } else { Elements x1doc=doc_x.select("div.pro_contbox"); Elements x2doc=x1doc.select("div.other_r"); String x3doc=x2doc.select("div.pro_detail").html(); // System.out.println(""+x1doc); xd.setItem(aHH); xd.setXdoc(x3doc); JdbcMain.addXDocDataPojo(xd); // } } /** * 此方法用於獲取表格詳細資料 * @author lnthz * @param absHref */ private static void xTableData(String absHref) throws Exception{ // TODO Auto-generated method stub JdbcMain td=new JdbcMain(); List list = new ArrayList(); String url=absHref; Connection conn=Jsoup.connect(url); conn.cookies(CookieUtil.getCookies()); Document doc_t=conn.get(); Elements doc_table=doc_t.select("div.pro_contbox div.tablelist"); // 使用選擇器選擇該table內所有的<tr> <tr/> Elements trs = doc_table.select("tr"); /*System.out.println(trs);*/ //遍歷表格 //i=0,帶第一行標題; i=1 不帶第一行標題 for (int i = 1; i < trs.size(); ++i) { // 獲取一個tr Element tr = trs.get(i); // 獲取該行的所有td節點 Elements tds = tr.select("td"); //遍歷td資料 HashMap<Integer,String> map=new HashMap<Integer,String>(); for(int j=0; j<tds.size(); j++){ Element[] array=new Element[16]; array[j]= tds.get(j); map.put(j, array[j].text()); } list.add(map); /* System.out.println("-----------------"); */ } td.insertCas(list); } }
JdbcMain.java
package com.lnthz.jdbc; import java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.Iterator; import java.util.List; import java.util.Map; import com.lnthz.pojo.ItemCas; import com.lnthz.pojo.XDocDataPojo; public class JdbcMain { public static final String URL = "jdbc:mysql://localhost:3307/webCas?useUnicode=true&characterEncoding=utf8"; public static final String USER = "root"; public static final String PASSWORD = "123456"; private static Connection conn = null; static{ try { //1.載入驅動程式 Class.forName("com.mysql.jdbc.Driver"); //2. 獲得資料庫連線 conn = DriverManager.getConnection(URL, USER, PASSWORD); } catch (ClassNotFoundException e) { e.printStackTrace(); } catch (SQLException e) { e.printStackTrace(); } } public static void jdbcClose(){ try { System.out.println("資料庫已關閉(* ̄︶ ̄)"); conn.close(); } catch (SQLException e) { // TODO Auto-generated catch block e.printStackTrace(); } } public static Connection getConnection(){ return conn; } /** * 此方法用於增加表格詳情 * @param list * @throws Exception */ public static void insertCas(List list)throws Exception{ Map map=null; Connection conn = JdbcMain.getConnection(); String str="insert into TargetData(id,itemnumber,casnumber,packnumber,parameter,instock,period,price,shu) values(?,?,?,?,?,?,?,?,?)"; PreparedStatement pstat = conn.prepareStatement(str); for(int h =0;h<list.size();h++){ map = (Map)list.get(h); Iterator<Map.Entry<Integer, String>> entries = map.entrySet().iterator(); while (entries.hasNext()) { Map.Entry<Integer, String> entry = entries.next(); int a=entry.getKey()+1; pstat.setString(a,entry.getValue()); } pstat.executeUpdate(); } System.out.println("TargetData插入成功(* ̄︶ ̄)"); } /** * 用於ItemCas資料表增加 * * @param i */ public static void addItemCas(ItemCas i) { // TODO Auto-generated method stub Connection conn = JdbcMain.getConnection(); String sql="insert into 
ItemCas(item,casName,maincasurl) values (?,?,?)"; PreparedStatement ptmt; try { ptmt = conn.prepareStatement(sql); ptmt.setInt(1,i.getItem()); ptmt.setString(2, i.getCasName()); ptmt.setNString(3, i.getMaincasurl()); System.out.println("ItemCas插入成功(* ̄︶ ̄)"); ptmt.executeUpdate(); } catch (SQLException e) { e.printStackTrace(); }finally { } } public static void addXDocDataPojo(XDocDataPojo xd) throws SQLException{ Connection conn=JdbcMain.getConnection(); PreparedStatement ptmt=null; String sql="insert into XDocDataPojo(item,xdoc) values(?,?)"; ptmt=conn.prepareStatement(sql); ptmt.setInt(1, xd.getItem()); ptmt.setString(2, xd.getXdoc()); System.out.println("XDocDataPojo插入成功(* ̄︶ ̄)"); ptmt.executeUpdate(); } }
ItemCas.java
package com.lnthz.pojo; public class ItemCas { public int item; public String casName; public String maincasurl; public String getCasName() { return casName; } public void setCasName(String casName) { this.casName = casName; } public int getItem() { return item; } public void setItem(int item) { this.item = item; } public String getMaincasurl() { return maincasurl; } public void setMaincasurl(String maincasurl) { this.maincasurl = maincasurl; } }
XDocDataPojo.java
package com.lnthz.pojo; public class XDocDataPojo { public int item; public String xdoc; public int getItem() { return item; } public void setItem(int aHH) { this.item = aHH; } public String getXdoc() { return xdoc; } public void setXdoc(String xdoc) { this.xdoc = xdoc; } }
CookieUtil.java
package com.lnthz.cookie; import java.util.HashMap; public class CookieUtil { static HashMap cookies; static{ HashMap cookie=new HashMap(); //目標網站需要登入,cookie表自行解決,put引數就行 cookie.put("Hm_lvt_d4e9a2b5f76697fc95880ee989b6b944", "1543460799,1543894953,1543987988,1543992054"); cookie.put("LXB_REFER", "www.baidu.com"); } public static HashMap getCookies(){ return cookies; } }