1. 程式人生 > >人生第一個java指令碼-jsoup例項

人生第一個java指令碼-jsoup例項

目的:

  獲取如下資訊:

   

 

製作流程圖

 

 

 該方法的缺點:會獲取到多個重複的貨號。

解決方法:匯出成 Excel 表格 → 選擇「貨號」列 → 刪除重複值

程式碼結構如下:

ToMain.java

 

 
  
  
package com.lnthz.main;



import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.jsoup.Connection;
import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.lnthz.cookie.CookieUtil; import com.lnthz.jdbc.JdbcMain; import com.lnthz.pojo.ItemCas; import com.lnthz.pojo.TargetData; import com.lnthz.pojo.XDocDataPojo;
/** * @Desc 主類 * @author lnthz * @param * */ public class ToMain { public static void main(String[] args) throws Exception{ ToMain.JueDDZ(441, 1000); //這兩個引數是為了方便除錯,有少量目標網頁規則不一樣,也可以用作開執行緒 } //此方法為了找到 每個貨號對應的絕對地址 public static void
JueDDZ(int aa,int bb) throws Exception{ ItemCas itemCas=new ItemCas(); int HH=100001; String aUrl="https://www.xfnano.com/Product/?1=1&key="; //找到規則迴圈貨期地址 for (int i = aa; i<bb; ++i) { //空Url String nullUrl="https://www.xfnano.com/Product/comment.aspx?fk=0&kind=0&width=520&height=350&TB_iniframe=true&KeepThis=true&TB_iframe=true&modal=false"; int aHH=HH+i; //拼接字串 String bUrl=aUrl+aHH; //得到整個目標頁面原始碼 Document doc = Jsoup.connect(bUrl).get(); //得到貨號所在的 div Element clasDoc=doc.select("div.pro_list_container").first(); //得到貨號地址 Elements links = clasDoc.select("a[href]"); String casName = clasDoc.select("a[href]").text(); //得到絕對地址 刪選出來空地址 String absHref = links.attr("abs:href");// if(absHref.equals(nullUrl)){ continue; }else{ /*ToMain.JueDDZ(absHref);*/ /* System.out.println(absHref);*/ System.out.println("當前i值:"+i+"當前地址:"+absHref); itemCas.setItem(aHH); itemCas.setCasName(casName); itemCas.setMaincasurl(absHref); JdbcMain.addItemCas(itemCas); ToMain.xTableData(absHref); ToMain.xDocData(aHH,absHref); } } System.out.println("最後"); JdbcMain.jdbcClose(); } /** * 此方法用於獲取貨號對應的詳細介紹 * @param absHref */ private static void xDocData(int aHH,String absHref) throws Exception{ // TODO Auto-generated method stub XDocDataPojo xd=new XDocDataPojo(); String url=absHref; Connection conn=Jsoup.connect(url); conn.cookies(CookieUtil.getCookies()); Document doc_x=conn.get(); // Element doc_d=doc_x.getElementById("conn"); // System.out.println(doc_d.val()); // if (doc_d.val() != null) { // String x2doc=doc_d.select("div.other_r div.pro_detail").html(); // xd.setItem(aHH); // xd.setXdoc(x2doc); // JdbcMain.addXDocDataPojo(xd); // } else { Elements x1doc=doc_x.select("div.pro_contbox"); Elements x2doc=x1doc.select("div.other_r"); String x3doc=x2doc.select("div.pro_detail").html(); // System.out.println(""+x1doc); xd.setItem(aHH); xd.setXdoc(x3doc); JdbcMain.addXDocDataPojo(xd); // } } /** * 此方法用於獲取表格詳細資料 * @author lnthz * @param absHref */ private static void xTableData(String absHref) throws 
Exception{ // TODO Auto-generated method stub JdbcMain td=new JdbcMain(); List list = new ArrayList(); String url=absHref; Connection conn=Jsoup.connect(url); conn.cookies(CookieUtil.getCookies()); Document doc_t=conn.get(); Elements doc_table=doc_t.select("div.pro_contbox div.tablelist"); // 使用選擇器選擇該table內所有的<tr> <tr/> Elements trs = doc_table.select("tr"); /*System.out.println(trs);*/ //遍歷表格 //i=0,帶第一行標題; i=1 不帶第一行標題 for (int i = 1; i < trs.size(); ++i) { // 獲取一個tr Element tr = trs.get(i); // 獲取該行的所有td節點 Elements tds = tr.select("td"); //遍歷td資料 HashMap<Integer,String> map=new HashMap<Integer,String>(); for(int j=0; j<tds.size(); j++){ Element[] array=new Element[16]; array[j]= tds.get(j); map.put(j, array[j].text()); } list.add(map); /* System.out.println("-----------------"); */ } td.insertCas(list); } }
 
 

JdbcMain.java

 
  
  
package com.lnthz.jdbc;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import com.lnthz.pojo.ItemCas;
import com.lnthz.pojo.XDocDataPojo;




/**
 * Static JDBC helper that owns one shared MySQL connection and offers the
 * insert operations used by the crawler.
 *
 * <p>NOTE(review): the credentials are hard-coded public constants; consider
 * moving them to external configuration.
 */
public class JdbcMain {
    public static final String URL = "jdbc:mysql://localhost:3307/webCas?useUnicode=true&characterEncoding=utf8";
    public static final String USER = "root";
    public static final String PASSWORD = "123456";

    /** Number of placeholders in the TargetData insert statement. */
    private static final int TARGET_DATA_COLUMNS = 9;

    /** Shared connection; stays {@code null} when the static initializer fails. */
    private static Connection conn = null;

    static {
        try {
            // 1. Load the driver.
            Class.forName("com.mysql.jdbc.Driver");
            // 2. Open the connection.
            conn = DriverManager.getConnection(URL, USER, PASSWORD);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes the shared connection. Does nothing when no connection was opened.
     * The confirmation message is printed only after the close succeeded.
     */
    public static void jdbcClose() {
        if (conn == null) {
            return;
        }
        try {
            conn.close();
            System.out.println("資料庫已關閉(* ̄︶ ̄)");
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * @return the shared connection, or {@code null} if it could not be opened
     */
    public static Connection getConnection() {
        return conn;
    }

    /**
     * Inserts table rows into {@code TargetData}.
     *
     * <p>Each map is one row: key {@code k} is bound to placeholder {@code k + 1}.
     * Placeholders without a matching key are bound to SQL NULL, so a short row
     * never inherits values from the row inserted before it.
     *
     * @param list rows to insert, each mapping a zero-based column index to its text
     * @throws Exception if preparing or executing the statement fails
     */
    public static void insertCas(List<? extends Map<Integer, String>> list) throws Exception {
        String str = "insert into TargetData(id,itemnumber,casnumber,packnumber,parameter,instock,period,price,shu) values(?,?,?,?,?,?,?,?,?)";
        try (PreparedStatement pstat = JdbcMain.getConnection().prepareStatement(str)) {
            for (Map<Integer, String> row : list) {
                // Reset every placeholder first; JDBC keeps parameter values between executions.
                for (int col = 1; col <= TARGET_DATA_COLUMNS; col++) {
                    pstat.setNull(col, java.sql.Types.VARCHAR);
                }
                for (Map.Entry<Integer, String> cell : row.entrySet()) {
                    pstat.setString(cell.getKey() + 1, cell.getValue());
                }
                pstat.executeUpdate();
            }
        }
        System.out.println("TargetData插入成功(* ̄︶ ̄)");
    }

    /**
     * Inserts one record into {@code ItemCas}. An {@link SQLException} is printed
     * and not rethrown, so the caller continues with the next item.
     *
     * @param i record to insert
     */
    public static void addItemCas(ItemCas i) {
        String sql = "insert into ItemCas(item,casName,maincasurl) values (?,?,?)";
        try (PreparedStatement ptmt = JdbcMain.getConnection().prepareStatement(sql)) {
            ptmt.setInt(1, i.getItem());
            ptmt.setString(2, i.getCasName());
            ptmt.setNString(3, i.getMaincasurl());
            ptmt.executeUpdate();
            System.out.println("ItemCas插入成功(* ̄︶ ̄)");
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Inserts one record into {@code XDocDataPojo}.
     *
     * @param xd record to insert
     * @throws SQLException if preparing or executing the statement fails
     */
    public static void addXDocDataPojo(XDocDataPojo xd) throws SQLException {
        String sql = "insert into XDocDataPojo(item,xdoc) values(?,?)";
        try (PreparedStatement ptmt = JdbcMain.getConnection().prepareStatement(sql)) {
            ptmt.setInt(1, xd.getItem());
            ptmt.setString(2, xd.getXdoc());
            ptmt.executeUpdate();
            System.out.println("XDocDataPojo插入成功(* ̄︶ ̄)");
        }
    }
}
 
 

ItemCas.java

package com.lnthz.pojo;

/**
 * Plain data holder for one row of the {@code ItemCas} table: the product key,
 * the text of the product link and the absolute URL of the product page.
 */
public class ItemCas {

    /** Product key. */
    public int item;

    /** Text of the product link. */
    public String casName;

    /** Absolute URL of the product page. */
    public String maincasurl;

    /** @return the product key */
    public int getItem() {
        return this.item;
    }

    /** @param item the product key to store */
    public void setItem(int item) {
        this.item = item;
    }

    /** @return the text of the product link */
    public String getCasName() {
        return this.casName;
    }

    /** @param casName the link text to store */
    public void setCasName(String casName) {
        this.casName = casName;
    }

    /** @return the absolute URL of the product page */
    public String getMaincasurl() {
        return this.maincasurl;
    }

    /** @param maincasurl the absolute URL to store */
    public void setMaincasurl(String maincasurl) {
        this.maincasurl = maincasurl;
    }
}

XDocDataPojo.java

package com.lnthz.pojo;

/**
 * Plain data holder for one row of the {@code XDocDataPojo} table: a product
 * key together with the description HTML captured for it.
 */
public class XDocDataPojo {

    /** Product key the description belongs to. */
    public int item;

    /** Description HTML. */
    public String xdoc;

    /** @param aHH the product key to store */
    public void setItem(int aHH) {
        item = aHH;
    }

    /** @return the product key */
    public int getItem() {
        return item;
    }

    /** @param xdoc the description HTML to store */
    public void setXdoc(String xdoc) {
        this.xdoc = xdoc;
    }

    /** @return the description HTML */
    public String getXdoc() {
        return xdoc;
    }
}

CookieUtil.java

package com.lnthz.cookie;

import java.util.HashMap;

/**
 * Holds the cookies that are sent with requests to the target site.
 *
 * <p>The target site requires a login; obtain the cookie values yourself and
 * add them with {@code put} in the static initializer below.
 */
public class CookieUtil {

    /** Cookie name to cookie value, filled once when the class is loaded. */
    static HashMap<String, String> cookies;

    static {
        HashMap<String, String> cookie = new HashMap<String, String>();

        cookie.put("Hm_lvt_d4e9a2b5f76697fc95880ee989b6b944", "1543460799,1543894953,1543987988,1543992054");
        cookie.put("LXB_REFER", "www.baidu.com");

        // Publish the populated map; without this assignment getCookies() returns null.
        cookies = cookie;
    }

    /**
     * @return the cookie map built in the static initializer (never {@code null})
     */
    public static HashMap<String, String> getCookies() {
        return cookies;
    }
}