程式人生 > 利用httpclient和mysql模擬搜尋引擎

利用httpclient和mysql模擬搜尋引擎

資料抓取模組

package crowling1;


import java.sql.CallableStatement;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import
org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.protocol.BasicHttpContext; import org.apache.http.protocol.HttpContext; import
org.apache.http.util.EntityUtils; /** * An example that performs GETs from multiple threads. * */ public class ClientMultiThreadedExecution { public static void main(String[] args) throws Exception { // Create an HttpClient with the ThreadSafeClientConnManager. // This connection manager must be used if more than one thread will
// be using the HttpClient. PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager(); cm.setMaxTotal(1000); CloseableHttpClient httpclient = HttpClients.custom() .setConnectionManager(cm) .build(); GetThread[] threads = new GetThread[5000]; for (int i = 0; i < threads.length; i++) { HttpGet httpget = new HttpGet(reportIP()); threads[i] = new GetThread(httpclient, httpget, i + 1); } // start the threads for (int j = 0; j < threads.length; j++) { threads[j].start(); } // join the threads for (int j = 0; j < threads.length; j++) { threads[j].join(); } } finally { httpclient.close(); } } /** * A thread that performs a GET. */ static class GetThread extends Thread { private final CloseableHttpClient httpClient; private final HttpContext context; private final HttpGet httpget; private final int id; public GetThread(CloseableHttpClient httpClient, HttpGet httpget, int id) { this.httpClient = httpClient; this.context = new BasicHttpContext(); this.httpget = httpget; this.id = id; } /** * Executes the GetMethod and prints some status information. 
*/ @Override public void run() { try { System.out.println(id + " - about to get something from " + httpget.getURI()); CloseableHttpResponse response = httpClient.execute(httpget, context); try { System.out.println(id + " - get executed"); // get the response body as an array of bytes HttpEntity entity = response.getEntity(); String str = null; if (entity != null) { byte[] bytes = EntityUtils.toByteArray(entity); str=new String(bytes,"utf-8"); System.out.println(id + " - " + bytes.length + " bytes read"); } demo3 d=new demo3(); String mys="'"+httpget.getURI()+"'"; String ip=mys; int begin=str.indexOf("<title>")+7; int end=str.indexOf("</title>"); int debegin=str.indexOf("Description"); String title=""; if (begin!=-1){ title="'"+str.substring(begin, end)+"'"; } String desc=null; if (debegin!=-1){ desc="'"+str.substring(debegin, debegin+10)+"'"; }else { desc="'沒有獲取到描述'"; } System.out.println(title); d.createconn(); String sql="insert into web values("+ip+","+title+","+desc+")"; d.savedata(sql); } finally { response.close(); } } catch (Exception e) { System.out.println(id + " - error: " + e); } } } static int a=110; static int b=75; static int c=114; static int d=0; public synchronized static String reportIP(){ if (d==255){ d=0; c++; }else if(b==255){ b=0; a++; }else if(c==255){ c=0; b++; }else { d++; } return new String("http://"+a+"."+b+"."+c+"."+d); } }

資料儲存模組

package crowling1;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;

/**
 * Small JDBC helper that opens a connection to the local {@code webcro} MySQL
 * database and executes insert/update statements on it.
 *
 * <p>Typical use: {@link #createconn()}, one or more {@code savedata} calls,
 * then {@link #close()}.
 */
public class demo3 {

    /**
     * The open connection, or {@code null} before {@link #createconn()} succeeds
     * and after {@link #close()}. Kept public for existing callers.
     */
    public Connection con;

    /**
     * Loads the MySQL driver and opens the connection into {@link #con}.
     * Failures are reported on the console and leave {@link #con} unchanged;
     * no exception is propagated.
     */
    public void createconn() {
        try {
            // Load the MySQL driver class
            Class.forName("com.mysql.jdbc.Driver");
        } catch (ClassNotFoundException e) {
            System.out.println("找不到驅動程式類 ,載入驅動失敗!");
            e.printStackTrace();
        }
        // NOTE(review): endpoint and credentials are hard-coded; consider moving
        // them to configuration.
        String url = "jdbc:mysql://localhost:3306/webcro";
        String username = "root";
        String password = "root";
        try {
            // Connect
            con = DriverManager.getConnection(url, username, password);
        } catch (SQLException se) {
            System.out.println("資料庫連線失敗!");
            se.printStackTrace();
        }
    }

    /**
     * Executes a complete SQL insert/update statement.
     *
     * <p>Prefer {@link #savedata(String, Object...)} whenever any value comes
     * from outside the program, so values are bound rather than concatenated.
     *
     * @param sql the full statement text
     * @throws SQLException if there is no open connection or execution fails
     */
    public void savedata(String sql) throws SQLException {
        savedata(sql, new Object[0]);
    }

    /**
     * Executes an insert/update statement with {@code ?} placeholders.
     *
     * @param sql    statement text containing one {@code ?} per parameter
     * @param params values bound to the placeholders, in order
     * @throws SQLException if there is no open connection or execution fails
     */
    public void savedata(String sql, Object... params) throws SQLException {
        if (con == null) {
            // createconn() was not called or failed; fail with a clear message
            // instead of a NullPointerException.
            throw new SQLException("no database connection: createconn() must succeed before savedata()");
        }
        PreparedStatement pstmt = con.prepareStatement(sql);
        try {
            for (int i = 0; i < params.length; i++) {
                pstmt.setObject(i + 1, params[i]); // JDBC parameters are 1-based
            }
            pstmt.executeUpdate();
        } finally {
            // Always release the statement, even when execution fails.
            pstmt.close();
        }
    }

    /**
     * Closes the connection if one is open. Does nothing when none is open.
     *
     * @throws SQLException if closing the connection fails
     */
    public void close() throws SQLException {
        if (con != null) {
            try {
                con.close();
            } finally {
                con = null;
            }
        }
    }

    /**
     * Manual smoke test: inserts one fixed row into the {@code web} table.
     *
     * @param args unused
     * @throws SQLException if the connection is missing or the insert fails
     */
    public static void main(String[] args) throws SQLException {
        demo3 d = new demo3();
        d.createconn();
        try {
            d.savedata("insert into web values(?,?,?)", "255.255.255.253", "百度", "百度");
        } finally {
            d.close();
        }
    }
}