想要免費看小說嗎?
阿新 • • 發佈:2019-01-02
很多人找不到想看的小說。今天教大家一個方法:用 HttpClient 抓取網頁、用 Jsoup 解析內容,寫個簡單的爬蟲把各章節存成文字檔。話不多說,直接上程式碼:
package com.cn.love.ui; import java.io.DataOutputStream; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.UnsupportedEncodingException; import java.io.Writer; import java.util.ArrayList; import java.util.List; import java.util.Map; import org.apache.http.HttpHost; import org.apache.http.HttpResponse; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.conn.params.ConnRoutePNames; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.params.CoreConnectionPNames; import org.apache.http.util.EntityUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class HttpclientJsoup { /** * Httpclient負責傳送請求 * Jsoup負責解析 * @param args * @throws IOException * @throws ClientProtocolException */ public static void main(String[] args) throws ClientProtocolException, IOException { //建立httpClient物件 HttpClient httpClient = new DefaultHttpClient(); //設定響應時間,設定傳輸程式碼時間,設定伺服器 連結超時 傳輸超時 代理伺服器 //httpClient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, 1000).setParameter(CoreConnectionPNames.SO_TIMEOUT, 1000).setParameter(ConnRoutePNames.DEFAULT_PROXY, new HttpHost("124.88.67.52",83)); //建立get請求 3406013 68章 3432974 String url=""; HttpGet httpGet=new HttpGet("http://www.17k.com/list/493239.html"); // HttpGet httpGet=new HttpGet("http://www.readnovel.com/novel/120040.html"); //向百度伺服器傳送請求,獲取網頁原始碼 HttpResponse response = httpClient.execute(httpGet); //EntityUtils工具類把網頁實體轉換成字串 String content = EntityUtils.toString(response.getEntity(), "utf-8"); //拿到網頁內容了 開始解析 Document doc = Jsoup.parse(content); //使用元素選擇器選擇 網頁內容 Elements a= doc.select(".Main .Volume dd a"); for (int i = 0; i < a.size(); i++) { Element eurl = a.get(i); 
url="http://www.17k.com/"+eurl.attr("href"); HttpGet httpGet1=new HttpGet(url); HttpResponse response1 = httpClient.execute(httpGet1); String content1 = EntityUtils.toString(response1.getEntity(), "utf-8"); Document doc1 = Jsoup.parse(content1); Elements es= doc1.select("h1"); Elements qw= doc1.select(".p #chapterContentWapper"); //System.out.println(es); // for (Element e:es) { // System.out.println(e.text()+":"+e.attr("href")); // } File file = new File("D:" + File.separator + "修羅武神.txt"); try { // 注意,這個地方,那個true的引數,代表如果這個檔案已經存在了,就把新的內容新增到該檔案的最後 // 如果你想重新建立新檔案,把true改成false就好了 Writer writer = new OutputStreamWriter(new FileOutputStream(file, true), "UTF-8"); StringBuilder builder = new StringBuilder(); for (int j = 0; j < es.size(); j++) { Element h1 = es.get(j); Element test = qw.get(j); System.out.println(h1.text()); System.out.println(test.text()); builder.append(h1.text()); builder.append("\r\n"); builder.append(test.text()); builder.append("\r\n"); } writer.write(builder.toString()); writer.close(); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } } }