Jsoup 爬蟲 抓取網路圖片
阿新 • • 發佈:2019-02-05
package common;

import java.io.BufferedInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.UUID;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Jsoup-based web crawler utility.
 *
 * @author Cloud
 * @date 2016-11-21
 */
public class JsoupUtil {

    /**
     * Downloads every image referenced by an {@code <img src="...">} element of a web page.
     *
     * <p>Each image is written to {@code outPath + <random 32-hex-char name> + ".jpg"}.
     * The path and the file name are joined by plain string concatenation, so
     * {@code outPath} must already end with a path separator. The {@code .jpg}
     * extension is used regardless of the actual image format.
     *
     * <p>A failure to download a single image is reported on standard error and the
     * remaining images are still processed.
     *
     * @param networkUrl URL of the page to scan for images
     * @param outPath    directory prefix under which the images are stored
     * @throws IOException if the page itself cannot be fetched or parsed
     */
    public static void getNetworkImage(String networkUrl, String outPath) throws IOException {
        // Fetch and parse the page; a failure here is propagated to the caller.
        Document document = Jsoup.connect(networkUrl).get();
        Elements images = document.select("img[src]");

        for (Element image : images) {
            // Resolve relative "src" values against the page's base URI.
            // absUrl yields an empty string when no absolute URL can be built.
            String imageUrl = image.absUrl("src");
            if (imageUrl.isEmpty()) {
                continue;
            }

            String outImage = UUID.randomUUID().toString().replaceAll("-", "") + ".jpg";
            try {
                downloadImage(imageUrl, outPath + outImage);
            } catch (IOException ex) {
                // Best effort: one broken image must not abort the remaining downloads.
                System.err.println("Failed to download image " + imageUrl);
                ex.printStackTrace();
            }
        }
    }

    /**
     * Copies the content behind {@code imageUrl} into the file {@code targetFile}.
     *
     * @param imageUrl   absolute URL of the image
     * @param targetFile path of the file to create
     * @throws IOException if the URL is malformed, or reading or writing fails
     */
    private static void downloadImage(String imageUrl, String targetFile) throws IOException {
        URL url = new URL(imageUrl);
        // try-with-resources closes both streams for every image, even on failure.
        // The input stream is opened first so no file is created when the connection fails.
        try (InputStream in = new BufferedInputStream(url.openConnection().getInputStream());
             FileOutputStream out = new FileOutputStream(targetFile)) {
            byte[] buffer = new byte[1024];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        }
    }
}

// Test class (belongs in its own source file, e.g. test/JsoupTest.java):
package test;

import common.JsoupUtil;

/**
 * Runs {@link JsoupUtil#getNetworkImage(String, String)} against a sample page,
 * printing a marker line before and after the call.
 *
 * @author Cloud
 * @date 2016-12-15
 */
public class JsoupTest {

    public static void main(String[] args) throws Exception {
        System.out.println("--start--");
        JsoupUtil.getNetworkImage("http://www.tooopen.com/img/87.aspx", "E://");
        System.out.println("--end--");
    }
}
擷取之後的圖片:
最後總結一下:Jsoup 只解析伺服器回傳的靜態 HTML,不會執行 JavaScript,因此非同步載入的內容(例如透過 ajax 請求載入的圖片)是抓取不到的。
有什麼不懂的可以加我QQ私聊,294706865
jar 下載地址:
http://download.csdn.net/download/u014266877/9716649