Java 根據網路URL獲取該網頁上面所有的img標籤並下載圖片

阿新 • • 發佈：2020-11-06

說明：根據網路URL獲取該網頁上面所有的img標籤並下載符合要求的所有圖片

所需jar包：jsoup.jar

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Utility for batch-downloading the images referenced by a web page.
 *
 * <p>Typical flow: fetch the page with {@link #getHtmlDocument(String)}, collect the absolute
 * image URLs with {@link #getImgAddressByDom(Document)}, then save each one with
 * {@link #downloadFileByUrl(String, String, String)}.
 *
 * <p>All methods report failures on standard output / standard error and do not throw checked
 * exceptions to the caller.
 *
 * <p>Created 2016-09-03 14:01:03, updated 2017-09-30 11:07:02.
 *
 * @author Marydon
 */
public class ImgDownloadUtil {

  /** Timeout, in milliseconds, for fetching and parsing an HTML page. */
  private static final int PAGE_TIMEOUT_MILLIS = 5000;

  /** Connect and read timeout, in milliseconds, for a single file download. */
  private static final int DOWNLOAD_TIMEOUT_MILLIS = 3 * 1000;

  /** Size, in bytes, of the copy buffer used while downloading. */
  private static final int BUFFER_SIZE = 1024;

  /**
   * Fetches the page at the given URL and parses it into a DOM document.
   *
   * @param url
   *      address of the web page
   * @return the parsed document, or {@code null} if the URL is malformed or the page could not
   *         be fetched
   */
  public static Document getHtmlDocument(String url) {
    Document document = null;
    try {
      // 1. Build the URL object (throws MalformedURLException for an invalid address).
      URL urlObj = new URL(url);
      // 2. Fetch and parse the page; the second argument is the timeout in milliseconds.
      document = Jsoup.parse(urlObj, PAGE_TIMEOUT_MILLIS);
    } catch (MalformedURLException e) {
      System.out.println("世界上最遙遠的距離就是沒有網，檢查設定！");
      e.printStackTrace();
    } catch (IOException e) {
      System.out.println("您的網路連線開啟失敗，請稍後重試！");
      e.printStackTrace();
    }

    return document;
  }

  /**
   * Fetches the page at the given URL and returns its HTML source.
   *
   * @param url
   *      address of the web page
   * @return the HTML source of the page, or an empty string if the page could not be fetched
   */
  public static String getHtmlText(String url) {
    // Reuse the document fetch so both methods share one error-handling path.
    Document document = getHtmlDocument(url);
    return document == null ? "" : document.html();
  }

  /**
   * Collects the addresses of all images in the document.
   *
   * <p>Only {@code src} values that are absolute {@code http://} or {@code https://} URLs are
   * returned; empty, relative and other-scheme values (for example {@code data:}) are skipped.
   *
   * @param document
   *      DOM document to scan; may be {@code null}
   * @return the image addresses in document order; empty (never {@code null}) when the document
   *         is {@code null} or contains no matching image
   */
  public static List<String> getImgAddressByDom(Document document) {
    List<String> imgAddress = new ArrayList<String>();
    if (null == document) {
      return imgAddress;
    }

    // Every <img src="..." alt="..." width="" height=""/> element on the page.
    Elements elements = document.getElementsByTag("img");
    for (Element el : elements) {
      String imgSrc = el.attr("src");
      // startsWith also rejects the empty string, so no separate emptiness check is needed.
      if (imgSrc.startsWith("http://") || imgSrc.startsWith("https://")) {
        imgAddress.add(imgSrc);
      }
    }

    return imgAddress;
  }

  /**
   * Downloads the resource at the given URL into {@code savePath/fileName}.
   *
   * <p>The target directory is created if it does not exist. Failures are reported on standard
   * output together with the stack trace; nothing is thrown for a malformed URL or an I/O
   * error.
   *
   * @param url
   *      address of the file to download
   * @param fileName
   *      name to give the downloaded file
   * @param savePath
   *      directory in which the file is stored
   */
  public static void downloadFileByUrl(String url, String fileName, String savePath) {
    InputStream inputStream = null;
    BufferedInputStream bis = null;
    OutputStream outputStream = null;
    BufferedOutputStream bos = null;
    try {
      // 1. Build the URL object.
      URL urlObj = new URL(url);
      // 2. Open the connection. The read timeout keeps a stalled server from blocking forever.
      URLConnection conn = urlObj.openConnection();
      conn.setConnectTimeout(DOWNLOAD_TIMEOUT_MILLIS);
      conn.setReadTimeout(DOWNLOAD_TIMEOUT_MILLIS);
      // Present a browser User-Agent so servers that block crawlers do not answer 403.
      conn.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
      // 3. Open the response stream.
      inputStream = conn.getInputStream();
      bis = new BufferedInputStream(inputStream);

      // Make sure the target directory exists.
      File saveDir = new File(savePath);
      if (!saveDir.exists()) {
        saveDir.mkdirs();
      }
      File file = new File(savePath + File.separator + fileName);

      // 4. Copy the response body to the target file.
      outputStream = new FileOutputStream(file);
      bos = new BufferedOutputStream(outputStream);
      byte[] buffer = new byte[BUFFER_SIZE];
      int len;
      while ((len = bis.read(buffer)) != -1) {
        // Write only the bytes actually read in this pass.
        bos.write(buffer, 0, len);
      }
      // Flush before reporting success so a failing final write is not reported as a success.
      bos.flush();
      System.out.println("info:" + url + " download success,fileRename=" + fileName);
    } catch (MalformedURLException e) {
      System.out.println("世界上最遙遠的距離就是沒有網，檢查設定");
      System.out.println("info:" + url + " download failure");
      e.printStackTrace();
    } catch (IOException e) {
      System.out.println("您的網路連線開啟失敗，請稍後重試！");
      System.out.println("info:" + url + " download failure");
      e.printStackTrace();
    } finally {
      // Close each stream independently so one failing close cannot leak the others.
      closeAndReport(bis);
      closeAndReport(inputStream);
      closeAndReport(bos);
      closeAndReport(outputStream);
    }
  }

  /**
   * Closes the given stream if it is not {@code null}, printing (not propagating) any failure.
   *
   * @param stream
   *      stream to close; may be {@code null}
   */
  private static void closeAndReport(java.io.Closeable stream) {
    if (stream == null) {
      return;
    }
    try {
      stream.close();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

}

測試

/**
 * Demo entry point: downloads every image found on a web page into a local directory.
 *
 * <p>Each image is stored under a random 32-character name followed by the extension taken from
 * the image URL (no extension is appended when the URL has none).
 *
 * @param args
 *      optional overrides: {@code args[0]} is the page URL and {@code args[1]} is the save
 *      directory; the built-in defaults are used for any argument that is missing
 */
public static void main(String[] args) {
  // 1. Page to scan.
  String url = "http://www.cnblogs.com/Marydon20170307/p/7402871.html";
  if (args != null && args.length > 0) {
    url = args[0];
  }
  // 2. Fetch the DOM of that page.
  Document document = getHtmlDocument(url);
  // 3. Collect the addresses of all qualifying images.
  List<String> imgAddresses = getImgAddressByDom(document);
  // 4. Directory the images are saved to.
  String savePath = "C:/Users/Marydon/Desktop";
  if (args != null && args.length > 1) {
    savePath = args[1];
  }
  // 5. Download the images one by one.
  for (String imgSrc : imgAddresses) {
    // 5.1 File name: a unique 32-character identifier (UUID without dashes).
    String imgName = UUID.randomUUID().toString().replace("-", "");
    // 5.2 Append the image format, taken from the URL path only.
    imgName += extensionOf(imgSrc);
    // 5.3 Download the image.
    downloadFileByUrl(imgSrc, imgName, savePath);
  }
}

/**
 * Extracts the file extension (including the leading dot) from an image URL.
 *
 * <p>The query string and fragment are ignored, and only a dot inside the last path segment
 * counts, so dots in the host name or in query parameters never end up in the file name.
 *
 * @param imgSrc
 *      image URL
 * @return the extension such as {@code ".png"}, or an empty string when the URL path has none
 */
private static String extensionOf(String imgSrc) {
  // Cut off the query string and fragment, whichever comes first.
  int end = imgSrc.length();
  int query = imgSrc.indexOf('?');
  if (query >= 0) {
    end = query;
  }
  int fragment = imgSrc.indexOf('#');
  if (fragment >= 0 && fragment < end) {
    end = fragment;
  }
  String path = imgSrc.substring(0, end);

  // Locate the first slash after "scheme://host"; without one there is no path at all.
  int schemeEnd = path.indexOf("://");
  int pathStart = path.indexOf('/', schemeEnd < 0 ? 0 : schemeEnd + 3);
  if (pathStart < 0) {
    return "";
  }

  int lastSlash = path.lastIndexOf('/');
  int lastDot = path.lastIndexOf('.');
  // The dot must sit in the last segment and be followed by at least one character.
  if (lastDot > lastSlash && lastDot < path.length() - 1) {
    return path.substring(lastDot);
  }
  return "";
}

以上就是Java 根據網路URL獲取該網頁上面所有的img標籤並下載圖片的詳細內容，更多關於java 下載網路圖片的資料請關注我們其它相關文章！

Java 根據網路URL獲取該網頁上面所有的img標籤並下載圖片

說明：根據網路URL獲取該網頁上面所有的img標籤並下載符合要求的所有圖片。所需jar包：jsoup.jar

Java 獲取網路時間，並根據網路時間獲取這個月的月初時間

技術標籤：java、date。獲取網路時間，並根據網路當前時間獲取這個月的月初時間，然後轉換為String型別

hibernate根據包名獲取該包下實體類，資料庫中不存在的庫表名稱

import com.alibaba.fastjson.JSON;import com.alibaba.fastjson.JSONObject;import com.supermap.realestate_bh.registration.util.StringUtil;import com.supermap.wisdombusiness.core.SuperSpringContext;import

java根據時間間隔獲取時間集合

public static List<String> allRangeDate(String startDateStr, String endDateStr) { List<String> listDate = new ArrayList<>();

PHP獲取指定網頁的所有連結

技術標籤：正則表示式、php、爬蟲。 $string = file_get_contents("https://fanyi.qq.com/");

獲取豆瓣讀書所有熱門標籤並儲存到mongodb資料庫

目標url：https://book.douban.com/tag/?view=type&icn=index-sorttags-all 目的：抓取所有標籤名稱（tag_name），標籤連結（tag_url），標籤下的書籍數量（tag_book_num）

HTML網頁中，img標籤和Ajax請求是支援跳轉（Redirect）的

我們知道在HTTP請求中，狀態碼301和302代表跳轉，也叫重定向（Redirect）。 301-Moved Permanently：永久移動。請求的資源已被永久的移動到新URI，返回資訊會包括新的URI，瀏覽器會自動定向到新URI。今後任何新的請

4、[簡答題] 【Map介面中的方法】往一個Map集合中新增若干元素。獲取Map中的所有value，並使用增強for和迭代器遍歷輸出每個value。

package day_04_test;import java.util.Collection;import java.util.HashMap;import java.util.Iterator;import java.util.Set;//4、[簡答題]//【Map介面中的方法】//往一個Map集合中新增若干元素。獲取Map中的所有