1. 程式人生 > >Jsoup抓取圖片

Jsoup抓取圖片

楔子

用 Jsoup 抓取圖片,其實就是先解析頁面、取得圖片地址,然後再把圖片下載下來。

demo

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.Connection.
Response; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; /** * @Title: DownPic.java * @Package com.pic * @Description: TODO(用一句話描述該檔案做什麼) * @author 作者 grq * @version 建立時間:2018年12月2日 下午9:24:03 * */ public class DownPic { static String mainUrl =
"https://www.meitulu.com/t/toutiaonvshen/"; File saveFile = new File("c://picc"); public static void main(String[] args) throws IOException { for (int i = 1; i < 10; i++) { String pageUrl = ""; final int pageNum = i; if (i == 1) { pageUrl = mainUrl; } else { pageUrl = mainUrl +
i + ".html"; } parseMinPage(pageUrl, "page_" + pageNum); } } private static void parseMinPage(String url, String pageNum) { try { Document document = Jsoup.connect(url).get(); Elements imgLis = document.getElementsByClass("img"); // 從li中獲取第一個a標籤 Elements elementsByTag = Jsoup.parse(imgLis.toString()).getElementsByTag("li"); for (Element element : elementsByTag) { Element child = element.child(0); Elements allElements = child.getAllElements(); String picURL = allElements.get(0).attr("href"); String attr = allElements.get(1).attr("alt"); // 圖片 數量 String picCount = element.child(1).text().substring("圖片: ".length(), "圖片: ".length() + 2); downDetail(picURL, pageNum + "/" + attr.replaceAll(" ", ""), picCount.trim()); System.out.println("down pic is " + attr + " 地址是:" + picURL); } } catch (IOException e) { System.out.println("主頁連線超時"); e.printStackTrace(); } } /** * 在主頁下載圖片 * * @param picURL * @param title * @param picCount * @throws IOException */ private static void downDetail(String picURL, String title, String picCount) throws IOException { String baseUrl = picURL.substring(0, picURL.length() - 5); for (int i = 1; i <= (Integer.valueOf(picCount) + 3) / 4; i++) { if (i == 1) { picURL = baseUrl + ".html"; } else { picURL = baseUrl + "_" + i + ".html"; } Elements pics = Jsoup.connect(picURL).get().getElementsByClass("content"); Elements picImg = pics.get(0).getElementsByTag("img"); for (Element ele : picImg) { downPic(ele.attr("src"), title); } } } private static void downPic(String picUrl, String title) throws IOException { // https://www.meitulu.com/t/toutiaonvshen/ Connection connect = Jsoup.connect(picUrl); Response execute = connect.referrer("https://www.meitulu.com/").ignoreContentType(true).execute(); String name = FilenameUtils.getName(picUrl); BufferedInputStream bodyStream = execute.bodyStream(); FileUtils.copyInputStreamToFile(bodyStream, new File("c:/piccc/" + title, name)); } }

pom

<dependency>
	<groupId>commons-io</groupId>
	<artifactId>commons-io</artifactId>
	<version>2.4</version>
</dependency>
<dependency>
	<groupId>org.jsoup</groupId>
	<artifactId>jsoup</artifactId>
	<version>${jsoup.version}</version>
</dependency>

在這裡插入圖片描述