Java爬蟲網頁抓取圖片
阿新 • 發佈:2019-02-10
昨天突然想搞下抓取網上的圖片所以寫了下
這個url自己定義,本地儲存地址也是自己定義,上面的url是百度的,但是抓不到幾張,原因還在研究中。這個是一次抓取所有的png,jpeg,jpg型別的圖片。import java.io.BufferedReader; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.net.MalformedURLException; import java.net.SocketException; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; public class Image { public static void main(String args[]) throws SocketException { String str = GetUrl("http://image.baidu.com/search/index?tn=baiduimage&ps=1&ct=201326592&lm=-1&cl=2&nc=1&ie=utf-8&word=%E9%AB%98%E5%9C%86%E5%9C%86"); List<String> ouput = GetMatcher(str, "src=\"([\\w\\s./:]+?)\""); for (String temp : ouput) { System.out.println(temp); } System.out.println("...................."); for (int i = 0; i < ouput.size(); i++) { String aurl = ouput.get(i); URL url; try { url = new URL(aurl); // 開啟URL連線 URLConnection con = (URLConnection) url.openConnection(); // 得到URL的輸入流 InputStream input = con.getInputStream(); if (input.available() > 0) { // 設定資料緩衝 byte[] bs = new byte[1024 * 2]; // 讀取到的資料長度 int len; // 輸出的檔案流儲存圖片至本 String[] a = aurl.split("\\/"); String name = a[a.length - 1]; if (name.contains(".png") || name.contains(".jpeg") || name.contains(".jpg")|| name.contains(".gif")|| name.contains(".bmp")) { String dir = "E:\\Image\\gaoyuanyuan"; File file = new File(dir, name); OutputStream os = new FileOutputStream(file); while ((len = input.read(bs)) != -1) { os.write(bs, 0, len); } os.close(); input.close();} } else if (input.available() == 0) { System.out.println("與伺服器的連結已中斷"); break; } } catch (MalformedURLException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } System.out.println("————————————————————————單張抓取完畢——————————————————————————"); } 
System.out.println("————————————————————————全部抓取完畢——————————————————————————"); } public static String GetUrl(String inUrl) { StringBuilder sb = new StringBuilder(); try { URL url = new URL(inUrl); BufferedReader reader = new BufferedReader(new InputStreamReader(url.openStream())); String temp = ""; while ((temp = reader.readLine()) != null) { // System.out.println(temp); sb.append(temp); } } catch (MalformedURLException e) { // TODO 自動生成的 catch 塊 e.printStackTrace(); } catch (IOException e) { // TODO 自動生成的 catch 塊 e.printStackTrace(); } return sb.toString(); } public static List<String> GetMatcher(String str, String url) { List<String> result = new ArrayList<String>(); Pattern p = Pattern.compile(url);// 獲取網頁地址 Matcher m = p.matcher(str); while (m.find()) { // System.out.println(m.group(1)); result.add(m.group(1)); } return result; } }