java網路爬蟲——下載頁面圖片
阿新 • • 發佈:2019-01-28
package com.http5; import java.io.InputStream; import org.apache.commons.io.IOUtils; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.util.EntityUtils; public class ImageShow { public static String getimg(HttpClient http,String url){ try{ HttpGet get=new HttpGet(url); HttpResponse hr=http.execute(get); HttpEntity he=hr.getEntity();//鍝堝搱 if(he!=null){ String charset=EntityUtils.getContentCharSet(he); InputStream is=he.getContent(); return IOUtils.toString(is,charset); } }catch(Exception e){ e.printStackTrace(); } return null; } public static byte[] getimage(HttpClient http,String url){ try{HttpGet hg=new HttpGet(url); HttpResponse hr=http.execute(hg); HttpEntity he=hr.getEntity(); if(he!=null){ InputStream is=he.getContent(); return IOUtils.toByteArray(is); } } catch(Exception e){ e.printStackTrace(); } return null; } }
package com.http5; import java.io.FileOutputStream; import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.IOUtils; import org.apache.http.client.HttpClient; import org.apache.http.impl.client.DefaultHttpClient; import org.htmlparser.Parser; import org.htmlparser.filters.NodeClassFilter; import org.htmlparser.tags.ImageTag; import org.htmlparser.util.NodeList; public class Imagedownload { /** * @param args */ public static void main(String[] args) { HttpClient http=new DefaultHttpClient(); try{ String html=ImageShow.getimg(http, "http://www.baidu.com/"); Parser p=new Parser();//建立解析器 p.setInputHTML(html);//解析html NodeList nl=p.parse(new NodeClassFilter(ImageTag.class));//選擇image標籤 for(int i=0;i<nl.size();i++){ ImageTag image=(ImageTag)nl.elementAt(i); String imageurl=image.getImageURL();//獲得圖片src屬性值 String url="http://www.baidu.com"+imageurl; System.out.println(url); String jpg=FilenameUtils.getName(url);//圖片名 byte[] im=ImageShow.getimage(http,url ); IOUtils.write(im, new FileOutputStream("E:/temp/"+jpg)); }}catch(Exception e){ e.printStackTrace(); } } }