java-selenium下載百度圖片
阿新 • • 發佈:2017-10-16
trac edr sel contains str ack tput args div
package download;

import java.io.DataInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.input.ReaderInputStream;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;

/**
 * Collects the {@code src} URLs of every {@code <img>} element on a page
 * rendered by Chrome (through Selenium) and saves the referenced images to
 * local files.
 */
public class Geturl {

    /** Location of the chromedriver executable handed to Selenium. */
    private static final String CHROME_DRIVER_PATH = "E:\\\\webDriver\\\\chromedriverV2.28.exe";

    /** Path prefix of every saved image; the running index and ".jpg" are appended. */
    private static final String IMAGE_PATH_PREFIX = "C:\\Users\\0\\Pictures\\插畫\\photos";

    /** How long to pause after navigation before reading the DOM, in milliseconds. */
    private static final long PAGE_SETTLE_MILLIS = 10000;

    /**
     * Opens {@code baseUrl} in Chrome and returns the {@code src} attribute of
     * each {@code <img>} element that is an http(s) URL.
     *
     * <p>The element count and every {@code src} value (including the ones that
     * are skipped) are printed to standard output. The browser is always closed
     * before this method returns.
     *
     * @param baseUrl address of the page to scan
     * @return the collected image URLs in document order; never {@code null}
     */
    public static List<String> geturls(String baseUrl) {
        List<String> urllist = new ArrayList<String>();
        System.setProperty("webdriver.chrome.driver", CHROME_DRIVER_PATH);
        WebDriver driver = new ChromeDriver();
        try {
            driver.get(baseUrl);

            // Pause before querying the DOM (presumably so that images added by
            // scripts after the initial load are present). Querying first and
            // sleeping afterwards would miss them and leave the earlier element
            // handles open to going stale.
            try {
                Thread.sleep(PAGE_SETTLE_MILLIS);
            } catch (InterruptedException e) {
                // Keep the interrupt visible to the caller and carry on with
                // whatever the page has rendered so far.
                Thread.currentThread().interrupt();
            }

            // Fetch all <img> elements.
            List<WebElement> imgList = driver.findElements(By.tagName("img"));
            System.out.println(imgList.size());

            // Walk every element; a failure on one element (for example a
            // handle that went stale) must not discard the URLs already found.
            for (WebElement img : imgList) {
                try {
                    String urlStr = img.getAttribute("src");
                    System.out.println(urlStr);
                    if (isHttpUrl(urlStr)) {
                        urllist.add(urlStr);
                    }
                } catch (RuntimeException e) {
                    e.printStackTrace();
                }
            }
        } finally {
            // Without quit() every call leaves a browser window and a
            // chromedriver process running.
            driver.quit();
        }
        return urllist;
    }

    /**
     * Downloads each URL in {@code urllist} to a file named
     * {@code photos<index>.jpg}, where the index starts at 0 and follows the
     * list order. Existing files with the same name are overwritten.
     *
     * @param urllist image URLs to download; {@code null} is treated as empty
     * @throws Exception if a URL is malformed or reading/writing fails; files
     *         written before the failure are left in place
     */
    public static void downloadImg(List<String> urllist) throws Exception {
        if (urllist == null) {
            return;
        }
        int imageNumber = 0;
        for (String urlString : urllist) {
            URL url = new URL(urlString);
            String imageName = IMAGE_PATH_PREFIX + imageNumber + ".jpg";
            // try-with-resources closes both streams even when the copy fails
            // half-way, so no connection or file handle is leaked.
            try (InputStream in = url.openStream();
                    FileOutputStream fos = new FileOutputStream(new File(imageName))) {
                byte[] buffer = new byte[1024];
                int length;
                while ((length = in.read(buffer)) != -1) {
                    fos.write(buffer, 0, length);
                }
            }
            imageNumber++;
        }
    }

    /**
     * Scans a fixed Baidu image-search result page, prints how many image URLs
     * were found and downloads them.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> urllist = geturls("https://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gbk&word=%B2%E5%BB%AD&fr=ala&ala=1&alatpl=adress&pos=0&hs=2&xthttps=111111");
        System.out.println(urllist.size());
        try {
            downloadImg(urllist);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Tells whether {@code candidate} can be fetched by {@link #downloadImg}:
     * it must be non-null and use the http or https scheme. This rejects
     * missing {@code src} attributes and inline {@code data:} URIs, which
     * {@link URL} cannot open.
     */
    private static boolean isHttpUrl(String candidate) {
        return candidate != null
                && (candidate.startsWith("http://") || candidate.startsWith("https://"));
    }
}
java-selenium下載百度圖片