python尤果網圖片爬蟲(簡單)__selenium+phantomJS+urllib2
阿新 • • 發佈:2018-12-13
1. 首先為 Python 安裝 selenium 函式庫,然後下載 PhantomJS 並設定環境變數(把 phantomjs 可執行檔所在的目錄加入 PATH;網上有許多相關教學可供參考)
2.直接放python程式碼: youguo_image_spider.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Log in to ugirls.com through PhantomJS and download the listed images.

Written for Python 2 (``urllib2``).  The import fallback below and the
single-argument ``print(...)`` calls produce the same output on Python 2
and also let the file be parsed and run by Python 3.
"""
import contextlib
import re

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from lxml import etree

try:
    import urllib2
except ImportError:  # Python 3: urlopen lives in urllib.request
    import urllib.request as urllib2

# Characters that cannot appear in a file name on common filesystems.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def _image_filename(index, url):
    """Build the local file name for the *index*-th image taken from *url*.

    The name is ``<index>_<token>.jpg`` where ``token`` is the slice
    ``url[-48:-16]``.  Path separators and other characters that are illegal
    in file names are replaced with ``_`` so an unexpected URL shape cannot
    make ``open()`` fail or write outside the working directory.
    """
    token = _UNSAFE_FILENAME_CHARS.sub("_", url[-48:-16])
    return str(index) + "_" + token + ".jpg"


class youguoSpider(object):
    """Drive a headless PhantomJS browser to log in and fetch images.

    Args:
        account: login name typed into the ``txtAccount`` field.
        password: password typed into the ``txtPass`` field.

    The defaults are the placeholder values from the original script and
    must be replaced with real credentials for the login to succeed.
    """

    def __init__(self, account=u"XXXXXXXXXX", password=u"XXXXXXXX"):
        self.url = "https://www.ugirls.com/"
        self.account = account
        self.password = password
        self.driver = webdriver.PhantomJS()

    def start(self):
        """Run the whole workflow: log in, collect image URLs, download."""
        self.login_getIndexPage()

    def close(self):
        """Shut down the PhantomJS process started in ``__init__``."""
        self.driver.quit()

    def login_getIndexPage(self):
        """Open the home page, log in, and pass the page HTML on for parsing.

        For debugging, ``self.driver.save_screenshot("step.png")`` can be
        called after any step to inspect what the headless browser sees.
        """
        # Open the site home page.
        self.driver.get(self.url)
        # Click the "login" button so the login dialog pops up.
        self.driver.find_element_by_id("btnshowlogin").click()
        # Fill the account and password into the login dialog.
        self.driver.find_element_by_id("txtAccount").send_keys(self.account)
        self.driver.find_element_by_id("txtPass").send_keys(self.password)
        # Submit the login form.
        self.driver.find_element_by_id("btnLogin").click()
        # NOTE(review): nothing here verifies that the login actually
        # succeeded or waits for the page to update before continuing.
        print("登入成功")
        # NOTE(review): the element with id "1" is clicked before the page
        # source is read; its purpose is not visible here - confirm.
        self.driver.find_element_by_id("1").click()
        html = self.driver.page_source
        print(html)
        self.get_personUrlList(html)

    def get_personUrlList(self, html):
        """Extract the image URLs from *html* and download them.

        Args:
            html: page source containing the ``magazine_list_wrap`` block.
        """
        ehtml = etree.HTML(html)
        urllist = ehtml.xpath(
            '//div[@class="magazine_list_wrap"]/div/a'
            '/img[@class="magazine_img"]/@src'
        )
        self.download(urllist)
        print(urllist)

    def download(self, urllist):
        """Download every URL in *urllist* into the current directory.

        Files are numbered from 1 in list order.  Any error raised by
        ``urlopen`` or by writing the file propagates to the caller and
        stops the remaining downloads.

        Args:
            urllist: iterable of image URLs.
        """
        print("開始下載圖片:")
        for index, personurl in enumerate(urllist, 1):
            print(personurl)
            # closing() releases the HTTP connection even if the write fails.
            with contextlib.closing(urllib2.urlopen(personurl)) as response:
                with open(_image_filename(index, personurl), "wb") as f:
                    f.write(response.read())
        print("下載結束!")


def main():
    """Create the spider, run it, and always shut the browser down."""
    ygs = youguoSpider()
    try:
        ygs.start()
    finally:
        # Quit PhantomJS even when login or a download raises, so no
        # orphaned browser process is left behind.
        ygs.close()


if __name__ == "__main__":
    main()