chromedriver 全屏 翻頁 錯誤
阿新 • • 發佈:2019-01-29
submit sel child color web support tex present rap
"""Scrape product image URLs from jd.com search results with Selenium + pyquery."""
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, StaleElementReferenceException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pyquery import PyQuery as pq

browser = webdriver.Chrome()
# Paging was observed to fail when the Chrome window is not full-screen, so
# maximize the window before any page interaction.
browser.maximize_window()


def search(max_retries=3):
    """Run the keyword search on jd.com and scrape the first result page.

    Args:
        max_retries: How many extra attempts to make after a timeout.

    Returns:
        The total number of result pages, as an int.

    Raises:
        TimeoutException: If every attempt timed out.
    """
    attempts = max(max_retries, 0) + 1
    for attempt in range(attempts):
        try:
            browser.get('https://www.jd.com/')
            search_box = WebDriverWait(browser, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '#key'))
            )
            submit = WebDriverWait(browser, 10).until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, '#search > div > div.form > button > i')
                )
            )
            search_box.send_keys("佩奇")
            submit.click()
            total_pages = WebDriverWait(browser, 10).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, '#J_bottomPage > span.p-skip > em:nth-child(1) > b')
                )
            )
            pages = int(total_pages.text)
            get_product_media()
            # Returning from inside the retry loop guarantees that a retried
            # attempt still hands the page count back to the caller.
            return pages
        except TimeoutException:
            if attempt == attempts - 1:
                raise


def search_page(number, max_retries=3):
    """Jump to result page ``number`` via the pager's input box and scrape it.

    Args:
        number: 1-based page number to jump to.
        max_retries: How many extra attempts to make when the pager elements
            go stale between lookup and use.

    Raises:
        StaleElementReferenceException: If every attempt hit a stale element.
    """
    attempts = max(max_retries, 0) + 1
    for attempt in range(attempts):
        try:
            page_box = WebDriverWait(browser, 20).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, '#J_bottomPage > span.p-skip > input')
                )
            )
            submit = WebDriverWait(browser, 20).until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR, '#J_bottomPage > span.p-skip > a')
                )
            )
            page_box.clear()
            page_box.send_keys(str(number))
            submit.click()
            # NOTE(review): nothing here confirms that the new page has
            # finished loading before it is scraped. Waiting for
            # EC.text_to_be_present_in_element on
            # '#J_bottomPage > span.p-num > a.curr' with str(number) would
            # close that gap -- confirm the selector against the live page
            # before enabling it.
            get_product_media()
            return
        except StaleElementReferenceException:
            if attempt == attempts - 1:
                raise


def get_product_media():
    """Print one ``{'image': url}`` dict per product on the current page."""
    WebDriverWait(browser, 10).until(
        EC.presence_of_element_located(
            (By.CSS_SELECTOR, '#J_goodsList .gl-item .p-img')
        )
    )
    # Renaming the 'xmlns' attribute is what makes the selectors below match.
    # NOTE(review): presumably pyquery treats the namespaced document as XML,
    # so un-namespaced CSS selectors find nothing -- confirm. The replacement
    # text itself is arbitrary.
    html = browser.page_source.replace('xmlns', 'another_attr')
    doc = pq(html)
    for item in doc('#J_goodsList .gl-i-wrap').items():
        # '.p-img' is only the wrapper; the URL lives on the <img> inside it.
        # Images that have not been lazy-loaded yet are assumed to carry the
        # URL in 'data-lazy-img' instead of 'src' -- TODO confirm.
        img = item.find('.p-img a img')
        product = {
            'image': img.attr('src') or img.attr('data-lazy-img'),
        }
        print(product)


def main():
    """Search, then walk every remaining result page, closing Chrome at the end."""
    try:
        pages = search()
        for page in range(2, pages + 1):
            search_page(page)
    finally:
        # Always release the browser and its driver process.
        browser.quit()


if __name__ == '__main__':
    main()
運行的時候,如果彈出的 Chrome 視窗不是全屏模式,翻頁就無法運行。
另:一直無法解析到正確的 src,直到看了 https://www.cnblogs.com/airnew/p/10101698.html,發現把程式碼改成 html = browser.page_source.replace('xmlns', 'another_attr') 之後,就可以正確解析了。replace('xmlns', 'another_attr') 這是什麼意思?原作者說要把 xmlns 替換掉;試了一下,替換成 'an' 也同樣可以工作。
chromedriver 全屏 翻頁 錯誤