爬蟲小練習:網頁原始碼隱藏資料(非ajax和js載入)之空氣質量網
阿新 • • 發佈:2019-02-14
注:一層一層剝開它的心,切忌一次性訪問目標網頁
import time

import requests
from selenium import webdriver

# Purpose: open the aqistudy.cn history index in headless Chrome, click through
# to the monthly-data page for 深圳, and save the rendered page source to
# "test.txt".

# Headless Chrome with the "disable-infobars" switch.
option = webdriver.ChromeOptions()
option.add_argument("disable-infobars")
option.add_argument("--headless")
driver = webdriver.Chrome(chrome_options=option)

try:
    # Start from the index page and navigate by clicking, rather than
    # requesting the month page directly.
    driver.get("https://www.aqistudy.cn/historydata/")
    driver.maximize_window()
    time.sleep(2)  # fixed pause, presumably to let the index page finish loading

    # Follow the city link located inside the <div class="bottom"> container.
    driver.find_element_by_xpath(
        '//div[@class="bottom"]//a[@href="monthdata.php?city=深圳"]'
    ).click()
    time.sleep(3)  # fixed pause, presumably to let the month page finish loading

    content = driver.page_source
finally:
    # Always shut the browser down; otherwise each run (including failed ones)
    # leaves a headless Chrome process behind.
    driver.quit()

# Alternative kept for reference: fetching the month page directly with
# requests instead of driving a browser.
# NOTE(review): presumably this direct request does not return the wanted
# data, which is why the browser route is used — confirm.
# response = requests.get("https://www.aqistudy.cn/historydata/monthdata.php?city=%E4%B8%8A%E6%B5%B7")
# content = response.content.decode("utf-8")

# Persist the rendered HTML as UTF-8 so the Chinese text is written intact.
with open("test.txt", "w", encoding="utf-8") as f:
    f.write(content)