關於如何只用python獲取網頁天氣(數據)的方法
阿新 • • 發佈:2018-08-23
標籤:python 解析 data parser 導入 3.0 根據 lang
獲取網頁數據無非就三步!
第一步:首先通過python獲取到前端html完整代碼!(需要用到 requests 模塊)
第二步:通過獲取到的html代碼進行過濾,獲取到有用天氣數據 (需要用到bs4模塊下的BeautifulSoup)
第三步:獲取到的天氣數據,進行本地化保存
PS:其它用到的模塊:time、random、socket、csv、http.client
廢話不多說,直接上代碼!
首先,導入引用模塊
from bs4 import BeautifulSoup import requests, time, random, socket, csv import http.client
第一步:首先通過python獲取到前端html完整代碼!(需要用到 requests 模塊)
# 獲取請求網址的完整HTML代碼 def htmlcontent(url, data=None): header = { ‘Accept‘: ‘text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8‘, ‘Accept-Encoding‘: ‘gzip, deflate, sdch‘, ‘Accept-Language‘: ‘zh-CN,zh;q=0.8‘,第一步‘Connection‘: ‘keep-alive‘, ‘User-Agent‘: ‘Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.235‘ } # request 的請求頭 timeout = random.choice(range(80, 180)) while True: try: rep = requests.get(url, headers=header, timeout=timeout) #請求url地址,獲得返回response信息 rep.encoding = ‘utf-8‘ break except socket.timeout as e: print(‘3:‘, e) time.sleep(random.choice(range(8, 15))) except socket.error as e: print(‘4:‘, e) time.sleep(random.choice(range(20, 60))) except http.client.BadStatusLine as e: print(‘5:‘, e) time.sleep(random.choice(range(30, 80))) except http.client.IncompleteRead as e: print(‘6:‘, e) time.sleep(random.choice(range(5, 15))) return rep.text # 返回的Html全部代碼
第二步:通過獲取到的html代碼進行過濾,獲取到有用天氣數據 (需要用到bs4模塊下的BeautifulSoup)
# 過濾篩選有用數據 def weatherdata(html_text): data_al = [] bs = BeautifulSoup(html_text, "html.parser") # 創建BeautifulSoup對象並以html.parser方式解析 li = bs.body.find(‘div‘, {‘id‘: ‘7d‘}).find(‘ul‘).find_all(‘li‘) # 根據前端HTML代碼的標簽獲取具體天氣數據 for data in li: temp = [] date = data.find(‘h1‘).string inf = data.find_all(‘p‘) weather = inf[0].string # 天氣 temperature_highest = inf[1].find(‘span‘).string # 最高溫度 temperature_low = inf[1].find(‘i‘).string # 最低溫度 temp.append(date) # 添加日期 temp.append(weather) # 添加天氣 temp.append(temperature_low) # 添加最低溫度 temp.append(temperature_highest) # 添加最高溫度 data_al.append(temp) # 數據全部儲存在一個列表中 return data_al第二步
第三步:獲取到的天氣數據,進行本地化保存
# 把數據寫入本地文件 def writedata(data, name): with open(name, ‘a‘, errors=‘ignore‘, newline=‘‘) as f: f_csv = csv.writer(f) f_csv.writerows(data)第三步
最後,進行調用
if __name__ == ‘__main__‘: url = ‘http://www.weather.com.cn/weather/101010100.shtml‘ # 獲取天氣數據的網址 html = htmlcontent(url) # 獲取網頁信息 result = weatherdata(html) # 解析網頁信息,拿到需要的數據 writedata(result, ‘C:/Users/LoveCounter/Desktop/天氣test.csv‘) # 數據寫入到 csv文檔中
完整性代碼,如下:
from bs4 import BeautifulSoup import requests, time, random, socket, csv import http.client # 獲取請求網址的完整HTML代碼 def htmlcontent(url, data=None): header = { ‘Accept‘: ‘text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8‘, ‘Accept-Encoding‘: ‘gzip, deflate, sdch‘, ‘Accept-Language‘: ‘zh-CN,zh;q=0.8‘, ‘Connection‘: ‘keep-alive‘, ‘User-Agent‘: ‘Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.235‘ } # request 的請求頭 timeout = random.choice(range(80, 180)) while True: try: rep = requests.get(url, headers=header, timeout=timeout) # 請求url地址,獲得返回response信息 rep.encoding = ‘utf-8‘ break except socket.timeout as e: print(‘3:‘, e) time.sleep(random.choice(range(8, 15))) except socket.error as e: print(‘4:‘, e) time.sleep(random.choice(range(20, 60))) except http.client.BadStatusLine as e: print(‘5:‘, e) time.sleep(random.choice(range(30, 80))) except http.client.IncompleteRead as e: print(‘6:‘, e) time.sleep(random.choice(range(5, 15))) return rep.text # 返回的Html全部代碼 # 過濾篩選有用數據 def weatherdata(html_text): data_al = [] bs = BeautifulSoup(html_text, "html.parser") # 創建BeautifulSoup對象並以html.parser方式解析 li = bs.body.find(‘div‘, {‘id‘: ‘7d‘}).find(‘ul‘).find_all(‘li‘) # 根據前端HTML代碼的標簽獲取具體天氣數據 for data in li: temp = [] date = data.find(‘h1‘).string inf = data.find_all(‘p‘) weather = inf[0].string # 天氣 temperature_highest = inf[1].find(‘span‘).string # 最高溫度 temperature_low = inf[1].find(‘i‘).string # 最低溫度 temp.append(date) # 添加日期 temp.append(weather) # 添加天氣 temp.append(temperature_low) # 添加最低溫度 temp.append(temperature_highest) # 添加最高溫度 data_al.append(temp) # 數據全部儲存在一個列表中 return data_al # 把數據寫入本地文件 def writedata(data, name): with open(name, ‘a‘, errors=‘ignore‘, newline=‘‘) as f: f_csv = csv.writer(f) f_csv.writerows(data) if __name__ == ‘__main__‘: url = ‘http://www.weather.com.cn/weather/101010100.shtml‘ # 獲取天氣數據的網址 html = htmlcontent(url) # 獲取網頁信息 result = weatherdata(html) # 解析網頁信息,拿到需要的數據 writedata(result, 
‘C:/Users/LoveCounter/Desktop/天氣test.csv‘) # 數據寫入到 csv文檔中獲取天氣完整性代碼
關於如何只用python獲取網頁天氣(數據)的方法