1. 程式人生 > 其它 >Python之爬取天氣預報並生成圖表

Python之爬取天氣預報並生成圖表

  使用Python爬蟲去天氣預報網站爬取天氣資料,儲存至MySQL,然後使用pyecharts實現繪圖。

  本次程式碼可以在gitee下載:https://gitee.com/liuyueming/weatherSpider.git

  一、環境檢視

  Python版本

C:\Users\liuym\Desktop\weatherSpider>python --version
Python 3.6.6

  MySQL版本

 mysql --version
mysql  Ver 14.14 Distrib 5.7.22, for Linux (x86_64) using  EditLine wrapper

  二、程式碼

  本次爬取的天氣預報網站為http://www.tianqihoubao.com/

  執行過程中如果遇到沒有安裝的庫,使用pip install安裝即可

  主程式main.py

import pymysql
import requests
from bs4 import BeautifulSoup
 
# Open the MySQL connection shared by the whole crawl; adjust the
# credentials to match the local database.
db = pymysql.connect(
    host="localhost",
    user="root",
    passwd="123456",
    db="weather",
    charset='utf8',
)
cursor = db.cursor()
 
def get_html(url, timeout=10):
    """Download *url* and return it parsed as a BeautifulSoup document.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` tree built with the ``lxml`` parser.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    html = requests.get(url, timeout=timeout)
    # Fail here with a clear HTTP error instead of parsing an error page and
    # failing later when the expected table is missing.
    html.raise_for_status()
    # Decode with the encoding detected from the body rather than the one
    # taken from the response headers.
    html.encoding = html.apparent_encoding
    soup = BeautifulSoup(html.text, 'lxml')
    return soup
 
# Years to crawl.
year = ['2020']

month = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']

# Every "<year><month>" page identifier to crawl, e.g. "202001".
time = [y + x for y in year for x in month]

# The insert statement is identical for every row, so build it once.
sql = (
    "insert into weather_spider"
    "(time_local, link, weather_type, temperature, wind_power) "
    "values(%s, %s, %s, %s, %s)"
)

try:
    for date in time:
        url = 'http://www.tianqihoubao.com/lishi/nanchang/month/' + date + '.html'
        soup = get_html(url)
        sup = soup.find('table', attrs={'class': 'b'})
        tr = sup.find_all('tr')
        # Skip the first row (presumably the table header).
        for trl in tr[1:]:
            td = trl.find_all('td')
            href = td[0].find('a')['href']  # link to the detail page
            title = td[0].find('a')['title']  # title text of the link
            weather = td[1].get_text().replace('\r\n', '').replace(' ', '')  # weather description
            wendu = td[2].get_text().strip().replace(' ', '').replace('\r\n', '')  # temperature
            fengli = td[3].get_text().strip().replace(' ', '').replace('\r\n', '')  # wind power

            cursor.execute(sql, (title, href, weather, wendu, fengli))
            db.commit()
finally:
    # The original wrote `db.close` without parentheses, which never closed
    # the connection. Release the cursor and the connection even on failure.
    cursor.close()
    db.close()
print('爬取完成')

  程式碼解析

db = pymysql.connect(host="localhost", user="root", passwd="123456", db="weather", charset='utf8' ) # 資料庫連線資訊,根據實際情況修改

  

year = ['2020'] # 需要爬取的年份資訊

  

url = 'http://www.tianqihoubao.com/lishi/nanchang/month/'+ date +'.html' # 需要爬取的城市資訊 本次為南昌

  生成html程式myVisualize.py

import pymysql
import pyecharts.options as opts
from pyecharts.charts import Line, Pie

def create_temp():
    """Render the stored temperatures as a line chart in an HTML file.

    Reads ``time_local`` and ``temperature`` from the ``weather_spider``
    table, splits each temperature on ``/`` into a first (plotted as the
    maximum) and second (plotted as the minimum) part, and writes the chart
    to ``南昌2020氣溫變化表.html`` in the current directory.

    Raises:
        pymysql.MySQLError: If the database cannot be reached or queried.
        IndexError: If a stored temperature does not contain ``/``.
    """
    # The host was misspelled as "localhist", which made the connection fail.
    db = pymysql.connect(host="localhost", user="root", passwd="123456", db="weather", charset='utf8')
    try:
        cursor = db.cursor()
        try:
            # Name the columns explicitly so the code does not depend on the
            # column order of the table.
            cursor.execute('SELECT time_local, temperature FROM weather_spider;')
            data = cursor.fetchall()
        finally:
            # Close the cursor before the connection that owns it.
            cursor.close()
    finally:
        db.close()

    max_temp_list = []
    min_temp_list = []
    day_list = []
    for time_local, temperature in data:
        parts = temperature.split('/')
        max_temp_list.append(parts[0].replace('℃', ''))
        min_temp_list.append(parts[1].replace('℃', ''))
        # Keep the first 11 characters of the title (presumably the date).
        day_list.append(time_local[:11])

    line = Line()
    line.add_xaxis(day_list)
    # Both series share the same max/min markers and average line.
    for series_name, values in (("最高氣溫", max_temp_list), ("最低氣溫", min_temp_list)):
        line.add_yaxis(
            series_name=series_name,
            y_axis=values,
            is_symbol_show=False,
            markpoint_opts=opts.MarkPointOpts(
                data=[
                    opts.MarkPointItem(type_="max", name="最大值"),
                    opts.MarkPointItem(type_="min", name="最小值"),
                ]
            ),
            markline_opts=opts.MarkLineOpts(
                data=[opts.MarkLineItem(type_="average", name="平均值")]
            ),
        )
    line.set_global_opts(
        yaxis_opts=opts.AxisOpts(name="溫度(℃)"),
        title_opts=opts.TitleOpts(title="南昌氣溫變化表"),
        tooltip_opts=opts.TooltipOpts(trigger="axis"),
    )

    line.render('南昌2020氣溫變化表.html')
    print('氣溫圖生成成功')

def create_weather():
    """Render the share of each weather category as a pie chart.

    For every keyword in ``attr`` the number of ``weather_spider`` rows whose
    ``weather_type`` contains that keyword is counted; a row can therefore be
    counted under several categories. The chart is written to
    ``南昌2020天氣佔比表.html`` in the current directory.

    Raises:
        pymysql.MySQLError: If the database cannot be reached or queried.
    """
    attr = ["雨", "多雲", "晴", "陰", "雪", "霧", "霾"]
    db = pymysql.connect(host="localhost", user="root", passwd="123456", db="weather", charset='utf8')
    try:
        cursor = db.cursor()
        try:
            counts = []
            for keyword in attr:
                # Let the server count instead of transferring every matching
                # row only to read the row count; the pattern is passed as a
                # parameter so quoting is handled by the driver.
                cursor.execute(
                    'SELECT COUNT(*) FROM weather_spider WHERE weather_type LIKE %s;',
                    ('%' + keyword + '%',),
                )
                counts.append(cursor.fetchone()[0])
        finally:
            # Close the cursor before the connection that owns it.
            cursor.close()
    finally:
        db.close()

    pie = (
        Pie()
        .add("", [list(z) for z in zip(attr, counts)])
        .set_global_opts(title_opts=opts.TitleOpts(title="天氣佔比表"))
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    )

    pie.render('南昌2020天氣佔比表.html')
    print('天氣圖生成成功')


if __name__ == '__main__':
    # When run as a script, build both charts: temperature first, then weather.
    for build_chart in (create_temp, create_weather):
        build_chart()

  MySQL操作(安裝MySQL不詳述)

  建立庫

create database weather;

  匯入表

mysql -uroot -pioYbcZ1u -h127.0.0.1 weather < weather.sql

  建表語句(weather.sql)如下

-- Recreate the table that main.py fills with one record per scraped table row.
-- Every column holds the scraped text as-is, so all of them are varchar.
DROP TABLE IF EXISTS `weather_spider`;
CREATE TABLE `weather_spider` (
  -- title attribute of the row's link
  `time_local` varchar(255) DEFAULT NULL,
  -- href attribute of the row's link
  `link` varchar(255) DEFAULT NULL,
  -- weather description text
  `weather_type` varchar(255) DEFAULT NULL,
  -- temperature text; myVisualize.py splits it on "/"
  `temperature` varchar(255) DEFAULT NULL,
  -- wind power text
  `wind_power` varchar(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;

  三、執行

  執行主程式

python main.py

  執行正常會往MySQL資料庫寫入資料,登入資料庫搜尋檢視

 select * from weather_spider;

  執行生成html程式

python myVisualize.py
氣溫圖生成成功
天氣圖生成成功

  在當前目錄會生成html,開啟檢視