資料清洗 -> 資料入庫 -> 資料視覺化的簡單專案
阿新 • • 發佈:2018-11-27
資料從同事那裡拿來,大概 60 萬條、幾百 MB,是市面上某款保險櫃的資料,現在要分析這批資料。
#!/usr/bin/python3
"""Count rows per message type in the ``dictionary`` table and plot a bar chart."""

# Business message types that must always appear in the chart, even when the
# database holds no rows for them.
type_list = [
    "userInfoSync", "alertReport", "changeNetwork", "closeDoor", "dataSync",
    "deleteFP", "dynPwd", "dynPwdSecond", "formatDevice", "heartbeat",
    "lock_activation", "network", "openDoor", "readStatus", "regFP",
    "resetting", "setCtlPwd", "updateFirmware",
]


def get_type_counts():
    """Return a dict mapping each message type to its number of rows.

    The query excludes rows whose type is the string 'NULL' or
    'networkStatus'. If the query fails, the error is printed and the counts
    collected so far (normally an empty dict) are returned, so the caller can
    still continue. A failure to connect is not caught and propagates.
    """
    # Imported lazily (like matplotlib in data_visualization) so that this
    # module can be imported without the database driver installed.
    import pymysql

    config = {
        "mysql_config": {
            "host": "***",
            "user": "***",
            "password": "***",
            "database": "***",
        }
    }
    mysql_config = config["mysql_config"]
    type_counts_dict = {}

    # Keyword arguments are required: PyMySQL 1.0+ rejects positional ones.
    db = pymysql.connect(
        host=mysql_config["host"],
        user=mysql_config["user"],
        password=mysql_config["password"],
        database=mysql_config["database"],
        charset="utf8",
    )

    sql = ("SELECT type,count(*) as freq FROM dictionary "
           "WHERE type != 'NULL' and type != 'networkStatus' group by type ;")
    try:
        with db.cursor() as cursor:
            cursor.execute(sql)
            for type_name, freq in cursor.fetchall():
                type_counts_dict[type_name] = freq
    except pymysql.MySQLError as exc:
        # Best effort: report the cause and fall through with what we have.
        print("Error: unable to fetch data: %s" % exc)
    finally:
        db.close()
    return type_counts_dict


def fill_null_type(type_counts_dict, type_list):
    """Give every expected type that is missing from the counts a count of 0.

    Args:
        type_counts_dict: dict of type name -> count; it is updated in place.
        type_list: the type names the business expects to see.

    Returns:
        The same ``type_counts_dict`` object, with the missing types added.

    The actual key sets are compared rather than just their sizes, so a
    missing type is filled in even when an unexpected type is also present.
    """
    missing = [name for name in type_list if name not in type_counts_dict]
    expected = set(type_list)
    has_unexpected = any(name not in expected for name in type_counts_dict)

    if missing:
        print(missing)
        for name in missing:
            type_counts_dict[name] = 0

    if has_unexpected:
        print("Error: Data type is larger than business type!!!")
    elif not missing:
        print("Info: Data type is equals business type!!!")
    return type_counts_dict


def data_visualization(type_counts_dict):
    """Show a horizontal bar chart of the counts, one bar per type.

    Bars are ordered by count (ties by name), descending; because barh draws
    from the bottom up, the largest count ends up at the bottom of the chart.
    Prints a message and returns without plotting when there is no data.
    """
    if not type_counts_dict:
        # max() below would raise on an empty sequence.
        print("Error: no data to plot")
        return

    import matplotlib
    import matplotlib.pyplot as plt

    # SimHei lets CJK labels render; the second setting keeps the minus sign
    # displayable with that font.
    matplotlib.rcParams['font.sans-serif'] = ['SimHei']
    matplotlib.rcParams['axes.unicode_minus'] = False

    ranked = sorted(
        type_counts_dict.items(),
        key=lambda item: (item[1], item[0]),
        reverse=True,
    )
    type_name = [name for name, _ in ranked]
    datas = [count for _, count in ranked]
    positions = range(len(datas))

    # barh(y positions, bar lengths)
    plt.barh(positions, datas, height=0.5, color='steelblue', alpha=0.8)
    plt.yticks(positions, type_name)
    # Leave room to the right of the longest bar for its value label.
    plt.xlim(0, max(datas) + 1000)
    plt.xlabel("Data Proportion")
    plt.title("Different types of data volume")
    for position, count in enumerate(datas):
        plt.text(count + 1 / 2, position - 0.1, '%s' % count)
    plt.show()


def main():
    """Fetch the counts, fill in the missing business types, and plot them."""
    type_counts_dict = get_type_counts()
    type_counts_dict = fill_null_type(type_counts_dict, type_list)
    data_visualization(type_counts_dict)


if __name__ == "__main__":
    main()
#!/usr/bin/python3
"""Count 'openDoor' messages per opening method and plot them as a pie chart."""
import json
from collections import Counter


def _open_door_type(msg):
    """Return ``data.openDoorType`` from one JSON message, or None if malformed."""
    try:
        # json.loads tolerates surrounding whitespace such as a trailing newline.
        return json.loads(msg)["data"]["openDoorType"]
    except (ValueError, KeyError, TypeError):
        return None


def get_type_counts():
    """Return a dict mapping each openDoorType value to its number of messages.

    Reads the ``msg`` column of every ``dictionary`` row whose type is
    'openDoor' and parses it as JSON. Messages that cannot be parsed, or that
    lack ``data.openDoorType``, are skipped and reported as a total instead
    of aborting the whole count. If the query itself fails, the error is
    printed and an empty dict is returned. A failure to connect propagates.
    """
    # Imported lazily (like matplotlib in data_visualization) so that this
    # module can be imported without the database driver installed.
    import pymysql

    config = {
        "mysql_config": {
            "host": "****",
            "user": "***",
            "password": "***.***",
            "database": "****",
        }
    }
    mysql_config = config["mysql_config"]
    counts = Counter()
    skipped = 0

    # Keyword arguments are required: PyMySQL 1.0+ rejects positional ones.
    db = pymysql.connect(
        host=mysql_config["host"],
        user=mysql_config["user"],
        password=mysql_config["password"],
        database=mysql_config["database"],
        charset="utf8",
    )

    sql = "SELECT msg FROM dictionary WHERE type = 'openDoor';"
    try:
        with db.cursor() as cursor:
            cursor.execute(sql)
            for row in cursor.fetchall():
                # Parse the column value itself; slicing repr(row) corrupts
                # any message that contains escape sequences.
                open_door_type = _open_door_type(row[0])
                if open_door_type is None:
                    skipped += 1
                else:
                    counts[open_door_type] += 1
    except pymysql.MySQLError as exc:
        print("Error: unable to fetch data: %s" % exc)
    finally:
        db.close()

    if skipped:
        print("Warning: skipped %d malformed openDoor messages" % skipped)
    return dict(counts)


def fill_null_type(open_Doortype_counts_dict, type_list=None):
    """Give every expected open-door type missing from the counts a count of 0.

    Args:
        open_Doortype_counts_dict: dict of type code -> count; updated in place.
        type_list: expected type codes; defaults to "0" through "4".

    Returns:
        The same ``open_Doortype_counts_dict`` object, with missing codes added.

    The actual key sets are compared rather than just their sizes, so a
    missing code is filled in even when an unexpected code is also present.
    """
    if type_list is None:
        type_list = ["0", "1", "2", "3", "4"]

    missing = [code for code in type_list
               if code not in open_Doortype_counts_dict]
    expected = set(type_list)
    has_unexpected = any(code not in expected
                         for code in open_Doortype_counts_dict)

    if missing:
        print(missing)
        for code in missing:
            open_Doortype_counts_dict[code] = 0

    if has_unexpected:
        print("Error: Data type is larger than business type!!!")
    elif not missing:
        print("Info: Data type is equals business type!!!")
    return open_Doortype_counts_dict


def data_visualization(open_Doortype_counts_dict):
    """Show a pie chart of the share of each open-door method.

    Type codes are translated to readable names; codes without a known name
    are left out of the chart. Prints a message and returns without plotting
    when there is nothing to draw (no known codes, or all counts are zero).
    """
    open_Doortype_name_dict = {
        '0': "Bluetooth opening",
        '1': "Open the door remotely",
        '2': "Password open",
        '3': "Fingerprint opening",
        '4': "Dynamic cipher",
    }
    # Code -> readable name, e.g. '0' -> "Bluetooth opening".
    named_counts = {
        open_Doortype_name_dict[code]: count
        for code, count in open_Doortype_counts_dict.items()
        if code in open_Doortype_name_dict
    }
    type_name = list(named_counts)
    datas = list(named_counts.values())
    total = sum(datas)
    if total <= 0:
        # A pie of all-zero wedges has undefined fractions.
        print("Error: no data to plot")
        return

    import matplotlib.pyplot as plt

    _fig, ax = plt.subplots(figsize=(9, 20), subplot_kw=dict(aspect="equal"))

    def func(pct):
        # pct is a float percentage; round (not truncate) back to the count,
        # otherwise float error can display one less than the real value.
        absolute = int(round(pct / 100. * total))
        return "{:.1f}%\n({:d} )".format(pct, absolute)

    wedges, _texts, autotexts = ax.pie(
        datas, autopct=func, textprops=dict(color="w"))
    # Legend placed to the right of the pie.
    ax.legend(wedges, type_name,
              title="Open door type",
              loc="center left",
              bbox_to_anchor=(1, 0, 0.5, 0.5))
    # Text drawn on the wedges.
    plt.setp(autotexts, size=20, weight="bold")
    ax.set_title("Open Door Type Proportion", size=20)
    plt.show()


def main():
    """Fetch the counts, fill in the missing open-door types, and plot them."""
    open_Doortype_counts_dict = get_type_counts()
    # Example result: {'3': 2191, '1': 1275}
    open_Doortype_counts_dict = fill_null_type(open_Doortype_counts_dict)
    data_visualization(open_Doortype_counts_dict)


if __name__ == "__main__":
    main()