一個電子圍欄需求的腳本記錄
需求:系統對接了廠家的GPS數據,基於這些GPS數據,過濾出指定區域的數據
從網上找到了一個電子圍欄的python腳本,現在需要的是循環取數據判斷是否在指定區域,在指定區域就把這部分數據拿出來放到另外一個庫表
碰到的其中一個問題是腳本的效率問題,以5W條數據來測試
腳本1:使用cur.fetchone(),逐條讀取數據,逐條判斷,逐條插入列表,批量入庫,批量commit
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Geofence filter, variant 1: fetchone() per row, one batched insert.

Reads GPS rows from ``gps_msg``, keeps the rows whose position lies inside
the fence polygon, then writes them to ``gps_msg_20190411`` with a single
``executemany`` call followed by a single commit.
"""
import json
import math
import time

ISOTIMEFORMAT = '%Y-%m-%d %X'

# Fence definition; only the "points" list of the first entry is used.
data = '[{"name":"廣本黃埔工廠","points":[{"lng":113.642124,"lat":23.167372},{"lng":113.636176,"lat":23.175162},{"lng":113.644930,"lat":23.179870},{"lng":113.652108,"lat":23.173823}],"type":0}]'
data = json.loads(data)

# Polygon vertices as [lng, lat] pairs, in the order given by the JSON.
lnglatlist = [
    [float(vertex['lng']), float(vertex['lat'])]
    for vertex in data[0].get('points', [])
]

SELECT_SQL = (
    "select id,sim,alarm,status,latitude,longitude,asl,speed,direction,"
    "sample_time,attch_msg,create_time,car_no from gps_msg "
    "where sample_time>='2019-04-10 18:00:00' "
    "and sample_time<'2019-04-10 20:00:00' limit 50000"
)
INSERT_SQL = (
    "insert into gps_msg_20190411(id,sim,alarm,status,latitude,longitude,"
    "asl,speed,direction,sample_time,attch_msg,create_time,car_no) "
    "values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
)


def _timestamp():
    """Return the current local time formatted with ISOTIMEFORMAT."""
    return time.strftime(ISOTIMEFORMAT, time.localtime())


def _parse_point(row):
    """Return the row's position as floats, longitude first.

    Column 5 (longitude) and column 4 (latitude) of the SELECT are strings;
    they are split on "," and converted exactly as the original script did.
    """
    return [float(part) for part in row[5].split(",") + row[4].split(",")]


def windingNumber(point, poly):
    """Classify ``point`` against polygon ``poly`` using the winding number.

    Args:
        point: sequence whose first two items are the x and y coordinates.
        poly: sequence of vertices (each indexable as [x, y]). The closing
            edge from the last vertex back to the first is implied.

    Returns:
        "on" if the point coincides with a vertex or lies on an edge,
        "in" if it is inside the polygon, otherwise "out". An empty polygon
        yields "out".

    ``poly`` is never modified. The previous version appended ``poly[0]`` on
    every call, so a polygon shared between calls grew by one vertex per
    call and each test became slower than the last.
    """
    px = point[0]
    py = point[1]
    angle_sum = 0
    vertex_count = len(poly)
    for index in range(vertex_count):
        sx = poly[index][0]
        sy = poly[index][1]
        # Wrap around so the last vertex connects back to the first.
        tx = poly[(index + 1) % vertex_count][0]
        ty = poly[(index + 1) % vertex_count][1]

        # Point coincides with a vertex or lies on this edge: inside the
        # edge's bounding box and collinear with it (exact float equality).
        if ((sx - px) * (px - tx) >= 0 and (sy - py) * (py - ty) >= 0
                and (px - sx) * (ty - sy) == (py - sy) * (tx - sx)):
            return "on"

        # Angle subtended at the point by this edge's two endpoints.
        angle = math.atan2(sy - py, sx - px) - math.atan2(ty - py, tx - px)

        # Keep the angle within (-pi, pi).
        if angle >= math.pi:
            angle = angle - math.pi * 2
        elif angle <= -math.pi:
            angle = angle + math.pi * 2

        angle_sum += angle

    # A total of about zero means the polygon does not wind around the point.
    return 'out' if int(angle_sum / math.pi) == 0 else 'in'


def main():
    """Run the filter: fetch rows one by one, then insert matches in bulk."""
    # Imported here so this module (and windingNumber) can be imported
    # without the MySQL driver being installed.
    import MySQLdb

    # Single-argument print works identically on Python 2 and Python 3.
    print('start : ' + _timestamp())

    conn = MySQLdb.connect(user='root', passwd='XXX', host='XXX', charset="utf8")
    try:
        cur = conn.cursor()
        try:
            conn.select_db('XXX')
            cur.execute(SELECT_SQL)

            scope_gps_list = []
            count = 0
            while True:
                gps_data_per = cur.fetchone()
                # Stop when the result set is exhausted instead of assuming
                # exactly 50000 rows (a shorter result used to crash here).
                if gps_data_per is None:
                    break
                count += 1
                if count % 10000 == 0:
                    print('%d : %s' % (count, _timestamp()))
                if windingNumber(_parse_point(gps_data_per), lnglatlist) == 'in':
                    # Matches are collected first because running an insert
                    # on this cursor would discard the pending SELECT rows.
                    scope_gps_list.append(gps_data_per)

            if scope_gps_list:
                cur.executemany(INSERT_SQL, scope_gps_list)
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()

    print('end : ' + _timestamp())


if __name__ == '__main__':
    main()
為什麽要逐條插入列表?
因為使用cur.fetchone()讀取數據後,如果馬上用同一個遊標去insert into數據,再次cur.fetchone()就取不到數據了(同一個遊標執行新的SQL後,原本select的結果集會被丟棄)
執行效率:30分6秒跑完
start : 2019-04-19 20:30:00 |
總結:最後的列表插入數據庫很快,不到1秒
腳本2:使用cur.fetchall()先存全部數據,再使用for循環,逐條讀取,逐條判斷,逐條入庫,批量commit
對比腳本1,主要是改為逐條入庫,不再先插入到列表,用來確認速度慢是否是列表越來越大導致的
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Geofence filter, variant 2: fetchall(), per-row insert, single commit.

Loads every selected GPS row from ``gps_msg`` into memory, inserts each row
that lies inside the fence polygon into ``gps_msg_20190411`` one statement
at a time, and commits once after the loop.
"""
import json
import math
import time

ISOTIMEFORMAT = '%Y-%m-%d %X'

# Fence definition; only the "points" list of the first entry is used.
data = '[{"name":"工廠","points":[{"lng":113.642124,"lat":23.167372},{"lng":113.636176,"lat":23.175162},{"lng":113.644930,"lat":23.179870},{"lng":113.652108,"lat":23.173823}],"type":0}]'
data = json.loads(data)

# Polygon vertices as [lng, lat] pairs, in the order given by the JSON.
lnglatlist = [
    [float(vertex['lng']), float(vertex['lat'])]
    for vertex in data[0].get('points', [])
]

SELECT_SQL = (
    "select id,sim,alarm,status,latitude,longitude,asl,speed,direction,"
    "sample_time,attch_msg,create_time,car_no from gps_msg "
    "where sample_time>='2019-04-10 18:00:00' "
    "and sample_time<'2019-04-10 20:00:00' limit 50000"
)
# Built once here; the original rebuilt this string on every loop iteration.
INSERT_SQL = (
    "insert into gps_msg_20190411(id,sim,alarm,status,latitude,longitude,"
    "asl,speed,direction,sample_time,attch_msg,create_time,car_no) "
    "values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
)


def _timestamp():
    """Return the current local time formatted with ISOTIMEFORMAT."""
    return time.strftime(ISOTIMEFORMAT, time.localtime())


def _parse_point(row):
    """Return the row's position as floats, longitude first.

    Column 5 (longitude) and column 4 (latitude) of the SELECT are strings;
    they are split on "," and converted exactly as the original script did.
    """
    return [float(part) for part in row[5].split(",") + row[4].split(",")]


def windingNumber(point, poly):
    """Classify ``point`` against polygon ``poly`` using the winding number.

    Args:
        point: sequence whose first two items are the x and y coordinates.
        poly: sequence of vertices (each indexable as [x, y]). The closing
            edge from the last vertex back to the first is implied.

    Returns:
        "on" if the point coincides with a vertex or lies on an edge,
        "in" if it is inside the polygon, otherwise "out". An empty polygon
        yields "out".

    ``poly`` is never modified. The previous version appended ``poly[0]`` on
    every call, so a polygon shared between calls grew by one vertex per
    call and each test became slower than the last.
    """
    px = point[0]
    py = point[1]
    angle_sum = 0
    vertex_count = len(poly)
    for index in range(vertex_count):
        sx = poly[index][0]
        sy = poly[index][1]
        # Wrap around so the last vertex connects back to the first.
        tx = poly[(index + 1) % vertex_count][0]
        ty = poly[(index + 1) % vertex_count][1]

        # Point coincides with a vertex or lies on this edge: inside the
        # edge's bounding box and collinear with it (exact float equality).
        if ((sx - px) * (px - tx) >= 0 and (sy - py) * (py - ty) >= 0
                and (px - sx) * (ty - sy) == (py - sy) * (tx - sx)):
            return "on"

        # Angle subtended at the point by this edge's two endpoints.
        angle = math.atan2(sy - py, sx - px) - math.atan2(ty - py, tx - px)

        # Keep the angle within (-pi, pi).
        if angle >= math.pi:
            angle = angle - math.pi * 2
        elif angle <= -math.pi:
            angle = angle + math.pi * 2

        angle_sum += angle

    # A total of about zero means the polygon does not wind around the point.
    return 'out' if int(angle_sum / math.pi) == 0 else 'in'


def main():
    """Run the filter: fetch all rows, insert matches one by one, commit once."""
    # Imported here so this module (and windingNumber) can be imported
    # without the MySQL driver being installed.
    import MySQLdb

    # Single-argument print works identically on Python 2 and Python 3.
    print('start : ' + _timestamp())

    conn = MySQLdb.connect(user='root', passwd='XXX', host='XXX', charset="utf8")
    try:
        cur = conn.cursor()
        try:
            conn.select_db('XXX')
            cur.execute(SELECT_SQL)

            # All rows are held in memory, so the cursor is free to run the
            # inserts while we iterate.
            gps_data_all = cur.fetchall()
            count = 0
            for gps_data_per in gps_data_all:
                if windingNumber(_parse_point(gps_data_per), lnglatlist) == 'in':
                    cur.execute(INSERT_SQL, gps_data_per)
                count += 1
                if count % 10000 == 0:
                    print('%d : %s' % (count, _timestamp()))
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()

    print('end : ' + _timestamp())


if __name__ == '__main__':
    main()
執行效率:29分鐘22秒
start : 2019-04-19 21:05:09 |
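總結:看來速度慢並不是插入列表導致的(補充:以上計時所用的windingNumber在每次呼叫時都會執行poly.append(poly[0]),使共用的lnglatlist隨已處理的筆數不斷變長,單次判斷越來越慢,整體接近O(n²),這才是5萬條數據耗時約30分鐘的主要原因;讓函數不修改傳入的多邊形即可避免)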
腳本3:使用cur.fetchall()先存全部數據,再使用for循環,逐條讀取,逐條判斷,逐條入庫,逐條commit
對比腳本2,逐條入庫,逐條commit,只是做個簡單的對比
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Geofence filter, variant 3: fetchall(), per-row insert, per-row commit.

Loads every selected GPS row from ``gps_msg`` into memory and inserts each
row that lies inside the fence polygon into ``gps_msg_20190411``, committing
immediately after every insert. The per-row commit is deliberate: this
variant exists to compare its cost against committing once at the end.
"""
import json
import math
import time

ISOTIMEFORMAT = '%Y-%m-%d %X'

# Fence definition; only the "points" list of the first entry is used.
data = '[{"name":"工廠","points":[{"lng":113.642124,"lat":23.167372},{"lng":113.636176,"lat":23.175162},{"lng":113.644930,"lat":23.179870},{"lng":113.652108,"lat":23.173823}],"type":0}]'
data = json.loads(data)

# Polygon vertices as [lng, lat] pairs, in the order given by the JSON.
lnglatlist = [
    [float(vertex['lng']), float(vertex['lat'])]
    for vertex in data[0].get('points', [])
]

SELECT_SQL = (
    "select id,sim,alarm,status,latitude,longitude,asl,speed,direction,"
    "sample_time,attch_msg,create_time,car_no from gps_msg "
    "where sample_time>='2019-04-10 18:00:00' "
    "and sample_time<'2019-04-10 20:00:00' limit 50000"
)
# Built once here; the original rebuilt this string on every loop iteration.
INSERT_SQL = (
    "insert into gps_msg_20190411(id,sim,alarm,status,latitude,longitude,"
    "asl,speed,direction,sample_time,attch_msg,create_time,car_no) "
    "values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
)


def _timestamp():
    """Return the current local time formatted with ISOTIMEFORMAT."""
    return time.strftime(ISOTIMEFORMAT, time.localtime())


def _parse_point(row):
    """Return the row's position as floats, longitude first.

    Column 5 (longitude) and column 4 (latitude) of the SELECT are strings;
    they are split on "," and converted exactly as the original script did.
    """
    return [float(part) for part in row[5].split(",") + row[4].split(",")]


def windingNumber(point, poly):
    """Classify ``point`` against polygon ``poly`` using the winding number.

    Args:
        point: sequence whose first two items are the x and y coordinates.
        poly: sequence of vertices (each indexable as [x, y]). The closing
            edge from the last vertex back to the first is implied.

    Returns:
        "on" if the point coincides with a vertex or lies on an edge,
        "in" if it is inside the polygon, otherwise "out". An empty polygon
        yields "out".

    ``poly`` is never modified. The previous version appended ``poly[0]`` on
    every call, so a polygon shared between calls grew by one vertex per
    call and each test became slower than the last.
    """
    px = point[0]
    py = point[1]
    angle_sum = 0
    vertex_count = len(poly)
    for index in range(vertex_count):
        sx = poly[index][0]
        sy = poly[index][1]
        # Wrap around so the last vertex connects back to the first.
        tx = poly[(index + 1) % vertex_count][0]
        ty = poly[(index + 1) % vertex_count][1]

        # Point coincides with a vertex or lies on this edge: inside the
        # edge's bounding box and collinear with it (exact float equality).
        if ((sx - px) * (px - tx) >= 0 and (sy - py) * (py - ty) >= 0
                and (px - sx) * (ty - sy) == (py - sy) * (tx - sx)):
            return "on"

        # Angle subtended at the point by this edge's two endpoints.
        angle = math.atan2(sy - py, sx - px) - math.atan2(ty - py, tx - px)

        # Keep the angle within (-pi, pi).
        if angle >= math.pi:
            angle = angle - math.pi * 2
        elif angle <= -math.pi:
            angle = angle + math.pi * 2

        angle_sum += angle

    # A total of about zero means the polygon does not wind around the point.
    return 'out' if int(angle_sum / math.pi) == 0 else 'in'


def main():
    """Run the filter: fetch all rows, insert and commit matches one by one."""
    # Imported here so this module (and windingNumber) can be imported
    # without the MySQL driver being installed.
    import MySQLdb

    # Single-argument print works identically on Python 2 and Python 3.
    print('start : ' + _timestamp())

    conn = MySQLdb.connect(user='root', passwd='XXX', host='XXX', charset="utf8")
    try:
        cur = conn.cursor()
        try:
            conn.select_db('XXX')
            cur.execute(SELECT_SQL)

            # All rows are held in memory, so the cursor is free to run the
            # inserts while we iterate.
            gps_data_all = cur.fetchall()
            count = 0
            for gps_data_per in gps_data_all:
                if windingNumber(_parse_point(gps_data_per), lnglatlist) == 'in':
                    cur.execute(INSERT_SQL, gps_data_per)
                    # Intentional per-row commit; this is what the variant
                    # measures.
                    conn.commit()
                count += 1
                if count % 10000 == 0:
                    print('%d : %s' % (count, _timestamp()))
        finally:
            cur.close()
    finally:
        conn.close()

    print('end : ' + _timestamp())


if __name__ == '__main__':
    main()
執行效率:
start : 2019-04-19 21:45:11 |
總結:逐條commit會降低效率
參考:https://www.cnblogs.com/dong1/p/10220116.html
一個電子圍欄需求的腳本記錄