重寫慢日誌解析程序,實現打印慢SQL信息及其所屬數據庫
阿新 • • 發佈:2019-01-24
數據庫 sta timestamp port read oat lock filename 慢日誌 分組自研的審計平臺最近推出慢SQL優化的功能,topN慢SQL可以通過mysqldumpslow拿到,但由於mysqldumpslow輸出的信息不包含數據庫,這讓程序後續的自動優化變得有些棘手。在觀察了MySQL慢日誌結構後,決定自己寫一個python解析程序,返回的結果比mysqldumpslow解析結果上多出數據庫名稱這一列:
Count: 15 Time=0.002961s (0.034505s) Lock=8.8e-05s (0.000767s) Rows=446 (6690), dbmgr[dbmgr]@10.33.46.179 mysql --SQL所屬數據庫 SHOW GLOBAL VARIABLES;
python版本:2.7
文件名稱:slowParse.py --目前僅支持按query time取topN
代碼內容:
import os import sys def get_sql(slowlog, topN): #Slow log 所在目錄,請自行替換 f1 = open("/var/mysql/data3306/" + slowlog, "r") flag1 = 1 flag2 = 0 sqltext = "" slow_sql_all = {} sql_info = [] queryTime_list = [] locksTime_list = [] rows_list = [] logonInfo_list = [] db = "None" rownum = 0 while 1: line = f1.readline() rownum += 1 if not line: break elif "use " in line and len(line) < 30: db = getDB(line.strip()) elif "# User@Host" in line and flag1 == 1: flag2 = 1 userAndHost = getUserAndHost(line.strip()) logonInfo = userAndHost[0] + "[" + userAndHost[0] + "]@" + userAndHost[1] logonInfo_list.append(logonInfo) elif "# Query_time" in line and flag1 == 1 and flag2 == 1: execInfo = getExecInfo(line.strip()) queryTime_list.append(execInfo[0]) locksTime_list.append(execInfo[1]) rows_list.append(execInfo[2]) elif ";" in line and "SET timestamp" not in line and flag1 == 1 and flag2 == 1: flag2 = 0 sqltext = sqltext + line.strip() if slow_sql_all.has_key(sqltext): tmp = slow_sql_all[sqltext] tmp[0].append(queryTime_list[0]) tmp[1].append(locksTime_list[0]) tmp[2].append(rows_list[0]) tmp[3].append(logonInfo_list[0]) else: # count = 1 # sql_info.append(count) sql_info.append(queryTime_list) sql_info.append(locksTime_list) sql_info.append(rows_list) sql_info.append(logonInfo_list) sql_info.append(db) slow_sql_all[sqltext] = sql_info queryTime_list = [] locksTime_list = [] rows_list = [] logonInfo_list = [] sqltext = "" sql_info = [] elif flag1 == 1 and flag2 == 1 and "# User@Host" not in line and "# Query_time" not in line and "# Time" not in line and "SET timestamp" not in line: sqltext = sqltext + line.strip() + " " f1.close() sqlCombined = {} sqlTmp = {} for i in slow_sql_all: # print i,slow_sql_all[i] count = len(slow_sql_all[i][0]) totalQueryTime = 0 totalLocksTime = 0 totalRows = 0 for j in slow_sql_all[i][0]: totalQueryTime += float(j) maxQueryTime = float(max(slow_sql_all[i][0])) for k in slow_sql_all[i][1]: totalLocksTime += float(k) maxLocksTime = float(max(slow_sql_all[i][1])) for l in slow_sql_all[i][2]: totalRows += int(l) maxRows = int(max(slow_sql_all[i][2])) logonInfo = slow_sql_all[i][3][0] db = slow_sql_all[i][4] sqlCombined[i] = ( count, maxQueryTime, totalQueryTime, maxLocksTime, totalLocksTime, maxRows, totalRows, logonInfo, db) sqlTmp[i] = maxQueryTime sqlTopN = sorted(sqlTmp.items(), key=lambda x: x[1], reverse=True)[:topN] #請自行替換生成文件的所在目錄 f2 = open("/var/mysql/data3306/" + slowlog[:-4] + "-top" + str(topN) + ".txt", "w") for i in sqlTopN: sqltext = i[0] count_str = "Count: " + str(sqlCombined[sqltext][0]) queryTime_str = "Time=" + str(sqlCombined[sqltext][1]) + "s (" + str(sqlCombined[sqltext][2]) + "s)" locksTime_str = "Lock=" + str(sqlCombined[sqltext][3]) + "s (" + str(sqlCombined[sqltext][4]) + "s)" rows_str = "Rows=" + str(sqlCombined[sqltext][5]) + " (" + str(sqlCombined[sqltext][6]) + ")," logonInfo_str = sqlCombined[sqltext][7] db_str = sqlCombined[sqltext][8] f2.write( count_str + " " + queryTime_str + " " + locksTime_str + " " + rows_str + " " + logonInfo_str + " " + db_str + "\n " + sqltext + "\n") f2.close() def getDB(line): info = line.split(" ") db = info[1][:-1] return db def getUserAndHost(line): info_list = line.split(" ") User = info_list[2].split("[")[0] idx = info_list.index("@") hostInfo = info_list[idx + 2] if hostInfo == "[]": Host = "localhost" else: Host = hostInfo[1:-1] return User, Host def getExecInfo(line): info_list = line.split(" ") Query_time = info_list[0].split(" ")[2] Lock_time = info_list[1].split(" ")[1] Rows_sent = info_list[1].split(" ")[3] return Query_time, Lock_time, Rows_sent if __name__ == ‘__main__‘: filename = str(sys.argv[1]) topN = int(sys.argv[2]) get_sql(filename, topN)
使用:
python slowParse.py slow.log 5 --取top 5
重寫慢日誌解析程序,實現打印慢SQL信息及其所屬數據庫