keras實現網路流量分類功能的CNN
阿新 • • 發佈:2018-12-10
-
資料集選用KDD99
資料下載地址:http://kdd.ics.uci.edu/databases/kddcup99/kddcup99.html
需求:https://blog.csdn.net/com_stu_zhang/article/details/6987632 -
執行環境
win10+keras
安裝步驟:https://blog.csdn.net/u010916338/article/details/83822562 -
資料預處理
包含以數值編碼取代文字欄位、數值歸一化(min-max)、標籤獨熱編碼
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 6 09:24:20 2018

@author: hrh

Preprocess a headerless KDD99-style CSV: encode the text columns as
integers, min-max scale selected columns, and one-hot encode the label
column into 40 classes.
"""
import numpy as np
import pandas as pd
from pandas.core.frame import DataFrame

# File read when no explicit path is given.
SOURCE_CSV = 'data_test.csv'

# Column positions in the headerless CSV.
PROTOCOL_COLUMN = 1
SERVICE_COLUMN = 2
FLAG_COLUMN = 3
LABEL_COLUMN = 41

# Columns rescaled to [0, 1] after encoding.
SCALED_COLUMNS = (2, 4, 5, 22, 23, 31, 32)

PROTOCOL_CODES = {'tcp': 0, 'udp': 1, 'icmp': 2}

# The position of each name in this tuple is its integer code (0..69).
SERVICE_NAMES = (
    'aol', 'auth', 'bgp', 'courier', 'csnet_ns',
    'ctf', 'daytime', 'discard', 'domain', 'domain_u',
    'echo', 'eco_i', 'ecr_i', 'efs', 'exec',
    'finger', 'ftp', 'ftp_data', 'gopher', 'harvest',
    'hostnames', 'http', 'http_2784', 'http_443', 'http_8001',
    'imap4', 'IRC', 'iso_tsap', 'klogin', 'kshell',
    'ldap', 'link', 'login', 'mtp', 'name',
    'netbios_dgm', 'netbios_ns', 'netbios_ssn', 'netstat', 'nnsp',
    'nntp', 'ntp_u', 'other', 'pm_dump', 'pop_2',
    'pop_3', 'printer', 'private', 'red_i', 'remote_job',
    'rje', 'shell', 'smtp', 'sql_net', 'ssh',
    'sunrpc', 'supdup', 'systat', 'telnet', 'tftp_u',
    'tim_i', 'time', 'urh_i', 'urp_i', 'uucp',
    'uucp_path', 'vmnet', 'whois', 'X11', 'Z39_50',
)
SERVICE_CODES = {name: code for code, name in enumerate(SERVICE_NAMES)}

# Only 'SF' is coded 1; every other flag value is coded 0.
FLAG_CODES = {
    'OTH': 0, 'REJ': 0, 'RSTO': 0, 'RSTOS0': 0, 'RSTR': 0, 'S0': 0,
    'S1': 0, 'S2': 0, 'S3': 0, 'SF': 1, 'SH': 0,
}

# The position of each label in this tuple is its class index (0..39).
LABEL_NAMES = (
    'normal.', 'ipsweep.', 'mscan.', 'nmap.', 'portsweep.',
    'saint.', 'satan.', 'apache2.', 'back.', 'land.',
    'mailbomb.', 'neptune.', 'pod.', 'processtable.', 'smurf.',
    'teardrop.', 'udpstorm.', 'buffer_overflow.', 'httptunnel.',
    'loadmodule.', 'perl.', 'ps.', 'rootkit.', 'sqlattack.', 'xterm.',
    'ftp_write.', 'guess_passwd.', 'imap.', 'multihop.', 'named.',
    'phf.', 'sendmail.', 'snmpgetattack.', 'snmpguess.', 'spy.',
    'warezclient.', 'warezmaster.', 'worm.', 'xlock.', 'xsnoop.',
)
LABEL_CODES = {name: code for code, name in enumerate(LABEL_NAMES)}


def _encode(column, mapping, position):
    """Replace each value of *column* by its integer code from *mapping*.

    Raises:
        ValueError: if the column holds a value that has no code.
            ``Series.map`` would otherwise turn such values into NaN, and
            the NaN would flow silently into the exported features.
    """
    encoded = column.map(mapping)
    missing = encoded.isna()
    if missing.any():
        unknown = sorted(set(str(value) for value in column[missing]))
        raise ValueError(
            'column {} contains values with no code: {}'.format(
                position, unknown))
    return encoded.astype('int64')


def _min_max(column):
    """Rescale *column* linearly to [0, 1]; a constant column becomes 0.0."""
    low = column.min()
    span = column.max() - low
    if span == 0:
        # Avoid 0/0, which would fill the whole column with NaN.
        return pd.Series(0.0, index=column.index)
    return (column - low) / span


def _features(data):
    """Return *data* without the label column."""
    return data.drop(columns=[LABEL_COLUMN])


def _one_hot_labels(data):
    """Return the label column of *data* as a one-hot float DataFrame."""
    class_index = data[LABEL_COLUMN].values
    # Row i of the identity matrix is the one-hot vector for class i, so
    # the output always has one column per known class.
    return DataFrame(np.eye(len(LABEL_CODES))[class_index])


def get_total_data(path=SOURCE_CSV):
    """Load the headerless CSV at *path* and return it fully encoded.

    Text columns 1, 2, 3 and the label column 41 are replaced by integer
    codes, then the columns in ``SCALED_COLUMNS`` are min-max scaled using
    the minimum and maximum found in this file.

    Raises:
        ValueError: if an encoded column holds a value without a code.
    """
    data = pd.read_csv(path, header=None)
    encodings = (
        (PROTOCOL_COLUMN, PROTOCOL_CODES),
        (SERVICE_COLUMN, SERVICE_CODES),
        (FLAG_COLUMN, FLAG_CODES),
        (LABEL_COLUMN, LABEL_CODES),
    )
    for position, mapping in encodings:
        data[position] = _encode(data[position], mapping, position)
    for position in SCALED_COLUMNS:
        data[position] = _min_max(data[position])
    return data


def get_target_data(path=SOURCE_CSV):
    """Return the one-hot encoded labels (one row per record, 40 columns)."""
    return _one_hot_labels(get_total_data(path))


def get_input_data(path=SOURCE_CSV):
    """Return the encoded feature columns, i.e. everything but the label."""
    return _features(get_total_data(path))


if __name__ == '__main__':
    # Read and encode the file once, then export both views of it.
    total = get_total_data()
    _features(total).to_csv('data_test_input.csv', header=False, index=False)
    _one_hot_labels(total).to_csv(
        'data_test_target.csv', index=False, header=False)
- 程式碼
import time
start = time.time()  # taken before the heavy imports so they are timed too

import keras
from keras.models import Sequential  # linear stack of layers
from keras.layers import Dense  # fully connected layer
from keras.layers import Dropout  # randomly zeroes activations in training
from keras.layers import Flatten  # flattens conv output for the dense layers
from keras.layers import Conv1D  # 1-D convolution along the feature axis
from keras.layers import MaxPooling1D  # 1-D max pooling
import pandas as pd
from keras import backend as k

batch_size = 128  # samples per gradient update
num_classes = 40  # width of the one-hot target / softmax output
epochs = 12  # full passes over the training set


def load_matrix(path):
    """Read a headerless numeric CSV and return its values as an array.

    Raises:
        ValueError: if the file contains NaN. NaN inputs make the loss
            evaluate to nan, so fail here with the offending file name.
    """
    frame = pd.read_csv(path, header=None)
    if frame.isna().values.any():
        raise ValueError(
            '{} contains NaN values; regenerate it before training'.format(
                path))
    return frame.values


def as_sequences(features, n_features):
    """Reshape (samples, n_features) to (samples, n_features, 1).

    Each record becomes a length-n_features sequence with one channel,
    which is the channels_last layout the Conv1D layers below are told
    to use.
    """
    return features.reshape(features.shape[0], n_features, 1)


def build_model(n_features, n_classes):
    """Build and compile the 1-D CNN classifier.

    data_format is passed explicitly so the layers do not depend on the
    backend's image_data_format setting, which is a 2-D image option.
    NOTE(review): the data_format keyword on the 1-D layers assumes
    Keras >= 2.2 - confirm against the installed version.
    """
    model = Sequential()
    # 32 filters, kernel length 3
    model.add(Conv1D(32, 3, activation='relu',
                     input_shape=(n_features, 1),
                     data_format='channels_last'))
    # 64 filters, kernel length 3
    model.add(Conv1D(64, 3, activation='relu', data_format='channels_last'))
    model.add(MaxPooling1D(pool_size=2, data_format='channels_last'))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(n_classes, activation='softmax'))
    # Cross-entropy loss, Adadelta optimizer, accuracy as reported metric.
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])
    return model


def main():
    """Train on the training CSVs and report loss/accuracy on the test CSVs."""
    x_train = load_matrix('data_input.csv')
    y_train = load_matrix('data_target.csv')
    x_test = load_matrix('data_test_input.csv')
    y_test = load_matrix('data_test_target.csv')

    # Take the feature count from the data instead of hard-coding it; the
    # test reshape fails loudly if the two files disagree.
    n_features = x_train.shape[1]
    x_train = as_sequences(x_train, n_features)
    x_test = as_sequences(x_test, n_features)

    model = build_model(n_features, num_classes)
    # verbose=1 prints a progress bar per epoch. The test set is also used
    # as the validation set here.
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1,
              validation_data=(x_test, y_test))
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    stop = time.time()
    print(str(stop-start) + "秒")


if __name__ == '__main__':
    main()
- 執行結果
CNN模型訓練準確率及誤差: Train on 494021 samples, validate on 311029 samples Epoch 1/12 494021/494021 [==============================] - 35s 71us/step - loss: 0.0380 - acc: 0.9932 - val_loss: nan - val_acc: 0.9161 Epoch 2/12 494021/494021 [==============================] - 34s 70us/step - loss: 0.0192 - acc: 0.9971 - val_loss: nan - val_acc: 0.9162 Epoch 3/12 494021/494021 [==============================] - 35s 70us/step - loss: 0.0178 - acc: 0.9975 - val_loss: nan - val_acc: 0.9163 Epoch 4/12 494021/494021 [==============================] - 34s 69us/step - loss: 0.0178 - acc: 0.9976 - val_loss: nan - val_acc: 0.9165 Epoch 5/12 494021/494021 [==============================] - 34s 70us/step - loss: 0.0160 - acc: 0.9978 - val_loss: nan - val_acc: 0.9165 Epoch 6/12 494021/494021 [==============================] - 34s 70us/step - loss: 0.0159 - acc: 0.9978 - val_loss: nan - val_acc: 0.9165 Epoch 7/12 494021/494021 [==============================] - 35s 71us/step - loss: 0.0160 - acc: 0.9979 - val_loss: nan - val_acc: 0.9185 Epoch 8/12 494021/494021 [==============================] - 34s 69us/step - loss: 0.0155 - acc: 0.9979 - val_loss: nan - val_acc: 0.9163 Epoch 9/12 494021/494021 [==============================] - 34s 70us/step - loss: 0.0156 - acc: 0.9980 - val_loss: nan - val_acc: 0.9172 Epoch 10/12 494021/494021 [==============================] - 34s 69us/step - loss: 0.0147 - acc: 0.9981 - val_loss: nan - val_acc: 0.9181 Epoch 11/12 494021/494021 [==============================] - 34s 69us/step - loss: 0.0146 - acc: 0.9980 - val_loss: nan - val_acc: 0.9164 Epoch 12/12 494021/494021 [==============================] - 34s 69us/step - loss: 0.0148 - acc: 0.9981 - val_loss: nan - val_acc: 0.9163 Test loss: nan Test accuracy: 0.916342206033768 427.40167260169983秒