1. 程式人生 > >正則化極限學習機程式碼

正則化極限學習機程式碼

#!/usr/bin/python3
# coding:utf-8

from numpy import *
import pandas as pd
import datetime
from sklearn.model_selection import train_test_split


def main():
    """Load the feature/label CSV files, split them, and run the ELM sweep.

    Reads the 16 feature columns from ``data.csv`` and the ``YL`` column from
    ``label.csv`` (both paths are hard-coded), one-hot encodes the labels,
    splits the rows into train and test halves with a fixed random seed, and
    passes the resulting arrays to ``test``.
    """
    # Per the original author's notes the data set has 1308 rows,
    # 16 attributes and 10 classes; ``data`` is a pandas DataFrame.
    data = pd.read_csv("C:/Users/54376/Desktop/data/data.csv")
    # Keep only the feature columns (drops the row-number column).
    data = data[
        ['CON_STAT', 'CPU_USED', 'MEM_TOTAL', 'MEM_USED', 'ETH0_RECV_BYTES', 'ETH0_RECV_PKTS', 'ETH2_SEND_BYTES',
         'ETH2_SEND_PKTS', 'FD_PROCESS_CPU', 'FD_PROCESS_MEM', 'OTHER_PRO_CPU', 'SYS_UPTIME', 'DF_RESTART', 'SN',
         'AVG_LEN',
         'FDPKTS']]

    # Load the labels; selecting 'YL' yields a pandas Series and drops the
    # row-number column.
    label = pd.read_csv("C:/Users/54376/Desktop/data/label.csv")
    label = label['YL']

    # Show the data and label shapes (expected (1308, 16) and (1308,)
    # according to the original notes).
    print(data.shape)
    print(label.shape)

    # One-hot encode the labels BEFORE splitting. Encoding each split
    # separately gives the two matrices different columns whenever a class
    # is absent from one split, which silently misaligns the class indices
    # that elm() compares via argmax.
    one_hot = pd.get_dummies(label)

    # NOTE(review): train_size=0.5 is a 50/50 split; the original comment
    # claimed train:test = 8:2 -- confirm which one is intended.
    x_train, x_test, y_train, y_test = train_test_split(data, one_hot, train_size=0.5, random_state=1)
    X_train = array(x_train)
    y_train = array(y_train)
    X_test = array(x_test)
    y_test = array(y_test)
    test(X_train, y_train, X_test, y_test)
    return


def test(X_train, y_train, X_test, y_test):
    """Run ``elm`` once for each regularization factor on a log-spaced grid.

    Ten factors given by ``logspace(-4, 5, 10)`` are tried, each with 2000
    hidden nodes. After every run the factor that was used is printed.
    """
    hidden_nodes = 2000
    reg_factors = logspace(-4, 5, 10)
    for reg_factor in reg_factors:
        elm(X_train, y_train, X_test, y_test, hidden_nodes, reg_factor)
        print("i:", reg_factor)


def _hidden_layer(X, w, b):
    """Return the sigmoid hidden-layer output for inputs X, weights w, bias row b."""
    # tanh form of the logistic function: mathematically equal to
    # 1 / (1 + exp(-z)) but it cannot overflow for large |z| (the inputs
    # are not scaled inside this function).
    return 0.5 * (1.0 + tanh(0.5 * (dot(X, w) + b)))


def elm(X_train, y_train, X_test, y_test, M, C):
    """Train and evaluate a regularized extreme learning machine (RELM).

    A single hidden layer with ``M`` sigmoid nodes gets random input weights
    and biases drawn uniformly from [-1, 1). The output weights are the
    ridge-regression solution ``beta = (I / C + H^T H)^-1 H^T y_train``,
    where ``H`` is the hidden-layer output on the training inputs. The same
    weights and biases are then used to build the hidden layer for the test
    inputs.

    Args:
        X_train: Training inputs, 2-D array of shape (N_train, n).
        y_train: Training targets, 2-D array with one row per sample; the
            class of a row is taken to be the argmax of that row.
        X_test: Test inputs, 2-D array of shape (N_test, n).
        y_test: Test targets, laid out like ``y_train``.
        M: Number of hidden nodes.
        C: Regularization factor (larger means weaker regularization).

    Returns:
        Tuple ``(training_accuracy, testing_accuracy)``. The
        misclassification counts and both accuracies are also printed.

    Raises:
        ValueError: If the training or the test set has no rows.
    """
    X_train = asarray(X_train, dtype=float)
    X_test = asarray(X_test, dtype=float)
    # Targets may arrive as bool/int indicator matrices; work in float.
    y_train = asarray(y_train, dtype=float)
    y_test = asarray(y_test, dtype=float)

    N_train = int(shape(X_train)[0])
    N_test = int(shape(X_test)[0])
    if N_train == 0 or N_test == 0:
        raise ValueError("elm() needs at least one training and one test sample")
    # Input dimension.
    n = int(shape(X_train)[1])

    # Random hidden-layer parameters. The bias row is drawn ONCE and reused
    # for the test set; broadcasting expands it over the rows.
    w = random.rand(n, M) * 2 - 1
    b = random.rand(1, M) * 2 - 1

    H_train = _hidden_layer(X_train, w, b)
    # Solve the regularized normal equations directly instead of forming an
    # explicit inverse.
    gram = eye(M) / float(C) + dot(H_train.T, H_train)
    beta = linalg.solve(gram, dot(H_train.T, y_train))
    print("beta size:\n", shape(beta))

    # Predictions on the training and test sets.
    y_train_predict = dot(H_train, beta)
    del H_train  # release the (N_train, M) matrix before building the next one
    y_test_predict = dot(_hidden_layer(X_test, w, b), beta)

    # Count rows whose predicted class (argmax) differs from the target's.
    MissClassify_train = int(count_nonzero(argmax(y_train, axis=1) != argmax(y_train_predict, axis=1)))
    print("MissClassify_train:", MissClassify_train)
    Training_accuracy = 1 - float(MissClassify_train) / N_train

    MissClassify_test = int(count_nonzero(argmax(y_test, axis=1) != argmax(y_test_predict, axis=1)))
    print("MissClassify_test:", MissClassify_test)
    Testing_accuracy = 1 - float(MissClassify_test) / N_test

    print("訓練正確率:", Training_accuracy)
    print("測試正確率:", Testing_accuracy)

    return Training_accuracy, Testing_accuracy


if __name__ == '__main__':
    main()
以上程式碼為正則化極限學習機(RELM)的 Python 版本。