西瓜書 課後習題3.4 十折交叉和留一法,對率迴歸
阿新 • • 發佈:2018-11-27
import csv import numpy as np def readData(filename): """ :param filename:cvs資料檔案 :return: X1,y1,X2,y2,X3,y3 X: list with shape[50,4],特徵 //更新:此處應該為[x;1],為shape[50,5],後面對應修改 y: list with shape[50,],標籤 """ X1, X2, X3 = [], [], [] y1, y2, y3 = [], [], [] # 讀資料 with open(filename, 'r') as f: reader = csv.reader(f) for line in reader: if line[4] == 'Iris-setosa': X1.append([float(line[0]), float(line[1]), float(line[2]), float(line[3]), 1.0]) y1.append(line[4]) elif line[4] == 'Iris-versicolor': X2.append([float(line[0]), float(line[1]), float(line[2]), float(line[3]), 1.0]) y2.append(line[4]) else: X3.append([float(line[0]), float(line[1]), float(line[2]), float(line[3]), 1.0]) y3.append(line[4]) return X1, X2, X3, y1, y2, y3 def tenfolddata(X1, X2): """ 產生十折訓練資料,每折5個正例,5個反例 :param X1: list with shape[50,4], positive shape[50,5] :param X2: list with shape[50,4], negative shape[50,5] :return: folds: list with shape[10,10,4] shape[10,10,5] y; list with shape[10,10] """ folds = [] y = [] for i in range(10): fold = [] fold += X1[i * 5: (i + 1) * 5] fold += X2[i * 5: (i + 1) * 5] folds.append(fold) y.append([1] * 5 + [0] * 5) return folds, y def LR(X, y): """ 訓練邏輯迴歸模型,梯度遞降法 :param X: np.array with shape[N,d], Input 包括111... 
:param y: np.array with shape[N,1], label :return: beta with shape[1,d],包括b Optimal params with gradDescent method """ N, d = X.shape lr = 0.01 ############ 對結果影響很大 beta = np.ones((1, d)) * 0.1 z = X.dot(beta.T) # [N,1] for i in range(150): p1 = np.exp(z) / (1 + np.exp(z)) # shape[N,1] first_order = -np.sum(X * (y - p1), 0, keepdims=True) # shape[1,d] # update beta -= first_order * lr z = X.dot(beta.T) l = np.sum(-y * z + np.log(1 + np.exp(z))) return beta def testing(beta, X, y): """ 基於邏輯迴歸進行分類任務測試 :param beta: np.array with shape[1,d], 邏輯迴歸引數 :param X: np.array wiht shape[N,d], testing instances :param y: np.array with shape[N,1], testing labels :return: error_num, LR演算法分類錯誤個數 """ predicts = (X.dot(beta.T) >= 0) # shape[N,1] error_num = np.sum(predicts != y) return error_num def tenFoldCrossValidation(folds, y): """ 十折交叉驗證 :param folds: list with shape[10,10,5] :param y: list with shape[10,10] :return:ten_fold_error_nums """ ten_fold_error_nums = 0 for i in range(10): train_X = folds[:i] + folds[i + 1:] train_y = y[:i] + y[i + 1:] val_X = folds[i] val_y = y[i] train_X = np.array(train_X).reshape(-1, 5) # -1指的是在不知道有多少行的情況下直接進行劃分,最終為shape[n,4] train_y = np.array(train_y).reshape([-1, 1]) val_X = np.array(val_X).reshape(-1, 5) val_y = np.array(val_y).reshape([-1, 1]) beta = LR(train_X, train_y) error_num = testing(beta, val_X, val_y) ten_fold_error_nums += error_num return ten_fold_error_nums def Loo(X, y): """ 留一法進行預測 :param X: list with shape[100,4] :param y: list with shape[100] :return: Loo_error_nums """ loo_error_nums = 0 for i in range(100): train_X = X[:i] + X[i + 1:] train_y = y[:i] + y[i + 1:] val_X = X[i] val_y = y[i] train_X = np.array(train_X).reshape(-1, 5) train_y = np.array(train_y).reshape(-1, 1) val_X = np.array(val_X).reshape(-1, 5) val_y = np.array(val_y).reshape(-1, 1) beta = LR(train_X, train_y) error_num = testing(beta, val_X, val_y) loo_error_nums += error_num return loo_error_nums if __name__ == '__main__': dataset = 
'C:\\Users\\14399\\Desktop\\iris.csv' X1, X2, X3, y1, y2, y3 = readData(dataset) # 十折交叉驗證 # X1 and X2 folds, y = tenfolddata(X1, X2) # print(folds) round1_ten_fold_error_nums = tenFoldCrossValidation(folds, y) print(round1_ten_fold_error_nums) # X1 and X3 folds, y = tenfolddata(X1, X3) round2_ten_fold_error_nums = tenFoldCrossValidation(folds, y) print(round2_ten_fold_error_nums) # X2 and X3 folds, y = tenfolddata(X2, X3) round3_ten_fold_error_nums = tenFoldCrossValidation(folds, y) print(round3_ten_fold_error_nums) # 留一法 # X1 and X2 X = X1 + X2 y = [1] * len(X1) + [0] * len(X2) round1_Loo_error_nums = Loo(X, y) print(round1_Loo_error_nums) # X1 and X3 X = X1 + X3 y = [1] * len(X1) + [0] * len(X3) round2_Loo_error_nums = Loo(X, y) print(round2_Loo_error_nums) # X2 and X3 X = X2 + X3 y = [1] * len(X2) + [0] * len(X3) round3_Loo_error_nums = Loo(X, y) print(round3_Loo_error_nums)
結果(每組三個數字依次為 X1–X2、X1–X3、X2–X3 三個二分類問題的錯誤個數):十折交叉: 0 0 15 ////對X進行拓展(在特徵末尾加入常數1,即[x;1])後的結果更好了,分別為: 0 0 3 和 0 0 4
留一法: 0 0 11
資料集:UCI iris資料集
連結:https://pan.baidu.com/s/1CWMvPZdsYsKYncJsl0P5bQ 提取碼:lx4r