周志華西瓜書習題5.5

阿新 • • 發佈：2020-08-07

前言

這是我第一次嘗試把書上的虛擬碼編寫成程式，遺憾的是，一開始沒有達到預期的結果。

不過，我調了半天，終於調出來了，哈哈！

主要是犯了兩個重大錯誤：

1. 只調了 eta2，卻忘記調 eta1，而 eta1 比 eta2 重要得多；

2. 自己粗心，測試時居然把訓練集 X_train 送進網路做預測，卻拿預測結果去和測試集的標籤 y_test 做對比，結果自然對不上。

第一次

第一次使用西瓜資料集 3.0α（watermelon_3a.csv），隱層有三個神經元，程式碼如下：

'''
Follows the pseudo-code of Figure 5.8.
Uses data set 3.0a: two inputs, one output. (For data set 3.0 I did not know
how to convert the categorical attributes into numbers.)
The neural network model used is:
         +++++++++++++++++++++++++
         +          O            +     output layer: one threshold theta
         +        / | \          +
         +       /  |  \         +     three hidden-to-output weights w
         +      /   |   \        +
         +     O    O    O       +     hidden layer: three thresholds gamma
         +                       +
         + (connections omitted) +     3*2 input-to-hidden weights v
         +                       +
         +      O       O        +     input layer
         +++++++++++++++++++++++++
 
'''

import numpy as np
import matplotlib.pyplot as plt
import self_def

# Load the data set. Columns 1 and 2 of the CSV are used as the two input
# features and column 3 as the target value.
# NOTE(review): column 0 is skipped -- presumably a sample index; confirm
# against the CSV file.
data = np.loadtxt('watermelon_3a.csv', delimiter=',')
X = data[:, 1:3]
y = data[:, 3]

# Split into training and test subsets (40% held out for testing). The fixed
# random_state makes the split reproducible.
from sklearn import model_selection
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.4, random_state=0)

# m = number of training samples, n = number of input features.
# (The original statement was broken across two lines, which is a syntax
# error; it is rejoined here.)
m, n = np.shape(X_train)

# Learning rates: eta1 scales the output-layer updates (w, theta) and eta2
# scales the hidden-layer updates (v, gamma).
eta1, eta2 = 0.1, 0.2

# Random initial parameters; np.random.rand() draws uniformly from [0, 1).
theta = np.random.rand(1)      # output-layer threshold
w = np.random.rand(3)          # hidden-to-output weights, one per hidden unit
gamma = np.random.rand(3)      # hidden-layer thresholds, one per hidden unit
v = np.random.rand(3, 2)       # input-to-hidden weights, row h feeds hidden unit h

# Per-step snapshots of every parameter, used later to inspect how training
# progressed. v is stored as m stacked 3x2 slabs.
theta_history = np.zeros(m)
w_history = np.zeros((m, 3))
gamma_history = np.zeros_like(w_history)
v_history = np.zeros((3 * m, 2))

 
# Training: a single pass over the training set, updating the parameters
# after every sample (per-sample back-propagation).
for k in range(m):

    # Outputs of the three hidden-layer neurons for the k-th sample.
    b = np.zeros(3)
    for h in range(3):
        b[h] = self_def.neuron_out1(X_train[k], v[h], gamma[h])

    # Estimated output of the output-layer neuron, Eq. 5.3.
    y_esti = self_def.neuron_out1(b, w, theta)

    # Output-layer gradient term g, Eq. 5.10.
    g = y_esti * (1 - y_esti) * (y_train[k] - y_esti)

    # Hidden-layer gradient term e, Eq. 5.15. There is only one output
    # neuron (j = 1), so the sum in Eq. 5.15 has a single term.
    # Computed on whole arrays: g may be a length-1 array (theta is created
    # with shape (1,)), and writing such an array into a scalar slot such as
    # e[h] is deprecated since NumPy 1.25.
    e = b * (1 - b) * (w * g)

    # Parameter increments, Eqs. 5.11-5.14.
    delta_w = eta1 * g * b
    delta_theta = -1 * eta1 * g
    # Broadcasting (3, 1) * (2,) gives delta_v[h, i] = eta2 * e[h] * x[i].
    delta_v = (eta2 * e)[:, np.newaxis] * X_train[k]
    delta_gamma = -1 * eta2 * e

    # Apply the updates in place.
    theta += delta_theta
    w += delta_w
    gamma += delta_gamma
    v += delta_v

    # Record the parameter values after this step. theta has shape (1,), so
    # its single element is extracted explicitly instead of relying on the
    # deprecated implicit array-to-scalar conversion.
    theta_history[k] = np.ravel(theta)[0]
    w_history[k] = w
    gamma_history[k] = gamma
    v_history[3*k:3*k+3] = v

# Prediction on the held-out test set (the original header comment said
# "training", but this loop only runs the forward pass).
mm = np.shape(X_test)[0]
y_pred = np.zeros((mm, 1))
b = np.zeros(3)
for k in range(mm):

    # Outputs of the three hidden-layer neurons for the k-th TEST sample.
    # The original code fed X_train[k] here, so the predictions were made
    # for training samples but then compared against y_test.
    for h in range(3):
        b[h] = self_def.neuron_out1(X_test[k], v[h], gamma[h])

    # Estimated output of the output-layer neuron, Eq. 5.3; outputs of at
    # least 0.5 are classified as 1, everything else stays 0.
    y_esti = self_def.neuron_out1(b, w, theta)
    if y_esti >= 0.5:
        y_pred[k] = 1

# Build the 2x2 confusion matrix. The first index is the true label and the
# second index is the predicted label:
#   cfmat[0, 0]  predicted 0, true 0      cfmat[0, 1]  predicted 1, true not 1
#   cfmat[1, 0]  predicted 0, true not 0  cfmat[1, 1]  predicted 1, true 1
cfmat = np.zeros((2, 2))
for i in range(mm):
    predicted, actual = y_pred[i], y_test[i]
    hit = predicted == actual

    if predicted == 0:
        row, col = (0, 0) if hit else (1, 0)
    elif predicted == 1:
        row, col = (1, 1) if hit else (0, 1)
    else:
        # Predictions other than 0/1 are not counted.
        continue

    cfmat[row, col] += 1

print(cfmat)

# Plot how the parameters evolved over the training steps: theta in the first
# panel, the three w components in the second, the three gamma components in
# the third (the figure is laid out as a 4-row grid).
t = np.arange(m)

p1 = plt.subplot(411)
p1.plot(t, theta_history)

p2 = plt.subplot(412)
for col in range(3):
    p2.plot(t, np.ravel(w_history[:, col]))

p3 = plt.subplot(413)
for col in range(3):
    p3.plot(t, np.ravel(gamma_history[:, col]))

plt.show()

print('end')

但是結果不怎麼樣。我感覺是因為資料量太小，什麼也訓練不出來，於是換了資料集。

第二次

資料集選用的是 UCI 資料集 iris 的一部分，共有 100 個樣本，正例、反例各 50 個。

程式碼如下

'''
Follows the pseudo-code of Figure 5.8.
Uses a subset of the UCI iris data set: four inputs, one output.
The neural network model used is:
         +++++++++++++++++++++++++
         +          O            +     output layer: one threshold theta        1
         +       // | \ \        +
         +     / /  |  \ \       +     five hidden-to-output weights w          5*1
         +   /  /   |   \  \     +
         + O   O    O   O   O    +     hidden layer: five thresholds gamma      5
         +                       +
         + (connections omitted) +     5*4 input-to-hidden weights v[h, i]      5*4
         +                       +
         +    O   O    O   O     +     input layer
         +++++++++++++++++++++++++
'''

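# Change log: how this script was modified step by step
# 2. Changed the hidden layer from three neurons to five
# 3. Tuned the value of eta2
# 4. Parameters are no longer randomly initialised; fixed values are assigned instead
# 5. With fixed initial parameters, tuned eta2
# 6. Factored the training and testing steps into functions, see main_3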
import numpy as np
import matplotlib.pyplot as plt
import self_def

# Load the iris subset: the first four CSV columns are used as input features
# and the fifth column as the target value.
data = np.loadtxt('iris_2.csv', delimiter=',')
X, y = data[:, :4], data[:, 4]

# Hold out 25% of the samples for testing; the fixed random_state keeps the
# split reproducible between runs.
from sklearn import model_selection
split = model_selection.train_test_split(X, y, test_size=0.25, random_state=0)
X_train, X_test, y_train, y_test = split

# m = number of training samples, n = number of input features.
m, n = X_train.shape

# Parameter initialisation with fixed (non-random) values.
# The earlier random initialisation was dead code: theta, w and gamma were
# overwritten immediately and every row of v was replaced, so it is removed.
theta = 0.5                                      # output-layer threshold
w = np.arange(0, 1, 0.2)                         # 5 hidden-to-output weights
# A separate arange call on purpose: w and gamma are updated in place later
# and must not share memory.
gamma = np.arange(0, 1, 0.2)                     # 5 hidden-layer thresholds
# 5x4 input-to-hidden weights; every row is [0, 0.25, 0.5, 0.75].
v = np.zeros((5, 4)) + np.arange(0, 1, 0.25)

# Learning rates: eta1 for the output layer (w, theta), eta2 for the hidden
# layer (v, gamma).
eta1 = 0.4
eta2 = 0.5

# Buffers for the per-step parameter increments.
delta_w = np.zeros(5)
delta_theta = 0
delta_v = np.zeros((5, 4))
delta_gamma = np.zeros(5)

# Per-step parameter history, used to inspect how training progressed.
# v is stored as m stacked 5x4 slabs.
theta_history = np.zeros(m)
w_history = np.zeros((m, 5))
gamma_history = np.zeros((m, 5))
v_history = np.zeros((5*m, 4))

# Training: a single pass over the training set, updating the parameters
# after every sample.
for k in range(m):
    sample = X_train[k]

    # Outputs of the five hidden-layer neurons.
    b = np.zeros(5)
    for h, (v_h, gamma_h) in enumerate(zip(v, gamma)):
        b[h] = self_def.neuron_out1(sample, v_h, gamma_h)

    # Estimated output of the output-layer neuron, Eq. 5.3.
    y_esti = self_def.neuron_out1(b, w, theta)

    # Output-layer gradient term g, Eq. 5.10.
    g = y_esti * (1 - y_esti) * (y_train[k] - y_esti)

    # Hidden-layer gradient term e, Eq. 5.15. With a single output neuron
    # (j = 1) the sum in Eq. 5.15 reduces to one term.
    e = b * (1 - b) * (w * g)

    # Parameter increments, Eqs. 5.11-5.14, written into the preallocated
    # buffers where they exist.
    delta_w[:] = eta1 * g * b
    delta_theta = -1 * eta1 * g
    # Broadcasting (5, 1) * (4,): delta_v[h, i] = eta2 * e[h] * sample[i].
    delta_v[:] = (eta2 * e)[:, np.newaxis] * sample
    delta_gamma = -1 * eta2 * e

    # Apply the updates.
    theta += delta_theta
    w += delta_w
    gamma += delta_gamma
    v += delta_v

    # Snapshot the parameters after this step.
    theta_history[k] = theta
    w_history[k] = w
    gamma_history[k] = gamma
    v_history[5 * k:5 * (k + 1)] = v




# Prediction on the held-out test set.
mm = X_test.shape[0]
y_pred = np.zeros((mm, 1))
for k, sample in enumerate(X_test):

    # Outputs of the five hidden-layer neurons for this test sample
    # (the buffer b left over from training is reused).
    for h in range(5):
        b[h] = self_def.neuron_out1(sample, v[h], gamma[h])

    # Estimated output of the output-layer neuron, Eq. 5.3; outputs of at
    # least 0.5 are classified as 1, everything else stays 0.
    y_esti = self_def.neuron_out1(b, w, theta)
    if y_esti >= 0.5:
        y_pred[k] = 1

# Build the 2x2 confusion matrix. The first index is the true label and the
# second index is the predicted label:
#   cfmat[0, 0]  predicted 0, true 0      cfmat[0, 1]  predicted 1, true not 1
#   cfmat[1, 0]  predicted 0, true not 0  cfmat[1, 1]  predicted 1, true 1
cfmat = np.zeros((2, 2))
for i in range(mm):
    predicted, actual = y_pred[i], y_test[i]
    hit = predicted == actual

    if predicted == 0:
        row, col = (0, 0) if hit else (1, 0)
    elif predicted == 1:
        row, col = (1, 1) if hit else (0, 1)
    else:
        # Predictions other than 0/1 are not counted.
        continue

    cfmat[row, col] += 1

print(cfmat)
print('end')
# Plot how the parameters evolved over the training steps.
# The original plotted only 3 of the 5 w components and 4 of the 5 gamma
# components (a leftover from the three-neuron version); every component is
# plotted now, however many columns the history arrays have.
t = np.arange(m)

# Panel 1: output-layer threshold.
p1 = plt.subplot(411)
p1.plot(t, theta_history)
plt.ylabel('theta')

# Panel 2: one curve per hidden-to-output weight.
p2 = plt.subplot(412)
plt.ylabel('w')
for h in range(w_history.shape[1]):
    p2.plot(t, w_history[:, h])

# Panel 3: one curve per hidden-layer threshold.
p3 = plt.subplot(413)
plt.ylabel('gamma')
for h in range(gamma_history.shape[1]):
    p3.plot(t, gamma_history[:, h])

plt.show()