神經網路實現基本的與或異或邏輯
阿新 • • 發佈:2019-01-01
平時計算機領域的OR AND XOR邏輯問題就不去詳說,大家都有學習過。
基本的邏輯圖如下:
最開始,神經網路能夠解決線性可分問題,這給它帶來了一次小高峰。但是幾年之後,有學者提出了XOR這一非線性問題,並且專門寫了一篇論文論述(單層)神經網路對非線性問題求解的無能為力,直接給當年神經網路的發展帶來了寒冰時代。直到十幾年後,多層網路的出現,也就是俗稱的MLP(Multilayer Perceptron,多層感知器),才把Neural Network帶入不斷發展的時期。
我們知道OR和AND都是線性可分的,而XOR卻是線性不可分的,用一幅圖表示:
對於第三幅座標圖(XOR),無論如何也無法在2維座標上畫出一條直線把星星和圓圓分開。
接下來我們使用最開始的神經網路程式碼來實現,也就是一個輸入層,然後加上各自的權重後再總體加上偏置得到輸出。
程式碼:
perceptron.py
import numpy as np


class Perceptron:
    """Single-layer perceptron with a step activation.

    The bias is folded into the weight vector (the "bias trick"): a constant
    1 is appended to every input, so ``W`` holds ``N`` weights plus one bias.
    """

    def __init__(self, N, alpha=0.1):
        """Create a perceptron for ``N`` input features.

        Args:
            N: number of input features (without the bias column).
            alpha: learning rate used by :meth:`fit`.
        """
        # N weights + 1 bias entry, drawn from a standard normal and scaled
        # by 1 / sqrt(N).
        self.W = np.random.randn(N + 1) / np.sqrt(N)
        self.alpha = alpha

    def step(self, x):
        """Apply the step activation: 1 where ``x > 0``, otherwise 0.

        A scalar or single-element input yields a plain ``int`` (as before);
        larger arrays yield an integer array of the same shape, which is what
        lets :meth:`predict` handle several samples at once.
        """
        activated = np.where(np.asarray(x) > 0, 1, 0)
        if activated.size == 1:
            return int(activated.item())
        return activated

    def fit(self, X, y, epochs=10):
        """Train with the perceptron learning rule.

        Args:
            X: 2-D array of samples, one row per sample, without bias column.
            y: targets aligned with the rows of ``X``.
            epochs: number of passes over the whole data set.
        """
        # Append the constant-1 bias column.
        X = np.c_[X, np.ones((X.shape[0]))]

        for _ in np.arange(0, epochs):
            for (x, target) in zip(X, y):
                p = self.step(np.dot(x, self.W))
                # Weights only change when the prediction is wrong.
                if p != target:
                    error = p - target
                    self.W += -self.alpha * error * x

    def predict(self, X, addBias=True):
        """Predict the class (0 or 1) of one or more samples.

        Args:
            X: a single sample (1-D) or a 2-D array with one sample per row.
            addBias: append the constant-1 bias column before predicting;
                pass ``False`` if ``X`` already contains it.

        Returns:
            An ``int`` for a single sample, or an integer array with one
            entry per row for several samples.
        """
        X = np.atleast_2d(X)
        if addBias:
            X = np.c_[X, np.ones((X.shape[0]))]
        return self.step(np.dot(X, self.W))
test.py
from perceptron import Perceptron
import numpy as np

# The four binary input pairs and the truth tables of the three gates.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_or = np.array([[0], [1], [1], [1]])
y_and = np.array([[0], [0], [0], [1]])
# XOR is 1 only when the two inputs differ. (The previous table
# [[1], [0], [0], [1]] was XNOR.)
y_xor = np.array([[0], [1], [1], [0]])

# (training message, testing message, labels, epochs) for each experiment.
# A fresh perceptron is trained for every gate.
experiments = [
    ("[INFO] training perceptron....",
     "[INFO] testing perceptron OR...", y_or, 20),
    ("[INFO] training perceptron AND....",
     "[INFO] testing perceptron AND...", y_and, 20),
    ("[INFO] training perceptron XOR....",
     "[INFO] testing perceptron XOR...", y_xor, 200),
]

for (train_msg, test_msg, labels, epochs) in experiments:
    print(train_msg)
    p = Perceptron(X.shape[1], alpha=0.1)
    p.fit(X, labels, epochs=epochs)

    print(test_msg)
    for (x, target) in zip(X, labels):
        pred = p.predict(x)
        print("[INFO] data={}, ground_truth={}, pred={}".format(x, target[0], pred))

print("X.shape\n", X.shape)
print("X.shape[0]\n", X.shape[0])
print("X.shape[1]\n", X.shape[1])
result:
可見對於XOR問題,沒有隱藏層存在的情況下,神經網路基本學不到那種分類能力。然後我們改進網路,加入hidden layers,然後看能否解決問題,這裡只加入一層的隱藏層。
neuralnetwork.py
import numpy as np
# 將完整的神經網路結構定義成類
class NeuralNetwork:
    """Fully connected feed-forward network with sigmoid activations.

    Training is per-sample gradient descent with backpropagation. Biases are
    folded into the weight matrices (the "bias trick"): every layer's input
    carries one extra column, so each matrix has one extra row.
    """

    def __init__(self, layers, alpha=0.1):
        """Build and randomly initialise the weight matrices.

        Args:
            layers: sequence of layer sizes, e.g. ``[2, 2, 1]`` for 2 inputs,
                one hidden layer of 2 nodes and 1 output. Needs at least an
                input and an output layer.
            alpha: learning rate.

        Raises:
            ValueError: if fewer than two layer sizes are given.
        """
        if len(layers) < 2:
            raise ValueError(
                "layers must contain at least an input and an output size")

        self.W = []
        self.layers = layers
        self.alpha = alpha

        # Every connection except the last one: both sides get an extra
        # bias entry, hence the "+ 1" on rows and columns.
        for i in np.arange(0, len(layers) - 2):
            w = np.random.randn(layers[i] + 1, layers[i + 1] + 1)
            # Scale by 1 / sqrt(fan-in).
            self.W.append(w / np.sqrt(layers[i]))

        # Last connection: the input side still carries the bias entry,
        # the output side does not.
        w = np.random.randn(layers[-2] + 1, layers[-1])
        self.W.append(w / np.sqrt(layers[-2]))

    def __repr__(self):
        """Return e.g. ``NeuralNetwork:2-2-1``."""
        return "NeuralNetwork:{}".format("-".join(str(l) for l in self.layers))

    def sigmoid(self, x):
        """Logistic sigmoid ``1 / (1 + exp(-x))``, applied element-wise."""
        return 1.0 / (1 + np.exp(-x))

    def sigmoid_deriv(self, x):
        """Derivative of the sigmoid, given the sigmoid's *output*.

        ``x`` must already be ``sigmoid(z)``; the derivative with respect to
        ``z`` is then ``x * (1 - x)``. Callers in this class always pass
        stored activations.
        """
        return x * (1 - x)

    def fit(self, X, y, epochs=1000, displayUpdate=100):
        """Train the network and return the recorded losses.

        Args:
            X: 2-D array of samples, one row per sample, without bias column.
            y: targets aligned with the rows of ``X``.
            epochs: number of passes over the data.
            displayUpdate: the loss is computed, printed and recorded after
                the first epoch and after every ``displayUpdate``-th epoch.
                A value of 0 or less keeps only the first-epoch report.

        Returns:
            List of the recorded loss values, in chronological order.
        """
        # Append the constant-1 bias column.
        X = np.c_[X, np.ones((X.shape[0]))]
        losses = []

        for epoch in np.arange(0, epochs):
            # One backpropagation step per sample.
            for (x, target) in zip(X, y):
                self.fit_partial(x, target)

            # Guard the modulo so displayUpdate=0 cannot divide by zero.
            periodic = displayUpdate > 0 and (epoch + 1) % displayUpdate == 0
            if epoch == 0 or periodic:
                loss = self.calculate_loss(X, y)
                losses.append(loss)
                print("[INFO] epoch={}, loss={:.7f}".format(epoch + 1, loss))

        return losses

    def fit_partial(self, x, y):
        """Run one forward/backward pass on a single sample and update ``W``.

        Args:
            x: one sample that already includes the bias entry.
            y: target for that sample.
        """
        # Forward pass: A[0] is the input, A[k] the activation after layer k.
        A = [np.atleast_2d(x)]
        for layer in np.arange(0, len(self.W)):
            net = A[layer].dot(self.W[layer])
            out = self.sigmoid(net)
            A.append(out)

        # Backward pass (chain rule), starting from the output error.
        error = A[-1] - y
        D = [error * self.sigmoid_deriv(A[-1])]
        for layer in np.arange(len(A) - 2, 0, -1):
            delta = D[-1].dot(self.W[layer].T)
            delta = delta * self.sigmoid_deriv(A[layer])
            D.append(delta)

        # Deltas were collected last-layer-first; reverse to match self.W.
        D = D[::-1]

        # Gradient-descent weight update.
        for layer in np.arange(0, len(self.W)):
            self.W[layer] += -self.alpha * A[layer].T.dot(D[layer])

    def predict(self, X, addBias=True):
        """Forward-propagate ``X`` and return the output activations.

        Args:
            X: a single sample (1-D) or a 2-D array with one sample per row.
            addBias: append the constant-1 bias column first; pass ``False``
                if ``X`` already contains it.

        Returns:
            2-D array with one row of output activations per sample.
        """
        p = np.atleast_2d(X)
        if addBias:
            p = np.c_[p, np.ones((p.shape[0]))]

        for layer in np.arange(0, len(self.W)):
            p = self.sigmoid(np.dot(p, self.W[layer]))

        return p

    def calculate_loss(self, X, targets):
        """Return half the sum of squared errors over ``X``.

        ``X`` must already contain the bias column, because the prediction is
        made with ``addBias=False``.
        """
        targets = np.atleast_2d(targets)
        predictions = self.predict(X, addBias=False)
        loss = 0.5 * np.sum((predictions - targets) ** 2)
        return loss
if __name__ == "__main__":
    # Smoke test: build a 2-2-1 network and show its textual representation.
    network = NeuralNetwork([2, 2, 1])
    print(network)
test.py
from neuralnetwork import NeuralNetwork
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
# Training data: the four binary input pairs and the OR / AND / XOR tables.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_or = np.array([[0], [1], [1], [1]])
y_and = np.array([[0], [0], [0], [1]])
y_xor = np.array([[0], [1], [1], [0]])

# Training schedule. fit() records the loss after epoch 1 and after every
# DISPLAY_UPDATE-th epoch, so both values are needed again for the plot.
EPOCHS = 2000000
DISPLAY_UPDATE = 100

# 2-2-1 network: 2 input nodes, one hidden layer with 2 nodes, 1 output node.
nn = NeuralNetwork([2, 2, 1], alpha=0.5)
# Train on XOR; the weight matrices in nn.W are updated in place.
losses = nn.fit(X, y_xor, epochs=EPOCHS, displayUpdate=DISPLAY_UPDATE)

# Print the raw output and the thresholded prediction for every input.
for (x, target) in zip(X, y_xor):
    pred = nn.predict(x)[0][0]
    step = 1 if pred > 0.5 else 0
    print("[INFO] data-{}, ground_truth={}, pred={:.4f}, step={}"
          .format(x, target[0], pred, step))

# Figure 1: the four data points, coloured by XOR label.
plt.style.use("ggplot")
plt.figure()
plt.title("Data")
cm_dark = mpl.colors.ListedColormap(['g', 'b'])
plt.scatter(X[:, 0], X[:, 1], marker="o", c=y_xor.ravel(), cmap=cm_dark, s=80)

# Figure 2: training loss. Plot against the epochs at which the loss was
# actually recorded, so the "Epoch #" axis shows real epoch numbers and not
# the index into the losses list.
recorded_epochs = sorted({1} | set(range(DISPLAY_UPDATE, EPOCHS + 1, DISPLAY_UPDATE)))
plt.figure()
plt.plot(recorded_epochs, losses)
plt.title("Training Loss")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.show()

print("W\n", nn.W)
result:
很好,可見加入一層hidden layer之後,可以很好解決非線性問題。
這裡當然也可以把網路定義成之前的沒有隱藏層的結構:
test.py
from neuralnetwork import NeuralNetwork
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
# Training data: the four binary input pairs and the OR / AND / XOR tables.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_or = np.array([[0], [1], [1], [1]])
y_and = np.array([[0], [0], [0], [1]])
y_xor = np.array([[0], [1], [1], [0]])

# Training schedule. fit() records the loss after epoch 1 and after every
# DISPLAY_UPDATE-th epoch, so both values are needed again for the plot.
EPOCHS = 2000000
DISPLAY_UPDATE = 100

# 2-1 network: 2 input nodes wired directly to 1 output node, i.e. no hidden
# layer at all.
nn = NeuralNetwork([2, 1], alpha=0.5)
# Train on XOR; the weight matrix in nn.W is updated in place.
losses = nn.fit(X, y_xor, epochs=EPOCHS, displayUpdate=DISPLAY_UPDATE)

# Print the raw output and the thresholded prediction for every input.
for (x, target) in zip(X, y_xor):
    pred = nn.predict(x)[0][0]
    step = 1 if pred > 0.5 else 0
    print("[INFO] data-{}, ground_truth={}, pred={:.4f}, step={}"
          .format(x, target[0], pred, step))

# Figure 1: the four data points, coloured by XOR label.
plt.style.use("ggplot")
plt.figure()
plt.title("Data")
cm_dark = mpl.colors.ListedColormap(['g', 'b'])
plt.scatter(X[:, 0], X[:, 1], marker="o", c=y_xor.ravel(), cmap=cm_dark, s=80)

# Figure 2: training loss. Plot against the epochs at which the loss was
# actually recorded, so the "Epoch #" axis shows real epoch numbers and not
# the index into the losses list.
recorded_epochs = sorted({1} | set(range(DISPLAY_UPDATE, EPOCHS + 1, DISPLAY_UPDATE)))
plt.figure()
plt.plot(recorded_epochs, losses)
plt.title("Training Loss")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.show()

print("W\n", nn.W)
result:
我們將這一MLP演算法應用到MNIST資料集上看看:
testMNIST.py
from neuralnetwork import NeuralNetwork
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn import datasets
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
# Load the digits data set bundled with scikit-learn. It is a small
# MNIST-like set of 8x8 images (64 features per sample).
print("[INFO] loading mnist dataset...")
digits = datasets.load_digits()
data = digits.data.astype("float")

# Min-max scale all pixel values into [0, 1].
data = (data - data.min()) / (data.max() - data.min())
print("[INFO] samples:{}, dim:{}".format(data.shape[0], data.shape[1]))

# 75% of the samples for training, 25% for testing.
(trainX, testX, trainY, testY) = train_test_split(data, digits.target, test_size=0.25)
print("trainY:\n", trainY)
print("testY:\n", testY)

# One-hot encode the labels, e.g. 0 -> [1,0,0,0,0,0,0,0,0,0] and
# 9 -> [0,0,0,0,0,0,0,0,0,1]. The encoder is fitted on the training labels
# only and reused for the test labels, so both share the same column order
# even if a class happens to be missing from the test split.
label_binarizer = LabelBinarizer()
trainY = label_binarizer.fit_transform(trainY)
testY = label_binarizer.transform(testY)
print("Vectorize trainY:\n", trainY)
print("trainY[0]\n", trainY[0])
print("Vectroize testY:\n", testY)

# Training schedule. fit() records the loss after epoch 1 and after every
# DISPLAY_UPDATE-th epoch, so both values are needed again for the plot.
EPOCHS = 5000
DISPLAY_UPDATE = 100

# Network 64-32-32-16-10: 64 input nodes (8x8 pixels), three hidden layers
# and 10 output nodes (one per digit 0-9).
print("[INFO] training network...")
nn = NeuralNetwork([trainX.shape[1], 32, 32, 16, 10])
print("[INFO] {}".format(nn))
print("trainX.shape\n", trainX.shape)
print("testY.shape\n", testY.shape)

# Train the model.
losses = nn.fit(trainX, trainY, epochs=EPOCHS, displayUpdate=DISPLAY_UPDATE)

# Predict on the held-out set; the most active output node is the predicted
# digit. Then print the classification report.
print("[INFO] evaluating network...")
predictions = nn.predict(testX)
predictions = predictions.argmax(axis=1)
print(classification_report(testY.argmax(axis=1), predictions))

# Training loss, plotted against the epochs at which it was actually
# recorded, so the "Epoch #" axis shows real epoch numbers.
recorded_epochs = sorted({1} | set(range(DISPLAY_UPDATE, EPOCHS + 1, DISPLAY_UPDATE)))
plt.style.use("ggplot")
plt.figure()
plt.plot(recorded_epochs, losses)
plt.title("Training Loss")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.show()

print("W\n", nn.W)
result:
得到97%的平均準確率還是可以的。