Understanding an Artificial Neural Network (ANN) Program in Python 3: a Single-Hidden-Layer MNIST Classifier
1 Data: the MNIST dataset. Each sample is a single-channel 28×28 image of a digit 0–9; there are 60,000 training images and 10,000 test images, and each image's label is the digit the image shows.

- train-images-idx3-ubyte: training images (60,000)
- train-labels-idx1-ubyte: training labels
- t10k-images-idx3-ubyte: test images (10,000)
- t10k-labels-idx1-ubyte: test labels

2 System structure (figure): a 784-unit input layer → a 40-unit hidden layer → a 10-unit output layer.

3 Matrix sizes: input layer to hidden layer, w1: 40×784 and b1: 40×1; hidden layer to output layer, w2: 10×40 and b2: 10×1. Each weight matrix has shape (units in the next layer) × (units in the previous layer), matching np.dot(w, x) + b in the code.
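As a quick check of these sizes, here is a minimal sketch (it only assumes NumPy and the NeuralNet class defined in the code below):

# Shape check for a [784, 40, 10] network; NeuralNet is defined further down
net = NeuralNet([784, 40, 10])
print([w.shape for w in net.w_])  # [(40, 784), (10, 40)]
print([b.shape for b in net.b_])  # [(40, 1), (10, 1)]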
5 Why encode the digits this way? Because the sigmoid's nonlinear mapping squashes every output into the range (0, 1), the digits 0–9 are likewise represented in 0/1 (one-hot) form.
How the output ideally represents a digit:

Ideally, the ten output neurons representing 0:

| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |

Ideally, the ten output neurons representing 1:

| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |

Ideally, the ten output neurons representing 2:

| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |

In practice, the predicted digit is read off as the index of the largest of the ten output values. The ten output neurons might actually represent 0 as:

| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|
| 0.98 | 0.01 | 0.01 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |

might actually represent 1 as:

| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|
| 0.005 | 0.987 | 0.005 | 0.003 | 0 | 0 | 0 | 0 | 0 | 0 |

and might actually represent 2 as:

| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.01 | 0.96 | 0.02 | 0.01 | 0 | 0 | 0 | 0 | 0 |
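The decoding rule ("take the index of the largest output") is exactly what np.argmax does; a minimal sketch using the last table above:

import numpy as np

# an actual-style output representing the digit 2, as in the last table above
out = np.array([0, 0.01, 0.96, 0.02, 0.01, 0, 0, 0, 0, 0])
print(np.argmax(out))  # 2: the index of the largest value is the predicted digit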
# python3
# Input units (nodes): 784 = 28*28*1; hidden layer: 40 units; output neurons: 10
# The 10 output nodes stand for the digits 0,1,2,3,4,5,6,7,8,9
# There is only one hidden layer, so this is a shallow neural network (SNN), i.e. a traditional ANN
import numpy as np
import random
import os, struct
from array import array as pyarray
from numpy import array, int8, uint8, zeros
import matplotlib.pyplot as pl


class NeuralNet(object):
    # Initialize the network; sizes holds the number of layers and the units per layer
    # randn draws normally distributed random numbers
    def __init__(self, sizes):
        self.sizes_ = sizes
        self.num_layers_ = len(sizes)  # number of layers
        # w_ and b_ are initialized with normally distributed random numbers
        self.w_ = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]
        self.b_ = [np.random.randn(y, 1) for y in sizes[1:]]

    # Sigmoid function (S-shaped curve)
    def sigmoid(self, z):
        return 1.0 / (1.0 + np.exp(-z))

    # Derivative of the sigmoid: sigmoid(z) * (1 - sigmoid(z))
    def sigmoid_prime(self, z):
        return self.sigmoid(z) * (1 - self.sigmoid(z))

    # Forward pass: out = sigmoid(w*x + b), layer by layer
    def feedforward(self, x):
        for b, w in zip(self.b_, self.w_):
            x = self.sigmoid(np.dot(w, x) + b)
        return x

    # Backpropagation via the chain rule:
    # grad_b = (d cost / d out) * (d out / d net)
    # grad_w = grad_b * (activation of the previous layer)
    def backprop(self, x, y):
        nabla_b = [np.zeros(b.shape) for b in self.b_]
        nabla_w = [np.zeros(w.shape) for w in self.w_]
        activation = x
        activations = [x]
        zs = []
        for b, w in zip(self.b_, self.w_):
            z = np.dot(w, activation) + b
            zs.append(z)  # net
            activation = self.sigmoid(z)
            activations.append(activation)  # out
        # cost_derivative: d cost / d out
        # sigmoid_prime:   d out  / d net
        # activations:     d net  / d w
        # multiplying the three gives d cost / d w (target = y)
        delta = self.cost_derivative(activations[-1], y) * \
            self.sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        for l in range(2, self.num_layers_):
            z = zs[-l]
            sp = self.sigmoid_prime(z)
            delta = np.dot(self.w_[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return (nabla_b, nabla_w)

    # Update w and b from the averaged gradients of one mini-batch
    def update_mini_batch(self, mini_batch, eta):
        nabla_b = [np.zeros(b.shape) for b in self.b_]
        nabla_w = [np.zeros(w.shape) for w in self.w_]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.w_ = [w - (eta / len(mini_batch)) * nw for w, nw in zip(self.w_, nabla_w)]
        self.b_ = [b - (eta / len(mini_batch)) * nb for b, nb in zip(self.b_, nabla_b)]

    # training_data is a list of (x, y) pairs; epochs is the number of passes over the data;
    # mini_batch_size is the number of samples per update; eta is the learning rate
    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        if test_data:
            n_test = len(test_data)
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [training_data[k:k + mini_batch_size] for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(j, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(j))

    # Count how many test samples are classified correctly
    def evaluate(self, test_data):
        test_results = [(np.argmax(self.feedforward(x)), y[0]) for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        return (output_activations - y)

    # Predict: return the index of the largest of the ten output values
    def predict(self, data):
        value = self.feedforward(data)
        print("value :", value)
        print("dealValue :", np.argmax(value))
        return np.argmax(value)

    # Save the trained model
    def save(self):
        pass  # serialize w_ and b_ to a file (e.g. with pickle)

    def load(self):
        pass
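Before moving on to the data-loading code, it is worth convincing yourself that backprop really computes the chain-rule product described in its comments. The following is a minimal gradient-check sketch, not part of the original program; the tiny layer sizes, the random input, and the eps value are illustrative assumptions. It perturbs one weight, re-evaluates the quadratic cost (which matches cost_derivative = out - y), and compares the finite difference with the backprop gradient.

# Minimal gradient check (illustrative sketch, not part of the original program)
net = NeuralNet([4, 3, 2])           # a tiny network so the check runs instantly
x = np.random.randn(4, 1)            # a random input column vector
y = np.zeros((2, 1))
y[0] = 1.0                           # a one-hot target

nabla_b, nabla_w = net.backprop(x, y)

def cost(net, x, y):
    # quadratic cost 0.5*||out - y||^2, whose derivative is cost_derivative = (out - y)
    return 0.5 * np.sum((net.feedforward(x) - y) ** 2)

eps = 1e-5
base = net.w_[0][0, 0]
net.w_[0][0, 0] = base + eps
c_plus = cost(net, x, y)
net.w_[0][0, 0] = base - eps
c_minus = cost(net, x, y)
net.w_[0][0, 0] = base               # restore the original weight

numeric = (c_plus - c_minus) / (2 * eps)
print(numeric, nabla_w[0][0, 0])     # the two values should agree closely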
# Load the raw MNIST files; defaults to the training data
def load_mnist(dataset="training_data", digits=np.arange(10), path="."):
    if dataset == "training_data":
        fname_image = os.path.join(path, 'train-images.idx3-ubyte')
        fname_label = os.path.join(path, 'train-labels.idx1-ubyte')
    elif dataset == "testing_data":
        fname_image = os.path.join(path, 't10k-images.idx3-ubyte')
        fname_label = os.path.join(path, 't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'training_data' or 'testing_data'")
    flbl = open(fname_label, 'rb')
    magic_nr, size = struct.unpack(">II", flbl.read(8))
    lbl = pyarray("b", flbl.read())
    flbl.close()
    fimg = open(fname_image, 'rb')
    magic_nr, size, rows, cols = struct.unpack(">IIII", fimg.read(16))
    img = pyarray("B", fimg.read())
    fimg.close()
    ind = [k for k in range(size) if lbl[k] in digits]
    N = len(ind)
    images = zeros((N, rows, cols), dtype=uint8)
    labels = zeros((N, 1), dtype=int8)
    for i in range(len(ind)):
        images[i] = array(img[ind[i] * rows * cols: (ind[i] + 1) * rows * cols]).reshape((rows, cols))
        labels[i] = lbl[ind[i]]
    return images, labels
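A minimal usage sketch for load_mnist; it assumes the four MNIST files sit in the working directory under the dot-separated names the code opens (train-images.idx3-ubyte etc.):

images, labels = load_mnist("training_data")
print(images.shape)  # (60000, 28, 28)
print(labels.shape)  # (60000, 1)
print(labels[0])     # the digit shown in images[0]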
def load_samples(dataset="training_data"):
    image, label = load_mnist(dataset)
    X = [np.reshape(x, (28 * 28, 1)) for x in image]
    X = [x / 255.0 for x in X]  # rescale grey values from [0, 255] to [0, 1]

    # One-hot encode a label:
    # 5 -> [0,0,0,0,0,1,0,0,0,0]; 1 -> [0,1,0,0,0,0,0,0,0,0]
    def vectorized_Y(y):
        e = np.zeros((10, 1))
        e[y] = 1.0
        return e

    if dataset == "training_data":
        Y = [vectorized_Y(y) for y in label]
        pair = list(zip(X, Y))
        return pair
    elif dataset == 'testing_data':
        pair = list(zip(X, label))
        return pair
    else:
        print('Something wrong')
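Note the asymmetry in what load_samples returns: training pairs carry a 10×1 one-hot target for backprop, while test pairs keep the plain digit label that evaluate compares against np.argmax. A minimal sketch (again assuming the MNIST files are in place):

train_x, train_y = load_samples("training_data")[0]
print(train_x.shape, train_y.shape)  # (784, 1) (10, 1)

test_x, test_y = load_samples("testing_data")[0]
print(test_x.shape, test_y)          # (784, 1) and a 1-element label array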
if __name__ == '__main__':
    INPUT = 28 * 28
    OUTPUT = 10
    net = NeuralNet([INPUT, 40, OUTPUT])
    train_set = load_samples(dataset='training_data')
    test_set = load_samples(dataset='testing_data')
    net.SGD(train_set, 13, 100, 3.0, test_data=test_set)
    # accuracy on the test set
    correct = 0
    for test_feature in test_set:
        if net.predict(test_feature[0]) == test_feature[1][0]:
            correct += 1
    print("accuracy: ", correct / len(test_set))
    # plot the sigmoid activation curve
    x = np.linspace(-8.0, 8.0)
    y = net.sigmoid(x)
    pl.plot(x, y)
    pl.show()