numpy實現簡單的任意層數的BP神經網路設計
阿新 • 發佈:2020-12-19
技術標籤:造輪子
手寫了一個簡單的BP神經網路:其原理不再贅述了,傳遞函式只有sigmoid,tansig或者是Weak_relu,使用了一個3輸入2輸出的運算元為例,損失函式為平方誤差:
# -*- coding: utf-8 -*-
"""
Simple fully-connected BP (back-propagation) neural network with an
arbitrary number of layers, implemented with NumPy only.

Activations available: sigmoid, tansig (tanh) and a leaky ReLU
(`Weak_Relu`).  The loss is the squared error.  Demonstrated in `main`
on a 3-input / 2-output regression data set.

Created on Wed Dec 16 19:03:01 2020
@author: SUE
"""
import numpy as np


def sigmoid(x):
    """Element-wise logistic sigmoid 1/(1+e^-x); output shaped like x."""
    return 1.0 / (1.0 + np.exp(-x))


def Weak_Relu(x, alpha=0.05):
    """Leaky ReLU: x where x > 0, alpha*x otherwise (alpha kept at the
    original hard-coded 0.05 by default, now overridable)."""
    x = np.asarray(x, dtype=float)
    return np.where(x > 0, x, alpha * x)


def tansig(x):
    """Hyperbolic-tangent activation (MATLAB-style name kept)."""
    # np.tanh is numerically safe for large |x|, unlike the exp quotient.
    return np.tanh(x)


def diff_sigmoid(x):
    """Derivative of `sigmoid` evaluated at pre-activation x."""
    s = sigmoid(x)
    return s * (1.0 - s)


def diff_weak_Relu(x, alpha=0.05):
    """Derivative of `Weak_Relu`: 1 where x > 0, alpha otherwise."""
    x = np.asarray(x, dtype=float)
    return np.where(x > 0, 1.0, alpha)


def diff_tansig(x):
    """Derivative of `tansig`: 1 - tanh(x)^2."""
    t = np.tanh(x)
    return 1.0 - t * t


# Maps each activation to its derivative for back-propagation dispatch.
_DERIVATIVE = {sigmoid: diff_sigmoid, Weak_Relu: diff_weak_Relu, tansig: diff_tansig}


def process(W, b, x, stimulate_fun):
    """One layer forward step: stimulate_fun(W @ x + b)."""
    return stimulate_fun(np.dot(W, x) + b)


def qua_Err(x, OUTPUT):
    """Gradient of the squared-error loss: d/dx (x - OUTPUT)^2 = 2(x - OUTPUT)."""
    return 2.0 * (np.asarray(x) - np.asarray(OUTPUT))


def set_up_BP(NeuNumList):
    """Initialise a network with layer sizes NeuNumList.

    Returns (W, b) with W[i] of shape (n_{i+1}, n_i), uniform in
    [-0.5, 0.5), and b[i] of shape (n_{i+1}, 1), uniform in [0, 1)
    (same distributions as the original implementation).
    """
    W = []  # weight matrices
    b = []  # bias column vectors
    for i in range(len(NeuNumList) - 1):
        W.append(np.random.rand(NeuNumList[i + 1], NeuNumList[i]) - 0.5)
        b.append(np.random.rand(NeuNumList[i + 1], 1))
    return W, b


def get_first_x(W, b, Act, INPUT):
    """Forward pass.  Act is the list of activation functions per layer.

    Returns the list [INPUT, a_1, ..., a_L] of every layer's output;
    element -1 is the network prediction.
    """
    x = [INPUT]
    for i in range(len(b)):
        x.append(process(W[i], b[i], x[-1], Act[i]))
    return x


def get_diff_list(W, b, Act, x, OUTPUT, ERRFUN):
    """Back-propagation.  ERRFUN gives the loss gradient at the output.

    Returns (DW, Db): the loss gradients of each layer's weights and
    biases, ordered from the LAST layer to the FIRST — callers (see
    `Drill`) index them with the reversed index `len(b)-1-j`.
    """
    ERR = ERRFUN(x[-1], OUTPUT)  # dLoss/d(layer output), updated per layer
    DW = []
    Db = []
    for layer in range(len(b) - 1, -1, -1):
        # Activation derivative at this layer's pre-activation.
        D = process(W[layer], b[layer], x[layer], _DERIVATIVE[Act[layer]])
        Tb = ERR * D                      # dLoss/db for this layer
        TW = np.dot(Tb, x[layer].T)       # dLoss/dW for this layer
        ERR = np.dot(W[layer].T, Tb)      # propagate error to previous layer
        DW.append(TW)
        Db.append(Tb)
    return DW, Db


def Drill(W, b, Act, INPUT, OUTPUT, ERRFUN, epoch, step):
    """Full-batch gradient descent for `epoch` iterations, rate `step`.

    Prints the summed L1 error every epoch and returns the trained (W, b).
    Fix over the original: the inner back-prop call now honours the
    ERRFUN argument instead of hard-coding qua_Err.
    """
    for it in range(epoch):
        s = 0.0            # accumulated L1 error over the batch
        DW = None          # summed gradients (last layer first)
        Db = None
        for sample_in, sample_out in zip(INPUT, OUTPUT):
            x = get_first_x(W, b, Act, np.array(sample_in, ndmin=2).T)
            OP = np.array(sample_out, ndmin=2).T
            s += float(np.sum(np.abs(x[-1] - OP)))
            TW, Tb = get_diff_list(W, b, Act, x, OP, ERRFUN)
            if DW is None:
                DW, Db = TW, Tb
            else:
                for k in range(len(DW)):
                    DW[k] = DW[k] + TW[k]
                    Db[k] = Db[k] + Tb[k]
        # DW/Db are stored last-layer-first, hence the reversed index.
        for j in range(len(b)):
            W[j] = W[j] - step * DW[len(b) - 1 - j]
            b[j] = b[j] - step * Db[len(b) - 1 - j]
        print('第 %d 次迭代的誤差之和(一範數)是: ' % (it + 1), end='')
        print("%f" % s)
    return W, b


def predict(W, b, Act, INPUT):
    """Run one input vector through the trained network; returns the
    output column vector of shape (n_out, 1)."""
    x = np.array(INPUT, ndmin=2).T
    for i in range(len(b)):
        x = process(W[i], b[i], x, Act[i])
    return x


def main():
    """Train a 3-10-10-2 tanh network on the demo data and plot the fit."""
    # Imported lazily so the numeric core works without matplotlib.
    import matplotlib.pyplot as plt

    W, b = set_up_BP([3, 10, 10, 2])
    Act = [tansig, tansig, tansig]
    INPUT = [[-1.0000, -1.0000, -1.0000],
             [-0.9057, -0.8800, -0.9429],
             [-0.7595, -0.8000, -0.9429],
             [-0.6717, -0.7600, -0.8571],
             [-0.5559, -0.6400, -0.6857],
             [-0.5235, -0.4000, -0.6000],
             [-0.4805, -0.3200, -0.6000],
             [-0.3258, -0.2000, -0.3429],
             [-0.2081, -0.1200, -0.3429],
             [-0.1248, 0, -0.2857],
             [-0.0729, 0.2400, -0.2286],
             [-0.0299, 0.2800, -0.2286],
             [0.0669, 0.3200, -0.1714],
             [0.1996, 0.4000, 0.1429],
             [0.3348, 0.5200, 0.3429],
             [0.6138, 0.6000, 0.4286],
             [0.7555, 0.6800, 0.4286],
             [0.8069, 0.8000, 0.6571],
             [0.9271, 0.8800, 0.7143],
             [1.0000, 1.0000, 1.0000]]
    OUTPUT = [[-1.0000, -1.0000],
              [-0.9431, -0.9862],
              [-0.8641, -0.9856],
              [-0.7903, -0.9842],
              [-0.7217, -0.9586],
              [-0.6734, -0.9536],
              [-0.6230, -0.9419],
              [-0.4457, -0.7000],
              [-0.3125, -0.3295],
              [-0.2326, -0.2513],
              [-0.1706, -0.0410],
              [-0.2506, -0.0309],
              [-0.2014, -0.0969],
              [-0.0885, -0.0394],
              [0.0424, 0.1750],
              [0.4773, 0.5097],
              [0.6543, 0.6955],
              [0.8480, 0.8950],
              [0.9721, 0.9027],
              [1.0000, 1.0000]]
    W, b = Drill(W, b, Act, INPUT, OUTPUT, qua_Err, 5000, 0.005)

    pre = [predict(W, b, Act, row) for row in INPUT]
    n = len(INPUT)                     # was a hard-coded 20
    p = np.zeros((2, n))
    q = np.array(OUTPUT, ndmin=2).T
    for i in range(n):
        p[0][i] = pre[i][0]
        p[1][i] = pre[i][1]
    # Blue = network fit, red = ground truth (labels fixed; both were "Sigmoid").
    plt.plot(p[0], p[1], label="network fit", color="blue")
    plt.plot(q[0], q[1], label="ground truth", color="red")
    plt.legend()
    plt.show()


if __name__ == "__main__":
    main()
執行效果:
其中紅色的是真實資料,藍色的是擬合曲線