Machine Learning: Lab 3 Code Walkthrough
阿新 • Published 2022-04-04
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# Load data1.txt: each row is "exam1,exam2,label".
def loaddata():
    data = np.loadtxt('data1.txt', delimiter=',')
    n = data.shape[1] - 1  # number of features
    X = data[:, 0:n]
    y = data[:, -1].reshape(-1, 1)
    return X, y

# Scatter-plot the samples in data1.txt, one marker per class.
def plot(X, y):
    pos = np.where(y == 1)
    neg = np.where(y == 0)
    plt.scatter(X[pos[0], 0], X[pos[0], 1], marker='x')
    plt.scatter(X[neg[0], 0], X[neg[0], 1], marker='o')
    plt.xlabel('Exam 1 score')
    plt.ylabel('Exam 2 score')
    plt.show()

X, y = loaddata()
plot(X, y)

# Sigmoid function: 1 / (1 + e^(-z))
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Logistic regression hypothesis: the linear model's output z = Xθ fed through the sigmoid.
def hypothesis(X, theta):
    z = np.dot(X, theta)
    return sigmoid(z)

# Cost function.
# Log-likelihood: l(θ) = ln L(θ) = Σ_{i=1}^{m} [ yᵢ ln gθ(xᵢ) + (1 − yᵢ) ln(1 − gθ(xᵢ)) ]
# The cost is J(θ) = −l(θ)/m; gradient descent finds the θ that minimizes it.
# A small constant (1e-6) is added inside each log to avoid log(0).
def computeCost(X, y, theta):
    m = X.shape[0]
    # Cost computation (the part to fill in for the lab):
    z = -y * np.log(hypothesis(X, theta) + 1e-6) - (1 - y) * np.log(1 - hypothesis(X, theta) + 1e-6)
    return np.sum(z) / m

diff = []  # records [iteration, loss] pairs for the loss curve

# Gradient descent.
def gradientDescent(X, y, theta, iterations, alpha):
    # Number of samples.
    m = X.shape[0]
    # Prepend a column of ones to X for the bias term.
    X = np.hstack((np.ones((m, 1)), X))
    for i in range(iterations):
        # Parameter update (the part to fill in for the lab):
        # θ = θ − α · ∂J/∂θ, with ∂J/∂θ = Xᵀ(h − y)/m,
        # i.e. each weight moves against its partial derivative of the cost.
        theta = theta - alpha * np.dot(X.T, hypothesis(X, theta) - y) / m
        # Log the loss every 10,000 iterations.
        if i % 10000 == 0:
            diff.append([i, computeCost(X, y, theta)])
            print('iteration', i, 'loss:', computeCost(X, y, theta), 'theta=', theta)
    return theta

# Prediction: relies on the global theta learned below.
def predict(X):
    # Prepend the all-ones column, as in training.
    c = np.ones(X.shape[0])
    X = np.insert(X, 0, values=c, axis=1)
    # Evaluate the hypothesis, then threshold at 0.5: >= 0.5 is class 1, < 0.5 is class 0.
    h = hypothesis(X, theta)
    h[h >= 0.5] = 1
    h[h < 0.5] = 0
    return h

X, y = loaddata()
n = X.shape[1]  # number of features
# theta is a column vector; n+1 because a ones column is prepended to X during training.
theta = np.zeros(n + 1).reshape(n + 1, 1)
iterations = 250000
alpha = 0.008  # learning rate
theta = gradientDescent(X, y, theta, iterations, alpha)
print('theta=\n', theta)

def plotDecisionBoundary(X, y, theta):
    cm_dark = mpl.colors.ListedColormap(['g', 'r'])
    plt.xlabel('Exam 1 score')
    plt.ylabel('Exam 2 score')
    plt.scatter(X[:, 0], X[:, 1], c=np.array(y).squeeze(), cmap=cm_dark, s=30)
    # Decision boundary (the part to fill in for the lab):
    # θ0 + θ1·x1 + θ2·x2 = 0, so x2 = (−θ0 − θ1·x1) / θ2.
    x1 = np.linspace(0, 150, 500)
    x2 = (-theta[0] - theta[1] * x1) / theta[2]
    plt.plot(x1, x2)
    plt.show()

def plotLoss():
    d = np.array(diff)
    plt.plot(d[:, 0], d[:, 1])
    plt.title('Loss over iterations', fontsize=20)
    plt.show()

def plotPred():
    # Classify 233 uniformly random points and color them by predicted class.
    test_x = []
    for i in range(233):
        tx = np.random.uniform(0.0, 100.0)
        ty = np.random.uniform(0.0, 100.0)
        test_x.append([tx, ty])
    test_x = np.array(test_x)
    test_y = predict(test_x)
    cm_dark = mpl.colors.ListedColormap(['b', 'pink'])
    plt.scatter(test_x[:, 0], test_x[:, 1], c=np.array(test_y).squeeze(), cmap=cm_dark, s=30)
    x1 = np.linspace(0, 150, 500)
    x2 = (-theta[0] - theta[1] * x1) / theta[2]
    plt.plot(x1, x2)
    plt.title('Predictions', fontsize=20)
    plt.show()

plotDecisionBoundary(X, y, theta)
plotLoss()
plotPred()
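
For reference, the update inside gradientDescent is the standard batch gradient of the cost above, written in matrix form (this matches the code exactly, it adds nothing beyond it):

    J(θ) = −(1/m) Σ_{i=1}^{m} [ yᵢ ln hθ(xᵢ) + (1 − yᵢ) ln(1 − hθ(xᵢ)) ]
    ∂J/∂θ = (1/m) Xᵀ (hθ(X) − y)
    θ ← θ − α · (1/m) Xᵀ (hθ(X) − y)

So each of the 250,000 iterations costs one matrix-vector product over all m samples.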
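
A quick sanity check you can run after training, using only the functions defined above (predict returns an m×1 column, so it compares elementwise with y):

# Training-set accuracy with the lab's own predict (run after theta has been learned).
p = predict(X)
print('train accuracy:', np.mean(p == y))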
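
To cross-check the hand-rolled solver, one option is to fit scikit-learn's LogisticRegression on the same file and compare the coefficients and accuracy. A minimal sketch, assuming scikit-learn is installed and data1.txt is in the working directory (this is not part of the original lab):

# Cross-check against scikit-learn (assumption: sklearn is available; not part of the lab).
import numpy as np
from sklearn.linear_model import LogisticRegression

data = np.loadtxt('data1.txt', delimiter=',')
X, y = data[:, :-1], data[:, -1]

# C is the inverse regularization strength; a large C approximates the
# unregularized cost used in the lab code above.
clf = LogisticRegression(C=1e6, max_iter=10000)
clf.fit(X, y)

print('sklearn intercept/coefs:', clf.intercept_, clf.coef_)  # boundary: intercept_ + coef_ @ [x1, x2] = 0
print('sklearn train accuracy:', clf.score(X, y))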