Coursera Ng Machine Learning, Week 5: Regularization, Bias vs. Variance (Python Implementation)
阿新 • Published: 2019-01-01
ex5.py
import scipy.optimize as op
import numpy as np
from scipy.io import loadmat
from ex5modules import *

# Part 1: Load and visualize data
data = loadmat('ex5data1.mat')
X = data['X']
y = data['y']
Xtest = data['Xtest']
ytest = data['ytest']
Xval = data['Xval']
yval = data['yval']
plotData(X, y)

# Part 2: Costs and gradients
Xone = np.column_stack((np.ones((X.shape[0], 1)), X))
theta_t = np.array([1, 1]).flatten()
cost = lrCostFunc(theta_t, Xone, y, 1)
grad = lrgradient(theta_t, Xone, y, 1)
print("Cost at theta_t=[1,1] with lambda=1: %f (should be about 303.993)" % cost)
print("Gradient at theta_t=[1,1] with lambda=1: [%f,%f] (should be about [-15.30,598.250])" % (grad[0], grad[1]))

# Part 3: Train linear regression
init_theta = np.zeros(Xone.shape[1])
lamda = 0
theta = op.fmin_cg(f=lrCostFunc, x0=init_theta, args=(Xone, y, lamda),
                   fprime=lrgradient, disp=False).reshape((Xone.shape[1], 1))
plotLinearRegression(X, y, theta)

# Part 4: Learning curve for linear regression
train_error, val_error = learningCurve(X, y, Xval, yval, init_theta, lamda)
plotLearningCurve(train_error, val_error)

# Part 5: Feature mapping for polynomial regression
p = 8
X_poly = polyFeatures(X, p)
X_poly, mu, sigma = featureNormalize(X_poly)
X_poly_one = np.column_stack((np.ones((X_poly.shape[0], 1)), X_poly))

X_poly_test = polyFeatures(Xtest, p)
X_poly_test = (X_poly_test - mu) / sigma
X_poly_test_one = np.column_stack((np.ones((X_poly_test.shape[0], 1)), X_poly_test))

X_poly_val = polyFeatures(Xval, p)
X_poly_val = (X_poly_val - mu) / sigma
X_poly_val_one = np.column_stack((np.ones((X_poly_val.shape[0], 1)), X_poly_val))

lamda = 3
poly_init_theta = np.zeros(X_poly_one.shape[1])
theta = op.fmin_cg(f=lrCostFunc, x0=poly_init_theta, args=(X_poly_one, y, lamda),
                   fprime=lrgradient, disp=False, maxiter=200).reshape((X_poly_one.shape[1], 1))
plotPolyRegression(X, y, mu, sigma, theta, p)

# Part 6: Learning curve for polynomial regression
train_error, val_error = learningCurve(X_poly, y, X_poly_val, yval, poly_init_theta, lamda)
plotLearningCurve(train_error, val_error)

# Part 7: Validation curve for selecting lambda
lamda_vec, train_error, val_error = validationCurve(X_poly_one, y, X_poly_val_one, yval, poly_init_theta)
plotLambdaError(lamda_vec, train_error, val_error)
print("\nlambda\t\tTrain Error\tValidation Error")
for i in range(lamda_vec.shape[0]):
    print(' %f\t%f\t%f' % (lamda_vec[i], train_error[i], val_error[i]))

# Part 8: Test error with the best lambda (lambda=3)
lamda = 3
theta = op.fmin_cg(f=lrCostFunc, x0=poly_init_theta, args=(X_poly_one, y, lamda),
                   fprime=lrgradient, disp=False, maxiter=200)
test_error = lrCostFunc(theta, X_poly_test_one, ytest, 0)
print("Test error with lambda=3: %f (should be about 3.8599)" % test_error)
ex5modules.py
import scipy.optimize as op
import numpy as np
import matplotlib.pyplot as plt

# Not used in this exercise.
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Regularized linear regression cost; the bias term theta[0] is not regularized.
def lrCostFunc(theta, X, y, lamda):
    m = X.shape[0]
    theta = theta.reshape((theta.shape[0], 1))
    J = np.sum(np.square(np.dot(X, theta) - y)) / (2 * m) + \
        lamda * np.sum(np.square(theta[1:])) / (2 * m)
    return J

# Gradient of the regularized cost; the regularization term is removed from grad[0].
def lrgradient(theta, X, y, lamda):
    m = X.shape[0]
    theta = theta.reshape((theta.shape[0], 1))
    grad = np.dot(X.T, X.dot(theta) - y) / m + theta * lamda / m
    grad[0] = grad[0] - theta[0] * lamda / m
    return grad.flatten()

# Train on the first 1, 2, ..., m examples and record the training and
# cross-validation error for each training-set size.
def learningCurve(X, y, Xval, yval, init_theta, lamda):
    train_error = np.zeros(X.shape[0])
    val_error = np.zeros(X.shape[0])
    X = np.column_stack((np.ones((X.shape[0], 1)), X))
    for i in range(X.shape[0]):
        results = op.fmin_cg(f=lrCostFunc, x0=init_theta, disp=False,
                             args=(X[:i+1, :], y[:i+1, :], lamda),
                             fprime=lrgradient, maxiter=400)
        theta = results.reshape((X.shape[1], 1))
        train_error[i] = lrCostFunc(theta, X[:i+1, :], y[:i+1, :], lamda)
        val_error[i] = lrCostFunc(theta, np.column_stack((np.ones(Xval.shape[0]), Xval)), yval, lamda)
    return train_error, val_error

# Train with each candidate lambda; the recorded errors are evaluated with lambda = 0.
def validationCurve(X, y, Xval, yval, init_theta):
    lamda_vec = np.array([0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10])
    train_error = np.zeros((lamda_vec.shape[0], 1))
    val_error = np.zeros((lamda_vec.shape[0], 1))
    i = -1
    for lamda in lamda_vec:
        i = i + 1
        theta = op.fmin_cg(f=lrCostFunc, x0=init_theta, disp=False,
                           args=(X, y, lamda), fprime=lrgradient, maxiter=200)
        train_error[i] = lrCostFunc(theta, X, y, 0)
        val_error[i] = lrCostFunc(theta, Xval, yval, 0)
    return lamda_vec, train_error, val_error

# Map a one-dimensional X to the polynomial features [X, X^2, ..., X^p].
def polyFeatures(X, p):
    X_p = np.zeros((X.shape[0], p))
    for i in range(p):
        X_p[:, i] = np.power(X.flatten(), i + 1)
    return X_p

# Z-score normalization using the sample standard deviation (ddof=1).
def featureNormalize(X):
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0, ddof=1)
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma

def plotData(X, y):
    plt.plot(X, y, 'rx')
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.show()

def plotLinearRegression(X, y, theta):
    Xone = np.column_stack((np.ones((X.shape[0], 1)), X))
    plt.plot(X, y, 'rx')
    plt.plot(X, Xone.dot(theta), 'b')
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.show()

def plotLearningCurve(train_error, val_error):
    l1, = plt.plot(np.arange(1, 13), train_error, 'b', label='train')
    l2, = plt.plot(np.arange(1, 13), val_error, 'g', label='cv')
    plt.ylim(0, 150)
    plt.legend(handles=[l1, l2], loc=1)
    plt.title("Learning curve for linear regression")
    plt.xlabel("Number of training examples")
    plt.ylabel("Error")
    plt.show()

# Plot the polynomial fit over a dense range of x values,
# applying the same normalization (mu, sigma) used on the training features.
def plotPolyRegression(X, y, mu, sigma, theta, p):
    plt.plot(X, y, 'rx')
    xmin = np.min(X) - 15
    xmax = np.max(X) + 25
    x = np.arange(xmin, xmax, .05)
    x_poly = polyFeatures(x, p)
    x_poly = (x_poly - mu) / sigma
    x_poly = np.column_stack((np.ones((x_poly.shape[0], 1)), x_poly))
    plt.plot(x, x_poly.dot(theta), '--b')
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.xlim(-80, 80)
    plt.ylim(np.min(x_poly.dot(theta)) - 10, np.max(x_poly.dot(theta)) + 20)
    plt.show()

def plotLambdaError(lamda_vec, train_error, val_error):
    l1, = plt.plot(lamda_vec, train_error, 'b', label='Train')
    l2, = plt.plot(lamda_vec, val_error, 'g', label='Cross Validation')
    plt.legend(handles=[l1, l2], loc=1)
    plt.xlabel("Lambda")
    plt.ylabel("Error")
    plt.xlim(0, 10)
    plt.ylim(0, 25)
    plt.show()
First, check that the cost function (lrCostFunc) and the gradient function (lrgradient) are correct: with theta_t = [1, 1] and lambda = 1, the cost should be about 303.993 and the gradient about [-15.30, 598.250].
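Beyond comparing against those expected numbers, the analytic gradient can also be verified numerically. Below is a minimal sketch (not part of the original scripts) that uses scipy.optimize.check_grad to compare lrgradient against a finite-difference gradient; it assumes ex5modules.py and ex5data1.mat are in the working directory.

import numpy as np
import scipy.optimize as op
from scipy.io import loadmat
from ex5modules import lrCostFunc, lrgradient

data = loadmat('ex5data1.mat')
X, y = data['X'], data['y']
Xone = np.column_stack((np.ones((X.shape[0], 1)), X))

theta_t = np.array([1.0, 1.0])
# check_grad returns the 2-norm of the difference between the numerical and
# the analytic gradient; a value that is tiny relative to the gradient's
# magnitude (here on the order of 600) suggests lrgradient is correct.
diff = op.check_grad(lrCostFunc, lrgradient, theta_t, Xone, y, 1)
print("Gradient check difference: %e" % diff)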
Linear regression fit:
Learning curve: with lambda = 0 (no regularization), the training error grows as more training examples are added and is not small, and the cross-validation error is also high. Both errors are large and close together, which indicates a high-bias (underfitting) problem.
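That rule of thumb can be written down as a small, purely illustrative helper; the function name and thresholds below are invented for this sketch and are not part of ex5modules.py.

def diagnose_learning_curve(train_error, val_error, high=20.0, gap=10.0):
    """Rough bias/variance reading of the final points of a learning curve.

    `high` and `gap` are arbitrary illustrative thresholds.
    """
    tr, cv = float(train_error[-1]), float(val_error[-1])
    if tr > high and abs(cv - tr) < gap:
        return "high bias: both errors are large and close together"
    if cv - tr > gap:
        return "high variance: large gap between training and CV error"
    return "reasonable bias/variance trade-off"

# Usage: pass the arrays returned by learningCurve, e.g.
# print(diagnose_learning_curve(train_error, val_error))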
Polynomial regression:
With lambda = 0, the model has a severe high-variance (overfitting) problem.
With lambda = 1, both the training error and the cross-validation error are small and close to each other; this is a reasonable lambda that balances bias and variance.
With lambda = 100, the regularization is too strong: the fit no longer follows the data and the model suffers from severe bias.
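The fits described above can be reproduced by retraining the polynomial model for each lambda and plotting the result. The loop below is a sketch added for illustration (it reuses the helpers from ex5modules.py but is not part of the original ex5.py):

import numpy as np
import scipy.optimize as op
from scipy.io import loadmat
from ex5modules import (lrCostFunc, lrgradient, polyFeatures,
                        featureNormalize, plotPolyRegression)

data = loadmat('ex5data1.mat')
X, y = data['X'], data['y']

p = 8
X_poly, mu, sigma = featureNormalize(polyFeatures(X, p))
X_poly_one = np.column_stack((np.ones((X_poly.shape[0], 1)), X_poly))
init_theta = np.zeros(X_poly_one.shape[1])

# Retrain and plot the polynomial fit for each of the lambda values discussed above.
for lamda in (0, 1, 100):
    theta = op.fmin_cg(f=lrCostFunc, x0=init_theta, args=(X_poly_one, y, lamda),
                       fprime=lrgradient, disp=False, maxiter=200)
    plotPolyRegression(X, y, mu, sigma, theta.reshape((-1, 1)), p)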
From the cross-validation set we find that when lambda is 1 or 3, the gap between the training error and the validation error is smallest, and the model performs best.
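Instead of reading the best value off the plot, the same choice can be made programmatically. The following is a self-contained sketch (an addition, mirroring the variable names in ex5.py) that picks the lambda with the lowest cross-validation error from validationCurve:

import numpy as np
from scipy.io import loadmat
from ex5modules import validationCurve, polyFeatures, featureNormalize

data = loadmat('ex5data1.mat')
X, y, Xval, yval = data['X'], data['y'], data['Xval'], data['yval']

# Build the polynomial features exactly as in Part 5 of ex5.py.
p = 8
X_poly, mu, sigma = featureNormalize(polyFeatures(X, p))
X_poly_one = np.column_stack((np.ones((X_poly.shape[0], 1)), X_poly))
X_poly_val = (polyFeatures(Xval, p) - mu) / sigma
X_poly_val_one = np.column_stack((np.ones((X_poly_val.shape[0], 1)), X_poly_val))

lamda_vec, train_error, val_error = validationCurve(
    X_poly_one, y, X_poly_val_one, yval, np.zeros(X_poly_one.shape[1]))

# Choose the lambda whose cross-validation error is smallest.
best_idx = int(np.argmin(val_error))
print("Best lambda by CV error: %g (CV error %f)"
      % (lamda_vec[best_idx], val_error[best_idx, 0]))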
After selecting lambda with the cross-validation set, we finally evaluate the model on the test set to see how well it actually performs; with lambda = 3 the test error comes out to about 3.8599, as printed in Part 8 of ex5.py.