Implementing Linear Regression in Python
阿新 • Published: 2019-01-26
I have recently been studying machine-learning algorithms, starting from the very beginning with linear regression. Linear regression simply fits a straight line to your data in a way that makes the resulting error as small as possible.
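In matrix form, with m training examples, feature matrix X, target column vector y, parameter vector θ, and learning rate α, the quantities the code below computes are the standard least-squares cost, its gradient, and the gradient-descent update:

J(\theta) = \frac{1}{2m}\,\lVert X\theta - y \rVert^{2}, \qquad
\nabla J(\theta) = \frac{1}{m}\,X^{\top}(X\theta - y), \qquad
\theta \leftarrow \theta - \alpha\,\nabla J(\theta)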
# coding=utf-8
import random

import numpy as np

random.seed(1)  # seed the RNG so the synthetic data is reproducible

# Generate 10,000 synthetic houses: size, distance from the city centre, and floor.
house_size = [random.randrange(70, 200) for i in range(10000)]
distance_from_citycenter = [random.randrange(1, 30) for i in range(10000)]
floor = [random.randrange(1, 20) for i in range(10000)]

# Synthesize the "known" house prices from the features plus random noise.
house_price = []
for i in range(10000):
    price = house_size[i] * random.randrange(50000, 100000) \
            + distance_from_citycenter[i] * (-10000) \
            + floor[i] * 10000 \
            + random.randrange(1, 1000000)
    house_price.append(price)

# Build each sample as a vector [1, size, distance, floor]; the leading 1 is for the intercept term.
x = [[1, house_size[i], distance_from_citycenter[i], floor[i]] for i in range(10000)]
x_matrix = np.array(x)                         # feature matrix
y_matrix = np.array(house_price)               # known prices
y_matrix = y_matrix.reshape(len(y_matrix), 1)  # reshape into a column vector

theta = [0 for i in range(4)]                  # start with all parameters at 0
theta_matrix = np.array(theta)
theta_matrix = theta_matrix.reshape(len(theta_matrix), 1)

def cost_function(x, theta, y):  # cost function: half the mean squared error
    y_pred = x.dot(theta)
    diff = y_pred - y
    squared_error = np.power(diff, 2)
    return np.sum(squared_error) / (2 * len(y))

def gradient(x, theta, y):  # gradient of the cost, used by gradient descent
    y_pred = x.dot(theta)
    diff = y_pred - y
    return (1 / len(y)) * x.T.dot(diff)

theta_matrix = theta_matrix.astype("float64")
max_iter = 10000          # number of iterations
learning_rate = 0.00001   # learning rate
for i in range(max_iter):  # gradient descent: step opposite the gradient to find the best parameters
    theta_matrix -= gradient(x_matrix, theta_matrix, y_matrix) * learning_rate
    # if i % 20 == 0:
    #     print(cost_function(x_matrix, theta_matrix, y_matrix))
print(theta_matrix)
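As a quick sanity check (not part of the original post), you can compare the parameters found by gradient descent against the closed-form least-squares solution. A minimal sketch, assuming the x_matrix, y_matrix, and trained theta_matrix defined above:

# Sketch: verify gradient descent against the closed-form least-squares solution.
# Assumes x_matrix, y_matrix, and the trained theta_matrix from the script above.
theta_exact, residuals, rank, sv = np.linalg.lstsq(x_matrix, y_matrix, rcond=None)
print(theta_exact)   # closed-form least-squares parameters
print(theta_matrix)  # parameters found by gradient descent

If the two disagree noticeably, the usual remedies are more iterations, a different learning rate, or feature scaling, since the raw feature magnitudes here differ by orders of magnitude.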