[Python] Andrew Ng's Machine Learning Exercises in Python: Linear Regression

Tags: Machine Learning, Pandas, Python

Rambling

This is mainly a memo to myself, posted on the blog for easy lookup. The gradient descent part of the code largely follows the link at the end; the normal equation code I wrote myself. It's very simple, but it counts as my machine learning "Hello World", so I'm a little excited.

Although I had already written gradient descent while working through Machine Learning in Action, my understanding was shallow back then and it didn't bring much joy. Now that I understand it a step better, writing it again feels like the real first time.

I'm lazy and not willing to spend too much time on blog posts, so this may not be the most convenient to read, but I've tried to make the comments complete; copying the code out and reading it alongside them should be fine.

Code

Gradient Descent
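For reference, the two formulas the script below implements are the standard squared-error cost and the vectorized batch gradient descent update (course notation; note that in the code theta is kept as a 1x2 row vector, which is why the update appears transposed):

J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right)^2

\theta := \theta - \frac{\alpha}{m} X^{\mathsf{T}} (X\theta - y)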

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: CK
# Date: 2021/1/11
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

abs_path = os.path.dirname(os.path.abspath(sys.argv[0]))
sys.path.append(os.path.dirname(abs_path))


class GradientDescent:
    def __init__(self):
        pass

    @staticmethod
    def cost(x, y, theta):
        """
        Compute the cost function
        :param x: data set
        :param y: labels
        :param theta: parameters
        :return:
        """
        # squared-error cost formula
        return sum(np.power((np.dot(x, theta.T) - y), 2)) / (2 * len(x))

    def gradient_descent(self, x, y, theta, alpha, epoch=1000):
        """
        Gradient descent
        :param x: training set
        :param y: labels
        :param theta: parameters
        :param alpha: learning rate
        :param epoch: number of iterations
        :return:
        """
        cost = np.zeros(epoch)  # record the cost per iteration for plotting later
        m = x.shape[0]  # number of samples
        for i in range(epoch):
            # gradient descent update rule
            theta -= (alpha / m) * (x.dot(theta.T) - y).T.dot(x)
            cost[i] = self.cost(x, y, theta)
        return theta, cost

    def run(self):
        """Main entry point"""
        data = pd.read_csv(os.path.join(abs_path, 'ex1', 'ex1data1.txt'),
                           names=['Population', 'Profit'], header=None)
        # add the x_0 column
        data.insert(0, 'Ones', 1)
        column_num = data.shape[1]
        # extract the x and y arrays
        x = np.array(data.iloc[:, : column_num-1].values)
        y = np.array(data.iloc[:, column_num-1: column_num].values)
        # initialize theta
        theta = np.zeros([1, 2])
        print(self.cost(x, y, theta))
        final_theta, cost = self.gradient_descent(x, y, theta, 0.01)
        # print(final_theta, cost)
        final_cost = self.cost(x, y, final_theta)
        print(final_theta, final_cost)
        population = np.linspace(data.Population.min(), data.Population.max(), 100)
        # compute predicted values for plotting the fitted line
        profit = final_theta[0, 0] + (final_theta[0, 1] * population)
        # plot with matplotlib
        fig, ax = plt.subplots(figsize=(8, 6))
        ax.plot(population, profit, 'r', label='Prediction')
        ax.scatter(data['Population'], data['Profit'], label='TrainingData')  # raw data scatter
        ax.legend(loc=4)
        ax.set_xlabel('Population')
        ax.set_ylabel('Profit')
        ax.set_title('PredictionProfit')
        plt.show()


def main():
    gd = GradientDescent()
    gd.run()


if __name__ == '__main__':
    main()
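As a quick sanity check, here is a minimal, self-contained sketch showing that the same vectorized update recovers known coefficients; the synthetic data and variable names are made up for illustration and are not part of the exercise:

import numpy as np

# Sanity check of the same vectorized update on synthetic data
# (illustrative only, not from ex1data1.txt).
rng = np.random.default_rng(0)
x1 = rng.uniform(0, 10, size=(100, 1))
y = 3 + 2 * x1 + rng.normal(0, 0.5, size=(100, 1))  # true theta is roughly [3, 2]
x = np.hstack([np.ones((100, 1)), x1])               # prepend the x_0 column

theta = np.zeros((1, 2))
alpha, m = 0.02, x.shape[0]
for _ in range(5000):
    theta -= (alpha / m) * (x.dot(theta.T) - y).T.dot(x)

print(theta)                                  # expected: close to [[3. 2.]]
print(np.polyfit(x1.ravel(), y.ravel(), 1))  # [slope, intercept] for comparison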

Normal Equation
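The closed-form solution the script below computes is the standard normal equation:

\theta = (X^{\mathsf{T}} X)^{-1} X^{\mathsf{T}} y

The code uses np.linalg.pinv (the Moore-Penrose pseudo-inverse) rather than a plain inverse, so it still yields a least-squares solution even when X^T X is singular, e.g. with redundant features.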

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: CK
# Date: 2021/1/12
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

abs_path = os.path.dirname(os.path.abspath(sys.argv[0]))
sys.path.append(os.path.dirname(abs_path))
from gradient_descent import GradientDescent


class NormalEquation:
    def __init__(self):
        pass

    @staticmethod
    def normal_equation(x, y):
        """
        Normal equation method
        :param x: data set
        :param y: labels
        :return:
        """
        return np.linalg.pinv(x.T.dot(x)).dot(x.T).dot(y)

    def run(self, x, y, theta):
        """主入口"""
        # 呼叫之前寫的梯度下降法,得到的theta
        gd = GradientDescent()
        final_theta, cost = gd.gradient_descent(x, y, theta, 0.4)
        print(final_theta, gd.cost(x, y, final_theta))
        # plot the learning curve; 1000 is gradient_descent's default iteration count
        plt.plot(range(1000), cost)
        plt.show()
        # compute theta directly via the normal equation
        ne_theta = self.normal_equation(x, y)
        print(ne_theta.T, gd.cost(x, y, ne_theta.T))


def main():
    ne = NormalEquation()
    data = pd.read_csv(os.path.join(abs_path, 'ex1', 'ex1data2.txt'),
                       names=['Size', 'Bedrooms', 'Price'], header=None)
    column_num = data.shape[1]
    x = data.iloc[:, : column_num-1]
    y = data.iloc[:, column_num-1: column_num]
    # data normalization (z-score; a min-max alternative is commented out below)
    # x_normalization = (x - x.min()) / (x.max() - x.min())
    x_normalization = (x - x.mean()) / x.std()
    x_normalization.insert(0, 'Ones', 1)
    x_set = np.array(x_normalization.values)
    y_set = np.array(y.values)
    theta = np.zeros([1, column_num])
    ne.run(x_set, y_set, theta)


if __name__ == '__main__':
    main()
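As a quick check of normal_equation itself, here is a small illustrative sketch (synthetic data and made-up coefficients, not the course data): the pinv-based formula should agree with NumPy's least-squares solver on the same design matrix.

import numpy as np

# Illustrative check: the pinv-based normal equation should match
# NumPy's least-squares solver on a well-posed synthetic problem.
rng = np.random.default_rng(1)
x = np.hstack([np.ones((50, 1)), rng.normal(size=(50, 2))])
y = x.dot(np.array([[1.0], [2.0], [-3.0]])) + rng.normal(0, 0.1, size=(50, 1))

theta_ne = np.linalg.pinv(x.T.dot(x)).dot(x.T).dot(y)  # same expression as normal_equation
theta_ls, *_ = np.linalg.lstsq(x, y, rcond=None)

print(np.allclose(theta_ne, theta_ls))  # expected: True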

Reference

Andrew Ng Machine Learning course exercise ex1