程式人生 > 邏輯迴歸之考試是否錄取

邏輯迴歸之考試是否錄取

一、其中 LogisticRegression 是自己寫的模組,該模組沿用「波士頓房價預測」一文裡的程式碼(參見:波士頓房價預測)。

二、完全程式碼

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from LogisticRegression import gradDescent,cost_function,accuracy,feature_scalling

def load_data():
    """Load the exam-score data set and return it in shuffled row order.

    Reads ``./data/LogiReg_data.txt`` as comma-separated values with three
    columns labelled exam1, exam2 and label, converts the table to a NumPy
    array, and applies one random row permutation to both the features and
    the labels so that they stay aligned.

    Intermediate values are printed for tutorial purposes.

    Returns:
        tuple: ``(X, y)`` where ``X`` contains every column except the last
        and ``y`` contains the last column, kept 2-D (one column).
    """
    # names= assigns the column labels; the first line of the file is
    # therefore read as data rather than as a header.
    frame = pd.read_csv('./data/LogiReg_data.txt', names=['exam1', 'exam2', 'label'])
    # DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the
    # supported way to get a plain ndarray without row/column labels.
    data = frame.to_numpy()
    X = data[:, :-1]  # every column except the last (the two exam scores)
    y = data[:, -1:]  # last column only, kept as a column vector
    print(X)
    print(X.shape)  # (number of samples, number of features)
    print(X.shape[0])  # number of samples
    # Random permutation of the row indices 0 .. X.shape[0] - 1.
    shuffle_index = np.random.permutation(X.shape[0])
    print(shuffle_index)
    # Index X and y with the same permutation so rows and labels stay paired.
    X = X[shuffle_index]
    print(X)
    y = y[shuffle_index]
    return X, y


def visualize_data(X, y):
    """Scatter-plot the first two feature columns, coloured by class label.

    Rows whose label equals 1 are drawn in blue as 'Admitted'; rows whose
    label equals 0 are drawn in red as 'Not Admitted'. The figure is shown
    immediately.

    Args:
        X: feature array; columns 0 and 1 are used as the x and y coordinates.
        y: label array compared against 1 and 0 to select rows.
    """
    # (label value, marker colour, legend text) -- positives are drawn first.
    groups = (
        (1, 'b', 'Admitted'),
        (0, 'r', 'Not Admitted'),
    )
    for value, colour, caption in groups:
        # Row indices of the samples carrying this label.
        rows = np.where(y == value)[0]
        plt.scatter(X[rows, 0], X[rows, 1], s=30, c=colour, marker='o', label=caption)
    plt.legend()  # show the Admitted / Not Admitted key
    plt.show()

def visualize_cost(ite, cost):
    """Plot the cost-function value against the iteration count.

    Args:
        ite: number of x-axis sample points; expected to equal ``len(cost)``
            so that the x and y sequences have matching lengths.
        cost: sequence of cost values, one per iteration.
    """
    # `ite` evenly spaced x positions spanning 0 .. ite, one per cost entry.
    x_axis = np.linspace(0, ite, ite)
    plt.plot(x_axis, cost, linewidth=1)
    plt.title('cost history', color='r')
    plt.xlabel('iterations')
    plt.ylabel('cost J')
    plt.show()


if __name__ == '__main__':
    # Training hyper-parameters: learning rate, initial bias, iteration count.
    alpha = 0.1
    b = 0.1
    maxIt = 10000

    # Step 1. Load the (shuffled) data.
    X, y = load_data()

    # Step 2. Show the raw samples before any scaling is applied.
    visualize_data(X, y)

    # Step 3. Scale the features and start from random weights, one per feature.
    m, n = X.shape
    X = feature_scalling(X)
    W = np.random.randn(n, 1)

    # Step 4. Train with gradient descent, then report the results.
    W, b, cost_history = gradDescent(X, y, W, b, alpha, maxIt)

    banner = "******************"
    print(banner)
    print(cost_history[:20])  # first 20 recorded cost values
    visualize_cost(maxIt, cost_history)
    print("accuracys is :         ", accuracy(X, y, W, b), sep='')
    print("W:", W)
    print("b: ", b)
    print(banner)

三、資料

https://github.com/TolicWang/MachineLearningWithMe/blob/master/Lecture_02/data/LogiReg_data.txt

四、用sklearn來實現

程式碼:

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from LogisticRegression import feature_scalling
from sklearn.linear_model import LogisticRegression

def load_data():
    """Load the exam-score data set and return it in shuffled row order.

    Reads ``./data/LogiReg_data.txt`` as comma-separated values with three
    columns labelled exam1, exam2 and label, converts the table to a NumPy
    array, and applies one random row permutation to both the features and
    the labels so that they stay aligned.

    Returns:
        tuple: ``(X, y)`` where ``X`` contains every column except the last
        and ``y`` contains the last column, kept 2-D (one column).
    """
    frame = pd.read_csv('./data/LogiReg_data.txt', names=['exam1', 'exam2', 'label'])
    # DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the
    # supported way to get a plain ndarray.
    data = frame.to_numpy()
    X = data[:, :-1]  # every column except the last (the two exam scores)
    y = data[:, -1:]  # last column only, kept as a column vector
    # Use one permutation for both arrays so rows and labels stay paired.
    shuffle_index = np.random.permutation(X.shape[0])
    X = X[shuffle_index]
    y = y[shuffle_index]
    return X, y


def visualize_cost(ite, cost):
    """Plot the cost-function value against the iteration count.

    Args:
        ite: number of x-axis sample points; expected to equal ``len(cost)``
            so that the x and y sequences have matching lengths.
        cost: sequence of cost values, one per iteration.
    """
    # `ite` evenly spaced x positions spanning 0 .. ite, one per cost entry.
    x_axis = np.linspace(0, ite, ite)
    plt.plot(x_axis, cost, linewidth=1)
    plt.title('cost history', color='r')
    plt.xlabel('iterations')
    plt.ylabel('cost J')
    plt.show()


if __name__ == '__main__':
    # Load the shuffled data and scale the features with the project helper.
    X, y = load_data()
    X = feature_scalling(X)

    # Fit scikit-learn's logistic regression with its default settings.
    lr = LogisticRegression()
    # load_data() returns y as an (m, 1) column; scikit-learn expects 1-D
    # labels and emits a DataConversionWarning otherwise, so flatten it.
    lr.fit(X, y.ravel())

    print("******************")
    # score() reports the mean accuracy, here measured on the training data.
    print("accuracys is :" ,lr.score(X,y))
    print("W:{},b:{}".format(lr.coef_,lr.intercept_))
    print("******************")