# Comparison of several commonly used regression algorithms (幾種常用迴歸演算法的比較)
# -*- coding:utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
import random
def text2num(string):
    """Parse whitespace-separated numbers from *string* into a list of floats.

    Any run of whitespace (spaces, tabs, newlines) separates two numbers, so
    both space- and tab-delimited data are accepted.

    Args:
        string: Text containing numeric tokens.

    Returns:
        A list of floats in order of appearance; empty if there are no tokens.

    Raises:
        ValueError: If a token cannot be converted by ``float``.
    """
    # str.split() with no argument drops empty tokens in one linear pass,
    # replacing the quadratic "remove every ''" loop.
    return [float(token) for token in string.split()]
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of *x*.

    The computation goes through ``np.exp``, so *x* may be a scalar or a
    NumPy array/matrix, in which case it is applied element-wise.
    """
    denominator = 1 + np.exp(-x)
    return 1.0 / denominator
def data_plot(data_list, weight):
    """Scatter-plot the two classes and draw the line defined by *weight*.

    Args:
        data_list: Iterable of rows whose first two entries are the point
            coordinates and whose third entry is the label (0.0 or 1.0).
        weight: Three coefficients ``w0, w1, w2``. A (3, 1) matrix, a (3, 1)
            array or a flat sequence are all accepted. The plotted line is
            ``w0 + w1 * x + w2 * y = 0``.

    Side effects:
        Prints the line coordinates and opens a matplotlib window.
    """
    # reshape(-1, 2) keeps the arrays two-dimensional even when a class has
    # no samples, so the column slices below never fail on an empty class.
    class0_points = np.asarray(
        [list(row[0:2]) for row in data_list if row[2] == 0.0], dtype=float
    ).reshape(-1, 2)
    class1_points = np.asarray(
        [list(row[0:2]) for row in data_list if row[2] == 1.0], dtype=float
    ).reshape(-1, 2)

    # Flatten the weights so the arithmetic below is plain element-wise math
    # regardless of whether the caller passed a matrix, an array or a list.
    w = np.asarray(weight, dtype=float).ravel()
    linear_x = np.arange(-4, 4, 1)
    # Solve w0 + w1 * x + w2 * y = 0 for y.
    linear_y = (-w[0] - w[1] * linear_x) / w[2]
    print(linear_y)

    plt.figure(1)
    plt.scatter(class0_points[:, 0], class0_points[:, 1], c='r')
    plt.scatter(class1_points[:, 0], class1_points[:, 1], c='g')
    print(linear_x)
    print(linear_y.tolist())
    plt.plot(linear_x, linear_y)
    plt.show()
def grad_desc(data_mat, label_mat, rate, times):
    """Fit sigmoid-model weights with full-batch gradient descent.

    Every iteration uses all samples: ``weight -= rate * X.T * (sigmoid(X *
    weight) - y)``.

    Args:
        data_mat: Sample matrix with one row per sample (m x n).
        label_mat: The m labels, as a column, a row or a flat sequence.
        rate: Step size applied to each update.
        times: Number of full-batch iterations.

    Returns:
        The fitted weights as an (n, 1) ``np.matrix``. The matrix type is
        kept because the plotting code in this file indexes the weights
        with matrix semantics.
    """
    # np.mat no longer exists in NumPy 2.0; compute on plain arrays instead.
    features = np.atleast_2d(np.asarray(data_mat, dtype=float))
    # Force a column vector so a flat label list cannot broadcast the error
    # term into an (m, m) array.
    labels = np.asarray(label_mat, dtype=float).reshape(-1, 1)
    _, n = features.shape
    weight = np.ones((n, 1))
    for _ in range(times):
        error = sigmoid(features.dot(weight)) - labels
        weight = weight - rate * features.T.dot(error)
    return np.asmatrix(weight)
def random_grad_desc(data_mat, label_mat, rate, times):
    """Fit sigmoid-model weights with per-sample gradient descent.

    The weights are updated after every single sample, visiting the samples
    in their stored order on each pass.

    Args:
        data_mat: Sample matrix with one row per sample (m x n).
        label_mat: The m labels, as a column, a row or a flat sequence.
        rate: Step size applied to each update.
        times: Number of passes over the data.

    Returns:
        The fitted weights as an (n, 1) ``np.matrix``. The matrix type is
        kept because the plotting code in this file indexes the weights
        with matrix semantics.
    """
    # np.mat no longer exists in NumPy 2.0; compute on plain arrays instead.
    features = np.atleast_2d(np.asarray(data_mat, dtype=float))
    labels = np.asarray(label_mat, dtype=float).reshape(-1, 1)
    m, n = features.shape
    weight = np.ones((n, 1))
    for _ in range(times):
        for j in range(m):
            # Slice (not index) so the sample stays a (1, n) row.
            sample = features[j:j + 1]
            error = sigmoid(sample.dot(weight)) - labels[j]
            weight = weight - rate * sample.T.dot(error)
    return np.asmatrix(weight)
def improve_random_grad_desc(data_mat, label_mat, times):
    """Fit sigmoid-model weights with shuffled, decaying-rate per-sample descent.

    On every pass each sample is used exactly once, in a fresh random order,
    and the step size ``0.0001 + 4 / (i + j + 1)`` shrinks with the pass
    number ``i`` and the position ``j`` inside the pass.

    Args:
        data_mat: Sample matrix with one row per sample (m x n).
        label_mat: The m labels, as a column, a row or a flat sequence.
        times: Number of passes over the data.

    Returns:
        The fitted weights as an (n, 1) ``np.matrix``. The matrix type is
        kept because the plotting code in this file indexes the weights
        with matrix semantics.
    """
    # np.mat no longer exists in NumPy 2.0; compute on plain arrays instead.
    features = np.atleast_2d(np.asarray(data_mat, dtype=float))
    labels = np.asarray(label_mat, dtype=float).reshape(-1, 1)
    m, n = features.shape
    weight = np.ones((n, 1))
    for i in range(times):
        # Sampling all m indices without replacement is a random permutation:
        # the same visiting scheme as drawing one index at a time and
        # removing it, without the quadratic list.remove calls.
        for j, index in enumerate(random.sample(range(m), m)):
            rate = 0.0001 + 4.0 / (i + j + 1)
            # Slice (not index) so the sample stays a (1, n) row.
            sample = features[index:index + 1]
            error = sigmoid(sample.dot(weight)) - labels[index]
            weight = weight - rate * sample.T.dot(error)
    return np.asmatrix(weight)
def main(data_path="/Users/chenzu/Documents/code-machine-learning/data/LR"):
    """Load the samples, fit the weights and plot the resulting line.

    The data file is read as UTF-8 text holding whitespace-separated numbers,
    taken three at a time as ``feature_1 feature_2 label``.

    Args:
        data_path: Location of the data file. Defaults to the path this
            script was originally written against.

    Raises:
        ValueError: If the amount of numbers in the file is not a multiple
            of three (raised by the reshape).
    """
    # The context manager closes the file even if reading or decoding fails.
    with open(data_path, "rb") as handle:
        file_lines = handle.read().decode("UTF-8")

    data_list = text2num(file_lines)
    data_len = len(data_list) // 3
    data_list = np.reshape(data_list, (data_len, 3))

    # Prepend a constant 1 to every sample so the first weight acts as the
    # intercept term.
    data_mat = [[1, row[0], row[1]] for row in data_list[:, 0:2]]
    print(data_mat)
    label_mat = data_list[:, 2:3]

    # Estimate the parameters by gradient descent.
    weight = improve_random_grad_desc(data_mat, label_mat, 500)
    print(weight)
    data_plot(data_list, weight)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()