# 手寫 KMeans 演算法並繪製動圖
# 阿新 • 發佈:2021-01-20
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import imageio
def Kmeans(center, frame=None, features=None):  # center: [[x, y], [x, y], ...]
    """Run one K-means step: label every row, then recompute the centers.

    For each row of ``center`` a distance column ('類0', '類1', ...) is
    written into the working DataFrame, the label column 'custer' is set to
    the name of the nearest center, and the mean of the two feature columns
    is taken per label.

    Args:
        center: array-like of shape (k, 2), one row per cluster center.
        frame: DataFrame that receives the distance and 'custer' columns.
            Defaults to the module-level ``data``.
        features: DataFrame holding the feature columns used for the
            distance computation. Defaults to the module-level ``x_train``.

    Returns:
        Float ndarray with the same shape as ``center``. Row i is the mean
        of the rows labelled '類i'; a cluster that received no rows keeps
        its previous center so the shape never shrinks.
    """
    center = np.asarray(center, dtype=float)
    if frame is None:
        frame = data
    if features is None:
        features = x_train

    feature_cols = ['平均消費週期(天)', '平均每次消費金額']
    # One label per center, so k follows the input instead of being fixed at 3.
    label_cols = ['類{}'.format(i) for i in range(len(center))]

    # Euclidean distance from every sample to each center.
    for label, point in zip(label_cols, center):
        frame[label] = np.sqrt(((features - point) ** 2).sum(axis=1))

    # Assign each sample to the center with the smallest distance.
    frame['custer'] = frame.loc[:, label_cols].idxmin(axis=1)

    # Per-cluster mean of the features.
    grouped = (
        frame.loc[:, feature_cols + ['custer']]
        .groupby(by='custer')
        .mean()
    )
    # Reindex so row i always belongs to label i: groupby sorts labels as
    # strings and silently drops clusters that have no members.
    means = grouped.reindex(label_cols).to_numpy(dtype=float)
    missing = np.array(
        [label not in grouped.index for label in label_cols], dtype=bool
    )
    # Empty clusters fall back to their previous center.
    return np.where(missing[:, None], center, means)
# Plotting
def plot_image(times, data):
    """Save a scatter plot of the current clustering and return its file name.

    Args:
        times: iteration number, inserted into the output file name.
        data: DataFrame with the two feature columns and the 'custer'
            label column.

    Returns:
        The name of the PNG file that was written.
    """
    plt.figure()
    assigned = data['custer']
    # One scatter call per label so each cluster gets its own colour.
    for label in ('類0', '類1', '類2'):
        members = data.loc[assigned == label]
        plt.scatter(members['平均消費週期(天)'], members['平均每次消費金額'])
    image_name = '第{}次聚類結果.png'.format(times)
    plt.savefig(image_name)
    plt.close('all')
    return image_name
if __name__ == '__main__':
    # NOTE: `data` and `x_train` must stay module-level names; Kmeans reads
    # them as globals when called with only `center`.
    data = pd.read_csv('company.csv', encoding='gbk')

    # 0. File names of the per-iteration plots, in order.
    image_list = []

    # 1. Select the feature columns.
    x_train = data.loc[:, ['平均消費週期(天)', '平均每次消費金額']]

    # 2. Initial cluster centers: k = 3.
    center = np.array([[10, 100], [20, 200], [30, 300]])

    # 3. First clustering pass.
    new_center = Kmeans(center)
    print(data)
    times = 1
    image_list.append(plot_image(times, data))

    # 4. Iterate until the centers stop moving. np.array_equal also copes
    #    with a shape mismatch, and the cap guarantees termination even if
    #    the centers never become exactly equal.
    max_times = 300
    while not np.array_equal(center, new_center) and times < max_times:
        times += 1
        center = new_center.copy()
        new_center = Kmeans(center)
        image_list.append(plot_image(times, data))
    print(times)

    # 5. Load every saved plot as one GIF frame.
    frame_list = [imageio.imread(image_name) for image_name in image_list]

    # 6. Write the GIF.
    # duration: interval between two frames; 0.2 - 0.5 is the suggested range.
    duration = 0.7
    imageio.mimsave('聚類結果.gif', frame_list, 'GIF', duration=round(duration, 2))