程式人生 > 用TSNE進行資料降維並展示聚類結果

用TSNE進行資料降維並展示聚類結果

TSNE提供了一種有效的資料降維方式,讓我們可以在2維或3維的空間中展示聚類結果。

# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from sklearn.manifold import TSNE
import pandas as pd
import matplotlib.pyplot as mp

from sklearn.cluster import KMeans

# Script: standardise the consumption data, cluster it with K-Means, project
# the standardised features to a low-dimensional space with t-SNE, and plot
# each cluster with its own marker.

inputfile = 'data/consumption_data.xls'
outputfile = 'tmp/data_type3.xls'  # defined here but never written to below

# Load the records, using the 'Id' column as the row index.
data = pd.read_excel(inputfile, index_col='Id')

# Z-score standardisation: (x - column mean) / column standard deviation.
data_zs = 1.0 * (data - data.mean()) / data.std()

k = 3            # number of clusters
iteration = 800  # maximum number of K-Means iterations

# NOTE: the original passed n_jobs=4. That parameter was deprecated in
# scikit-learn 0.23 and removed in 1.0 (it now raises TypeError), so it is
# omitted; the clustering result does not depend on it.
model = KMeans(n_clusters=k, max_iter=iteration)
model.fit(data_zs)  # run the clustering

r1 = pd.Series(model.labels_).value_counts()  # number of samples per cluster
r2 = pd.DataFrame(model.cluster_centers_)     # cluster centres

# Attach each row's cluster label to the original (unscaled) data.
r = pd.concat([data, pd.Series(model.labels_, index=data.index)], axis=1)
r.columns = list(data.columns) + [r'聚類類別']

# Reduce the standardised data with t-SNE; keep the row index so the
# embedding can be filtered with boolean masks built from `r`.
tsne = TSNE()
tsne.fit_transform(data_zs)
tsne = pd.DataFrame(tsne.embedding_, index=data_zs.index)

mp.rcParams['font.sans-serif'] = ['SimHei']  # font used for CJK labels
mp.rcParams['axes.unicode_minus'] = False    # render the minus sign correctly

# One matplotlib format string per cluster label (0, 1, 2), matching k = 3.
for label, style in enumerate(['r.', 'go', 'b*']):
    d = tsne[r[r'聚類類別'] == label]
    mp.plot(d[0], d[1], style)
mp.show()

執行結果: