python實現簡單的kmeans聚類演算法
阿新 • • 發佈:2019-02-02
問題描述:給定一組二維資料點,用 k-means 演算法對其進行聚類;下面的例子以 k=3(即分成 3 個簇)為例。
原資料(儲存為 data.txt,每行一個點,格式為 x,y):
1.5,3.1
2.2,2.9
3,4
2,1
15,25
43,13
32,42
0,0
8,9
12,5
9,12
11,8
22,33
24,25
實現程式碼:
# coding: utf-8
"""Simple k-means clustering of points read from a comma-separated text file.

Each non-blank line of the input file holds one point written as
comma-separated numbers, for example ``1.5,3.1``.  Running this module as a
script clusters the points in ``data.txt`` into three groups and prints the
resulting clusters and their means.
"""
import math

# Starting centroids used when kMeans() is called with its default arguments.
_DEFAULT_INIT_MEANS = ((1.1, 1), (1, 1), (1, 2))


def loadDataSet(filename):
    """Read a list of points from a comma-separated text file.

    Args:
        filename: Path of the file to read; one point per line, coordinates
            separated by commas.

    Returns:
        A list of points, each point being a list of floats.

    Raises:
        ValueError: If a field cannot be converted to a float.
    """
    dataMat = []
    # The context manager closes the file even if parsing a line fails.
    with open(filename, encoding="utf-8") as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # Blank lines (such as a trailing empty line) hold no point.
                continue
            dataMat.append([float(field) for field in line.split(',')])
    return dataMat


def distEclud(vecA, vecB):
    """Return the Euclidean distance between two points.

    Coordinates are paired with ``zip``, so both points are expected to have
    the same number of dimensions; surplus coordinates of a longer point are
    ignored.
    """
    return math.sqrt(sum((a - b) ** 2 for a, b in zip(vecA, vecB)))


def clusterOfElement(means, element):
    """Return the index of the mean that is closest to ``element``.

    Ties are resolved in favour of the lowest index, because a later mean
    only wins when it is strictly closer.

    Raises:
        IndexError: If ``means`` is empty.
    """
    label = 0
    min_dist = distEclud(means[0], element)
    for index, mean in enumerate(means[1:], 1):
        dist = distEclud(mean, element)
        if dist < min_dist:
            min_dist = dist
            label = index
    return label


def getMean(cluster):
    """Return the coordinate-wise mean of the points in ``cluster``.

    Args:
        cluster: A non-empty list of points, e.g. ``[[1, 2], [3, 4]]``.

    Returns:
        A list with one mean value per dimension, e.g. ``[2.0, 3.0]``.

    Raises:
        ValueError: If ``cluster`` is empty.
    """
    if not cluster:
        raise ValueError("cannot compute the mean of an empty cluster")
    num = len(cluster)
    # zip(*cluster) yields one tuple per dimension, so every dimension is
    # summed independently and no running total leaks into the next one.
    return [sum(column) / num for column in zip(*cluster)]


def _assignPoints(data, means):
    """Group every point of ``data`` under the index of its nearest mean."""
    clusters = [[] for _ in means]
    for point in data:
        clusters[clusterOfElement(means, point)].append(point)
    return clusters


def _updateMeans(clusters, means):
    """Return the new mean of each cluster, keeping the old mean if it is empty."""
    return [
        getMean(cluster) if cluster else list(previous)
        for cluster, previous in zip(clusters, means)
    ]


def kMeans(k=3, filename='data.txt', init_means=None, max_iter=1000):
    """Cluster the points stored in ``filename`` into ``k`` groups and print them.

    Args:
        k: Number of clusters.
        filename: Path of the comma-separated data file.
        init_means: Optional sequence of ``k`` starting centroids.  When
            omitted, the built-in seeds are used for ``k == 3`` and the first
            ``k`` data points otherwise.  The sequence is copied, never
            modified.
        max_iter: Upper bound on the number of assign/update rounds.

    Returns:
        A ``(clusters, means)`` tuple: ``clusters[i]`` is the list of points
        assigned to centroid ``means[i]``.

    Raises:
        ValueError: If ``k`` is smaller than 1, if ``init_means`` does not
            contain exactly ``k`` centroids, or if seeds must be taken from
            the data and there are fewer than ``k`` points.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    data = loadDataSet(filename)
    print("data is ", data)

    if init_means is None:
        if k == len(_DEFAULT_INIT_MEANS):
            init_means = _DEFAULT_INIT_MEANS
        elif len(data) >= k:
            init_means = data[:k]
        else:
            raise ValueError("need at least k data points to seed the means")
    elif len(init_means) != k:
        raise ValueError("init_means must contain exactly k centroids")

    means = [list(mean) for mean in init_means]
    clusters = [[] for _ in range(k)]
    for _ in range(max_iter):
        clusters = _assignPoints(data, means)
        new_means = _updateMeans(clusters, means)
        if new_means == means:
            # Unchanged means give the same assignment next round, so any
            # further iteration would reproduce exactly this result.
            break
        means = new_means

    print("result cluster is ", clusters)
    print("result means is ", means)
    return clusters, means


if __name__ == "__main__":
    kMeans()