簡單聚類
阿新 • • 發佈:2019-01-08
# Adapted from the book "Machine Learning in Action": a simple k-means clustering.
from numpy import *  # noqa: F401,F403 - kept so code relying on the star import still works
import numpy as np


def loadDataSet(fileName):
    """Read a tab-separated text file into a list of rows.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A list with one entry per line; each entry is the list of the line's
        tab-separated fields. Fields are left as strings (the numeric
        conversion happens later, e.g. in ``changtofloat``).
    """
    # The context manager guarantees the handle is closed, even on error.
    with open(fileName) as fr:
        return [line.strip().split('\t') for line in fr]


def distEclud(vecA, vecB):
    """Return the Euclidean distance between two vectors.

    Args:
        vecA: A vector, typically a 1 x n matrix row (one centroid).
        vecB: A vector of the same length, typically a 1-D array (one sample).
            Elements may be numbers or numeric strings.

    Returns:
        The distance ``sqrt(sum((a - b) ** 2))`` as a float.

    Raises:
        ValueError: If the two vectors do not hold the same number of elements.
    """
    # Flatten both operands so matrix rows and 1-D arrays can be mixed freely.
    a = np.asarray(vecA, dtype=float).ravel()
    b = np.asarray(vecB, dtype=float).ravel()
    if a.shape != b.shape:
        raise ValueError(
            "vectors must have the same length, got %d and %d" % (a.size, b.size)
        )
    # Square root of the summed squares; summing per-component square roots
    # would yield the Manhattan distance instead.
    return float(np.sqrt(np.sum((a - b) ** 2)))


def randCent(dataSet, k):
    """Create k random centroids inside the bounding box of the data.

    Args:
        dataSet: 2-D collection of samples (rows) by features (columns);
            values may be numbers or numeric strings.
        k: Number of centroids to generate.

    Returns:
        A k x n matrix whose column j is drawn uniformly between the minimum
        and the maximum of feature j.
    """
    # Convert to float first: min/max on strings would compare lexicographically.
    data = np.array(dataSet, dtype=float)
    n = data.shape[1]
    colMin = data.min(axis=0)
    colSpan = data.max(axis=0) - colMin
    # rand(n, k).T consumes the random stream column by column, i.e. in the
    # same order as drawing rand(k, 1) once per feature.
    draws = np.random.rand(n, k).T
    return np.asmatrix(colMin + colSpan * draws)


def changtofloat(dataSet):
    """Return the data set as a matrix of floats.

    Args:
        dataSet: 2-D collection of numbers or numeric strings.

    Returns:
        A new float matrix with the same shape as ``dataSet``.
    """
    return np.asmatrix(np.array(dataSet, dtype=float))


def kMeans(dataset, k, disMeans=distEclud, createCent=randCent):
    """Cluster the samples into k groups with the k-means algorithm.

    Args:
        dataset: 2-D collection of samples (rows) by features (columns);
            values may be numbers or numeric strings.
        k: Number of clusters.
        disMeans: Distance function called as ``disMeans(centroid_row, sample)``.
        createCent: Function called as ``createCent(dataset, k)`` that returns
            the k initial centroids.

    Returns:
        A tuple ``(cent, clusterAssment)``: ``cent`` holds the final centroids
        (one per row) and ``clusterAssment`` is an m x 2 matrix holding, for
        every sample, the index of its cluster and the squared distance to
        that cluster's centroid.
    """
    # Convert once up front; the numeric data never changes between iterations.
    points = np.asarray(changtofloat(dataset))
    m = points.shape[0]
    clusterAssment = np.asmatrix(np.zeros((m, 2)))
    # Start from an impossible cluster index so that the first pass always
    # counts as a change, even when every sample lands in cluster 0.
    clusterAssment[:, 0] = -1
    cent = createCent(dataset, k)
    clusterChanged = True
    while clusterChanged:
        clusterChanged = False
        for i in range(m):
            # Find the centroid closest to sample i.
            minDist, minIndex = np.inf, -1
            for j in range(k):
                distJI = disMeans(cent[j, :], points[i, :])
                if distJI < minDist:
                    minDist, minIndex = distJI, j
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True
            clusterAssment[i, :] = minIndex, minDist ** 2
        # Move every centroid to the mean of the samples assigned to it.
        labels = np.asarray(clusterAssment[:, 0]).ravel()
        for cnt in range(k):
            members = points[labels == cnt]
            # An empty cluster keeps its previous centroid; averaging zero
            # samples would turn it into NaN for good.
            if members.shape[0] > 0:
                cent[cnt, :] = members.mean(axis=0)
    return cent, clusterAssment