PCA降維的python實現
阿新 • • 發佈:2019-02-06
#-*- coding:utf-8 -*-
from numpy import *
import matplotlib.pyplot as plt
def pca(data):
    """Run principal component analysis on ``data``, keeping every component.

    Args:
        data: 2-D array-like with one sample per row and one feature per
            column. It is converted to a float array and is NOT modified.

    Returns:
        A tuple ``(x, y, evals, evecs)``:
            x: the centered data projected onto the principal axes,
                shape (features, samples).
            y: the data rebuilt from ``x`` with the mean added back,
                shape (samples, features).
            evals: eigenvalues of the covariance matrix, descending.
            evecs: the matching unit eigenvectors, one per column.
    """
    # Subtract the mean. Build a new centered array instead of `data -= m`,
    # which overwrote the caller's array and failed on integer/list input.
    samples = asarray(data, dtype=float)
    m = mean(samples, axis=0)
    centered = samples - m
    print(m)

    # Covariance matrix. With a single feature `cov` returns a 0-d array,
    # so promote it to 2-D for the eigen-decomposition.
    C = atleast_2d(cov(transpose(centered)))

    # Eigenvalues / eigenvectors, sorted in descending order. The covariance
    # matrix is symmetric, so `eigh` applies and always yields real values.
    evals, evecs = linalg.eigh(C)
    print('%s %s' % (evals, evecs))
    indices = argsort(evals)[::-1]  # indices that sort evals descending
    print('indices %s' % (indices,))
    evecs = evecs[:, indices]
    print(evecs)
    evals = evals[indices]
    print(evals)

    # Project the centered data onto the eigenvectors
    x = dot(transpose(evecs), transpose(centered))
    print(x)

    # Rebuild the data in the original coordinates
    y = transpose(dot(evecs, x)) + m
    return x, y, evals, evecs
# Demo: draw 1000 samples from two separate normal distributions
# (mean 5, std 0.5 and mean 3, std 1).
x = random.normal(5, .5, 1000)
y = random.normal(3, 1, 1000)

# Rotate the (x, y) pairs by pi/4 so the two plotted coordinates are mixed.
a = x*cos(pi/4) + y*sin(pi/4)
b = -x*sin(pi/4) + y*cos(pi/4)
# Call form of print works on both Python 2 and Python 3.
print(a)

# Scatter plot of the rotated samples.
plt.plot(a, b, 'r.')
plt.xlabel('x')
plt.ylabel('y')
plt.title('raw dataset')

# Stack the two coordinates as columns: one sample per row.
data = zeros((1000, 2))
data[:, 0] = a
data[:, 1] = b

# NOTE: x and y are rebound here to the outputs of pca().
x, y, evals, evecs = pca(data)
print(evecs)

# Plot y, the data rebuilt by pca() from all components with the mean added
# back (not the projection x).
plt.figure()
plt.plot(y[:, 0], y[:, 1], '.')
plt.xlabel('x')
plt.ylabel('y')
plt.title('new dataset')
plt.show()