R語言主成分分析之SVD
阿新 • • 發佈:2019-01-06
# PCA whitening of the full feature set via SVD of the covariance matrix.
#
# Reads (defined outside this section):
#   data.learn.x - training features, one column per feature
#   data.valid.x - validation features, same columns as data.learn.x
#   REDUCTION    - number of principal components to keep
# Writes:
#   all_col_mean, all_col_sd - per-column training mean / standard deviation
#   data.learn.cov, data.learn.svd - covariance matrix and its SVD
#   all_U, lamda - scaled projection matrix and its per-component scale factors
#   data.learn.PCAx, data.valid.PCAx - projected data, columns V1..V<REDUCTION>

# Per-column statistics are taken from the training set only; the same values
# are reused below to standardise the validation set.
all_col_mean <- colMeans(data.learn.x)
all_col_sd <- apply(data.learn.x, 2, sd)
cols <- ncol(data.learn.x)

# Fail early with a clear message instead of letting NaN/Inf reach svd().
stopifnot(
  is.numeric(REDUCTION),
  length(REDUCTION) == 1L,
  REDUCTION >= 1,
  REDUCTION <= cols
)
if (any(!is.finite(all_col_sd) | all_col_sd == 0)) {
  stop(
    "Cannot standardise: at least one training column has zero or ",
    "undefined standard deviation.",
    call. = FALSE
  )
}

# Standardise every training column: subtract the mean, divide by the sd.
data.learn.PCAx <- sweep(as.matrix(data.learn.x), 2, all_col_mean, "-")
data.learn.PCAx <- sweep(data.learn.PCAx, 2, all_col_sd, "/")

# Covariance matrix of the standardised training data, decomposed as U d V.
data.learn.cov <- cov(data.learn.PCAx)
data.learn.svd <- svd(data.learn.cov)

# Keep the first REDUCTION left singular vectors; drop = FALSE keeps all_U a
# matrix even when REDUCTION is 1.
all_U <- data.learn.svd$u[, seq_len(REDUCTION), drop = FALSE]

# Reciprocal square root of the first REDUCTION singular values, i.e. one over
# the standard deviation along each retained component.
# NOTE(review): a singular value of (near) zero yields Inf/huge factors here;
# consider adding a small epsilon if that can occur with your data.
lamda <- 1 / sqrt(data.learn.svd$d[seq_len(REDUCTION)])

# Scale each retained component by its factor. This is PCA whitening: the
# data is never rotated back with t(U), so it is not ZCA whitening.
all_U <- sweep(all_U, 2, lamda, "*")

# Project the standardised training data onto the scaled components.
data.learn.PCAx <- data.learn.PCAx %*% all_U
colnames(data.learn.PCAx) <- paste0("V", seq_len(REDUCTION))

# Standardise the validation set with the TRAINING mean/sd, then apply the
# same projection so both sets live in the same component space.
data.valid.PCAx <- sweep(as.matrix(data.valid.x), 2, all_col_mean, "-")
data.valid.PCAx <- sweep(data.valid.PCAx, 2, all_col_sd, "/")
data.valid.PCAx <- data.valid.PCAx %*% all_U
colnames(data.valid.PCAx) <- paste0("V", seq_len(REDUCTION))