1. 程式人生 > R語言主成分分析之SVD

R語言主成分分析之SVD

 # Whole-dataset PCA: standardize the training features, then project them
    # onto the leading REDUCTION principal directions with whitening.
    # Reads data.learn.x and REDUCTION, both defined outside this section.
    # NOTE(review): assumes data.learn.x is a numeric matrix (the %*% below
    # requires it) -- confirm against the code that builds it.
    all_col_mean <- colMeans(data.learn.x) # per-column mean of the training set
    all_col_sd <- apply(data.learn.x, 2, sd) # per-column standard deviation
    cols <- ncol(data.learn.x) # number of feature columns

    # Fail early with a clear message instead of an obscure subscript error.
    stopifnot(
      is.numeric(REDUCTION), length(REDUCTION) == 1,
      REDUCTION >= 1, REDUCTION <= cols
    )

    # Standardize every column: subtract its mean, then divide by its sd.
    # A constant column has sd 0 and would produce NaN here.
    data.learn.PCAx <- sweep(data.learn.x, 2, all_col_mean, "-")
    data.learn.PCAx <- sweep(data.learn.PCAx, 2, all_col_sd, "/")

    # Covariance matrix of the standardized features.
    data.learn.cov <- cov(data.learn.PCAx, data.learn.PCAx)
    data.learn.svd <- svd(data.learn.cov) # list with components u, d, v

    # Keep the first REDUCTION left singular vectors. drop = FALSE keeps all_U
    # a matrix even when REDUCTION is 1; without it the result collapses to a
    # plain vector and the column-wise scaling below fails.
    all_U <- data.learn.svd$u[, seq_len(REDUCTION), drop = FALSE]

    # Reciprocal square roots of the leading singular values, i.e. the inverse
    # standard deviations along each retained direction.
    lamda <- 1 / sqrt(data.learn.svd$d[seq_len(REDUCTION)])

    # Scale column i of all_U by lamda[i]. The data are never rotated back by
    # t(U), so this is PCA whitening rather than ZCA whitening.
    all_U <- sweep(all_U, 2, lamda, "*")

    # Rotate the standardized features onto the retained directions.
    data.learn.PCAx <- data.learn.PCAx %*% all_U
    # Name the columns V1..Vk from the actual column count instead of a
    # hard-coded set of four names.
    colnames(data.learn.PCAx) <- paste0("V", seq_len(ncol(data.learn.PCAx)))
    
    # Apply the training-set transform to the validation set.
    # Reuses all_col_mean, all_col_sd and all_U computed from the training data
    # earlier in this script; data.valid.x is defined outside this section.
    # Standardize each column with the training mean and sd.
    data.valid.PCAx <- sweep(data.valid.x, 2, all_col_mean, "-")
    data.valid.PCAx <- sweep(data.valid.PCAx, 2, all_col_sd, "/")

    # Project onto the same scaled directions used for the training set.
    data.valid.PCAx <- data.valid.PCAx %*% all_U
    # Name the columns V1..Vk from the actual column count instead of a
    # hard-coded set of four names.
    colnames(data.valid.PCAx) <- paste0("V", seq_len(ncol(data.valid.PCAx)))