1. 程式人生 > >EM,SEM演算法操作例項:《Statistical Analysis with Missing Data》習題9.1 & 9.2

EM,SEM演算法操作例項:《Statistical Analysis with Missing Data》習題9.1 & 9.2

一、題目

Example 9.1 & 9.2

重現書中Example 9.1與9.2。

先貼出SEM演算法:

SEM

SEM

下面是Example 9.1與Example 9.2原例:

Example 9.1



Example 9.2



二、解答

a)Example 9.1

賦一些初值:

y1 <- 38
y2 <- 34

eps <- 1e-30     # 當新舊兩次\theta的差距小於eps時,迭代停止
theta_t <- 0     # 引數\theta的初值
theta_diff <- 1  # 由於下面使用了while語句,所以需要給兩個\theta的差賦初值

下面開始進行第一次迭代迴圈,估計引數並實時輸出,直至新舊兩次 θ \theta 的差距小於eps時,迭代停止。

while(theta_diff > eps) {
  y3_t <- 125 * (theta_t / 4) / (theta_t / 4 + 0.5)
  theta_t_new <- (34 + y3_t) / (38 + 34 + y3_t)
  theta_diff <- abs(theta_t_new - theta_t)
  theta_t <- theta_t_new
  print(theta_t)
}

經過EM演算法迭代,估計出的 θ \theta

(theta_hat <- theta_t)

下面進行DM的估計(注意:這裡設定的eps誤差是 1 0

8 10^{-8} ,而前面是 1 0 30 10^{-30} 。因為必須要前面EM的精度要遠高於後面SEM的精度,後面估計DM時,才能穩定收斂,不然SEM只有中間一部分穩定。同樣的在做EX9.2時,也用了相同的處理方法):

# 設定初值
eps <- 1e-8
theta_t <- 0
theta_diff <- 1

while(theta_diff > eps) {
  y3_t <- 125 * (theta_t / 4) / (theta_t / 4 + 0.5)
  theta_t_new <- (34 + y3_t) / (38 + 34 + y3_t)
  theta_diff <- abs(theta_t_new - theta_t)
  
  DM <- (theta_t_new - theta_hat) / (theta_t - theta_hat) # 這一步是相對於前面EM多出來的一步
  theta_t <- theta_t_new
  print(DM) # 看DM的迭代過程
}

最後我們進行 V c o m V_{com} 的估計,結果與書上一致。

y3_hat <- 125 * (theta_hat / 4) / (theta_hat / 4 + 0.5)
V_com <- theta_hat * (1 - theta_hat) / (y1 + y2 + y3_hat)
(V_obs <- V_com / (1 - DM))

下面計算進行logit變換後,重新估計引數與 V o b s V_{obs}

# 定義變換函式
logit <- function(x) return(log(x / (1 - x)))

# 重新估計引數,直接進行變換
(logit(theta_hat))

重新進行SEM迭代與估計:

# 引數初始化
eps <- 1e-8
theta_t <- 0
theta_diff <- 1

while(theta_diff > eps) {
  y3_t <- 125 * (theta_t / 4) / (theta_t / 4 + 0.5)
  theta_t_new <- (34 + y3_t) / (38 + 34 + y3_t)
  theta_diff <- abs(theta_t_new - theta_t)
  
  DM_logit <- (logit(theta_t_new) - logit(theta_hat)) / (logit(theta_t) - logit(theta_hat)) # 注意,這裡全部進行變換
  theta_t <- theta_t_new
  print(DM_logit)
}

y3_hat <- 125 * (theta_hat / 4) / (theta_hat / 4 + 0.5)

# 要格外注意,這裡有Fisher資訊量的變換公式,是需要除以變換函式導數的平方
V_com <- 1 / (y1 + y2 + y3_hat) / (theta_hat * (1 - theta_hat)) 
(V_obs <- V_com / (1 - DM_logit))

最後的估計結果與書上相差一個量級,感覺可能是書上寫錯了。

b)Example 9.2

首先載入兩個在計算過程中需要用到的兩個包:

library(matlib) # sweep operator 需要用到
library(gtools) # permutations 需要用到

初始化一些引數,與EM裡面涉及到的變換需要用到的函式。

# 定義y1, y2
y1 <- c(8, 6, 11, 22, 14, 17, 18, 24, 19, 23, 26, 40, 4, 4, 5, 6, 8, 10)
y2 <- c(59, 58, 56, 53, 50, 45, 43, 42, 39, 38, 30, 27, rep(NA, 6))
ind_na <- which(is.na(y2))

# 初始化引數
n <- length(y1)
p <- 2
eps <- 1e-20

# 定義轉換函式
Z <- function(rho) {
  return(0.5 * log((1 + rho) / (1 - rho)))
}

# 定義逆變換函式
Z_inv <- function(z) {
  return((exp(2 * z) - 1) / (exp(2 * z) + 1))
}

EM計算構建函式(按照課本第十一章高維正態的公式進行編寫):

EM1 <- function(param) {
  # 轉化引數,主要需要注意\rho,\sigma_1,\sigma_2之間的關係
  param_trans <- param
  param_trans[3] <- exp(param[3])
  param_trans[5] <- exp(param[4])
  param_trans[4] <- Z_inv(param[5]) * (param_trans[3] * param_trans[5]) ^ (1/2)

  # 輸入引數
  param_mat <- matrix(nrow = p + 1, ncol = p + 1)
  param_mat[lower.tri(param_mat, diag = T)] <- c(-1, param_trans)
  param_mat[upper.tri(param_mat)] <- param_mat[lower.tri(param_mat)]
  
  # sweep operator
  swp_result <- swp(param_mat, 2)
  
  y2_new <- y2
  y2_new[ind_na] <- swp_result[1, 3] + swp_result[2, 3] * y1[ind_na] 
  
  mu1 <- mean(y1)
  mu2 <- mean(y2_new)
  
  sigma11 <- cov(y1, y1) * (n - 1) / n
  sigma12 <- cov(y1, y2_new) * (n - 1) / n
  sigma22 <- cov(y2_new, y2_new) * (n - 1) / n + swp_result[3, 3] * 6 / n # 後面那塊兒很關鍵,之前忘記加了,結果一直輸出錯誤
  
  mu2_diff <- abs(param_trans[2] - mu2)
  
  # 引數轉換
  param <- c(mu1, mu2, log(sigma11), log(sigma22), Z(sigma12 * (sigma11 * sigma22) ^ (-1/2)))
  
  return(list(param = param, mu2_diff = mu2_diff))
}

進行引數估計(如果中間加個print顯示迭代輸出過程,可以發現 μ 1 \mu_1 log ( σ 11 ) \text{log}(\sigma_{11}) 一步就會收斂):

param <- 1:5
mu2_diff <- 1
while(mu2_diff > eps) {
  EM_result <- EM1(param)
  param <- EM_result$param
  
  # print(param)
  mu2_diff <- EM_result$mu2_diff
}

(final_param <- param)

輸出結果與書上一致。下面進行DM的估計以及 Δ V \Delta V^* 的估計:

SEM <- function(param, i, j) {
  # 轉化引數
  param_trans <- param
  param_trans[3] <- exp(param[3])
  param_trans[5] <- exp(param[4])
  param_trans[4] <- Z_inv(param[5]) * (param_trans[3] * param_trans[5]) ^ (1/2)
  
  # 輸入引數
  param_mat <- matrix(nrow = p + 1, ncol = p + 1)
  param_mat[lower.tri(param_mat, diag = T)] <- c(-1, param_trans)
  param_mat[upper.tri(param_mat)] <- param_mat[lower.tri(param_mat)]
  
  # sweep operator
  swp_result <- swp(param_mat, 2)
  
  y2_new <- y2
  y2_new[ind_na] <- swp_result[1, 3] + swp_result[2, 3] * y1[ind_na]
  
  mu1 <- mean(y1)
  mu2 <- mean(y2_new)
  
  sigma11 <- cov(y1, y1) * (n - 1) / n
  sigma12 <- cov(y1, y2_new) * (n - 1) / n
  sigma22 <- cov(y2_new, y2_new) * (n - 1) / n + swp_result[3, 3] * 6 / n
  
  mu2_diff <- abs(param_trans[2] - mu2)
  param <- c(mu1, mu2, log(sigma11), log(sigma22), Z(sigma12 * (sigma11 * sigma22) ^ (-1/2)))
  
  # SEM核心新增的步驟
  theta_t_i <- final_param
  theta_t_i[i] <- param[i]
  theta_t1_i_j <- EM1(theta_t_i)$param[j]
  r_ij <- (theta_t1_i_j - final_param[j]) / (param[i] - final_param[i])
  
  return(list(param = param, mu2_diff = mu2_diff, r_ij = r_ij))
}


DM <- function(x) {
  i <- x[1]
  j <- x[2]
  
  param_all <- param <- 1:5
  mu2_diff <- 1
  eps <- 1e-5
  
  while(mu2_diff > eps) {
    EM_result <- SEM(param, i, j)
    param <- EM_result$param
    
    # print(EM_result$r_ij)
    mu2_diff <- EM_result$mu2_diff
  }
  return(EM_result$r_ij)
}

檢視最終輸出的DM矩陣:

ind_param <- permutations(5, 2, 1:5, repeats = TRUE)
ind3 <- c(2, 4, 5)
(DM_mat <- matrix(apply(ind_param, 1, DM), 5, byrow = TRUE)[ind3, ind3])

找到SEM的原始論文,核對後發現DM估計與論文上的結果一致。

G1 <- matrix(c(4.9741, 0, 0, 0.1111), 2)
G2 <- matrix(c(-5.0387, 0, 0, 0.0890, 0, -0.0497), 2)
G3 <- matrix(c(6.3719, 0, 0, 0, 0.1111, -0.0497, 0, -0.0497, 0.0556), 3)

(Delta_V_star <- (G3 - t(G2) %*% solve(G1) %*% G2) %*% DM_mat %*% solve(diag(p + 1) - DM_mat))
# round(Delta_V_star, 3)

最後計算 Δ V \Delta V^* ,與書上結果幾乎一致。(理論上應該是對稱矩陣,顯示有一點點不對稱應該是由於計算精度造成,保留小數點後3位看起來就是對稱的了)