1. 程式人生 > 其它 >R語言統計

R語言統計

分佈

# 生成隨機數
rnorm(n = 10, mean = 0, sd = 1) # 正態分佈
rpois(n = 10, lambda = 1) # 泊松分佈
rbinom(n = 10, size = 100, prob = 0.3) # 二項分佈
runif(10, min = 0, max = 1) # 均勻分佈

# 檢視臨界值對應的alpha值
pchisq(3.84, 1)
1 - pchisq(3.84, 1)

# 檢視分佈不同alpha的臨界值
qchisq(0.95, 1)

迴歸分析

# 線性迴歸
fit <- lm(disp~mpg, data = mtcars)

pred <- predict(fit, se.fit = T)
m <- pred$fit
lo <- pred$fit - 1.96*pred$se.fit
up <- pred$fit + 1.96*pred$se.fit
d <- cbind(mtcars$mpg, m, lo, up) %>% as.data.frame()

ggplot(mtcars, aes(mpg, disp)) +
  geom_point() +
  stat_smooth(method = "lm") +
  geom_line(data = d, aes(V1, m), linetype = 2, col = "red") +
  geom_line(data = d, aes(V1, lo), linetype = 2, col = "red") +
  geom_line(data = d, aes(V1, up), linetype = 2, col = "red")

# logistic迴歸
fit <- glm(vs~mpg, data = mtcars, family = "binomial")

pred <- predict(fit, se.fit = T)
m <- plogis(pred$fit)
lo <- plogis(pred$fit - 1.96*pred$se.fit)
up <- plogis(pred$fit + 1.96*pred$se.fit)
d <- cbind(mtcars$mpg, m, lo, up) %>% as.data.frame()

ggplot(mtcars, aes(mpg, vs)) +
  geom_point() +
  stat_smooth(method = "glm", method.args = list(family = "binomial")) +
  geom_line(data = d, aes(V1, m), linetype = 2, col = "red") +
  geom_line(data = d, aes(V1, lo), linetype = 2, col = "red") +
  geom_line(data = d, aes(V1, up), linetype = 2, col = "red")

# logistic迴歸
fit <- glm(vs~mpg, data = mtcars, family = "binomial")

pred <- predict(fit, se.fit = T, type = "link")
m <- pred$fit 
lo <- pred$fit - 1.96*pred$se.fit 
up <- pred$fit + 1.96*pred$se.fit 
m <- fit$family$linkinv(m)
lo <- fit$family$linkinv(lo)
up <- fit$family$linkinv(up)
d <- cbind(mtcars$mpg, m, lo, up) %>% as.data.frame()

ggplot(mtcars, aes(mpg, vs)) +
  geom_point() +
  stat_smooth(method = "glm", method.args = list(family = "binomial")) +
  geom_line(data = d, aes(V1, m), linetype = 2, col = "red") +
  geom_line(data = d, aes(V1, lo), linetype = 2, col = "red") +
  geom_line(data = d, aes(V1, up), linetype = 2, col = "red")

Emax模型

library(DoseFinding)
data(IBScovars)

fitemax <- fitMod(dose, resp, data = IBScovars, model = "emax", bnds = c(0.01,5))
plot(fitemax, CI = TRUE, plotData = "meansCI")

newdat <- data.frame(dose = seq(0,4,0.01))
pred <- predict(fitemax, newdata=newdat, predType = "full-model", se.fit = TRUE)
m <- pred$fit
lo <- pred$fit - 1.96*pred$se.fit
up <- pred$fit + 1.96*pred$se.fit
d <- cbind(newdat$dose, m, lo, up) %>% as.data.frame()

ggplot(IBScovars, aes(dose, resp)) +
  geom_line(data = d, aes(V1, m), linetype = 2, col = "red") +
  geom_line(data = d, aes(V1, lo), linetype = 2, col = "red") +
  geom_line(data = d, aes(V1, up), linetype = 2, col = "red")