powersim_workshop_2026/05_mixed1/powersim-lmm.R

#' ---
#' title: "Power for mixed-effects models"
#' author: ""
#' date: "Last modified: 2026-01-09"
#' bibliography: ../lit.bib
#' ---

library("lattice")
library("lme4")

#' # Reanalysis

#' ## Application context: Depression and type of diagnosis
#'
#' - @ReisbyGram77 studied the effect of Imipramin on 66 inpatients
#'   treated for depression
#' - Depression was measured with the Hamilton depression rating scale
#' - Patients were classified into endogenous and non-endogenous depressed
#' - Depression was measured weekly for 6 time points
#'
#' Data: [reisby.txt](../data/reisby.txt)

dat      <- read.table("../data/reisby.txt", header = TRUE)
dat$id   <- factor(dat$id)
dat$diag <- factor(dat$diag, levels = c("nonen", "endog"))
dat      <- na.omit(dat)     # drop missing values
head(dat, n = 13)

xyplot(hamd ~ week | id, data = dat, type=c("g", "r", "p"),
  pch = 16, layout = c(11, 6), ylab = "HDRS score", xlab = "Time (week)")


#' ## Random-intercept model
#'
#' $$
#' \begin{aligned}
#' Y_{ij} &= \beta_0 + \beta_1 \, \mathtt{week}_{ij}
#'         + \upsilon_{0i}
#'         + \varepsilon_{ij} \\
#' \upsilon_{0i} &\sim N(0, \sigma^2_{\upsilon_0}) \text{ i.i.d.} \\
#' \mathbf{\varepsilon}_i &\sim N(0, \, \sigma^2)  \text{ i.i.d.} \\
#' i &= 1, \ldots, I, \quad j = 1, \ldots n_i
#' \end{aligned}
#' $$

m1 <- lmer(hamd ~ week + (1 | id), data = dat, REML = FALSE)
summary(m1)

#' ## Random-slope model
#'
#' $$
#' \begin{aligned}
#' Y_{ij} &= \beta_0 + \beta_1 \, \mathtt{week}_{ij}
#'         + \upsilon_{0i} + \upsilon_{1i}\, \mathtt{week}_{ij}
#'         + \varepsilon_{ij} \\
#' \begin{pmatrix} \upsilon_{0i}\\ \upsilon_{1i} \end{pmatrix} &\sim
#'    N \left(\begin{pmatrix} 0\\ 0 \end{pmatrix}, \,
#'            \mathbf{\Sigma}_\upsilon =
#' \begin{pmatrix}
#'           \sigma^2_{\upsilon_0} & \sigma_{\upsilon_0 \upsilon_1} \\
#'           \sigma_{\upsilon_0 \upsilon_1} & \sigma^2_{\upsilon_1} \\
#' \end{pmatrix} \right)
#'       \text{ i.i.d.} \\
#' \mathbf{\varepsilon}_i &\sim N(\mathbf{0}, \, \sigma^2 \mathbf{I}_{n_i})
#'       \text{ i.i.d.} \\
#' i &= 1, \ldots, I, \quad j = 1, \ldots n_i
#' \end{aligned}
#' $$

m2 <- lmer(hamd ~ week + (week | id), data = dat, REML = FALSE)
summary(m2)

#' ## Partial pooling

#+ fig.height = 10, fig.width = 6.5, fig.aling = "center"
indiv <- unlist(
  sapply(unique(dat$id),
         function(i) predict(lm(hamd ~ week, dat[dat$id == i, ])))
)

xyplot(hamd + predict(m2, re.form = ~ 0) + predict(m2) + indiv ~ week | id,
  data = dat, type = c("p", "l", "l", "l"), pch = 16, grid = TRUE,
  distribute.type = TRUE, layout = c(11, 6), ylab = "HDRS score",
  xlab = "Time (week)",
  # customize colors
  col = c("#434F4F", "#3CB4DC", "#FF6900", "#78004B"),
  # add legend
  key = list(space = "top", columns = 3,
             text = list(c("Population", "Mixed model", "Within-subject")),
             lines = list(col = c("#3CB4DC", "#FF6900", "#78004B")))
  )

#' ## By-group random-slope model

m3 <- lmer(hamd ~ week + diag + (week | id), data = dat, REML = FALSE)
m4 <- lmer(hamd ~ week * diag + (week | id), data = dat, REML = FALSE)
anova(m3, m4)

#' ## Means and predicted HDRS score by group

dat2 <- aggregate(hamd ~ week + diag, dat, mean)
dat2$m4 <- predict(m4, newdata = dat2, re.form = ~ 0)

plot(m4 ~ week, dat2[dat2$diag == "endog", ], type = "l",
     ylim=c(0, 28), xlab="Week", ylab = "HDRS score")
lines(m4 ~ week, dat2[dat2$diag == "nonen", ], lty = 2)
points(hamd ~ week, dat2[dat2$diag == "endog", ], pch = 16)
points(hamd ~ week, dat2[dat2$diag == "nonen", ], pch = 21, bg = "white")
legend("topright", c("Endogenous", "Non endogenous"),
       lty = 1:2, pch = c(16, 21), pt.bg = "white", bty = "n")

#' ## Gory details

fixef(m4)
getME(m4, "theta")
t(chol(VarCorr(m4)$id))[lower.tri(diag(2), diag = TRUE)] / sigma(m4)

#' # Power simulation

#' ## Setup

## Study design and sample sizes
n_week <- 6
n_subj <- 80
n <- n_week * n_subj

dat <- data.frame(
     id = factor(rep(seq_len(n_subj), each = n_week)),
   week = rep(0:(n_week - 1), times = n_subj),
  treat = factor(rep(0:1, each = n/2), labels = c("ctr", "trt"))
)

## Fixed effects and variance components
beta <- c(22, -2, 0, -1)
Su <- matrix(c(12, -1.5, -1.5, 2), nrow = 2)
se <- 3.5

#' ## Power

#+ cache = TRUE, warning = FALSE
pval <- replicate(200, {

  # Data generation
  means <- model.matrix( ~ week * treat, dat) %*% beta
  ranu <- MASS::mvrnorm(n_subj, mu = c(0, 0), Sigma = Su)
  e <- rnorm(n_subj * n_week, mean = 0, sd = se)

  y <- means + ranu[dat$id, 1] + ranu[dat$id, 2] * dat$week + e

  # Fitting model to test H0
  m0 <- lmer(y ~ week + treat + (1 + week | id), data = dat, REML = FALSE)
  m1 <- lmer(y ~ week * treat + (1 + week | id), data = dat, REML = FALSE)
  anova(m0, m1)["m1", "Pr(>Chisq)"]
  }
)

mean(pval < 0.05)

#' ## Parameter recovery

#+ cache = TRUE, warning = FALSE
par <- replicate(200, {

  means <- model.matrix( ~ week * treat, dat) %*% beta
  ranu <- MASS::mvrnorm(n_subj, mu = c(0, 0), Sigma = Su)
  e <- rnorm(n_subj * n_week, mean = 0, sd = se)

  y <- means + ranu[dat$id, 1] + ranu[dat$id, 2] * dat$week + e

  m1 <- lmer(y ~ week * treat + (1 + week | id), data = dat, REML = FALSE)
  list(fixef = fixef(m1), theta = getME(m1, "theta"), sigma = sigma(m1))
  }, simplify = FALSE
)

rowMeans(sapply(par, function(x) x$fixef))
rowMeans(sapply(par, function(x) x$theta))
mean(sapply(par, function(x) x$sigma))

beta
Lt <- chol(Su)
t(Lt)[lower.tri(Lt, diag = TRUE)] / se
se

#' ### References