790 lines
26 KiB
TeX
790 lines
26 KiB
TeX
\documentclass[aspectratio=169]{beamer}
|
|
|
|
\usepackage{listings}
|
|
\usepackage[utf8]{inputenc}
|
|
\usepackage[style = apa, backend = biber, natbib = true]{biblatex}
|
|
\usepackage{tikz}
|
|
\addbibresource{lit.bib}
|
|
|
|
\makeatletter \def\newblock{\beamer@newblock} \makeatother
|
|
|
|
\beamertemplatenavigationsymbolsempty
|
|
\setbeamertemplate{itemize items}[circle]
|
|
\setbeamertemplate{section in toc}[circle]
|
|
\mode<beamer>{\setbeamercolor{math text displayed}{fg=iwmgray}}
|
|
\setbeamercolor{block body}{bg=iwmorange!50!white}
|
|
\setbeamercolor{block title}{fg=white, bg=iwmorange}
|
|
|
|
% Definitions for biblatex
|
|
\setbeamercolor{bibliography entry note}{fg=iwmgray}
|
|
\setbeamercolor{bibliography entry author}{fg=iwmgray}
|
|
\setbeamertemplate{bibliography item}{}
|
|
|
|
\definecolor{iwmorange}{RGB}{255,105,0}
|
|
\definecolor{iwmgray}{RGB}{67,79,79}
|
|
\definecolor{iwmblue}{RGB}{60,180,220}
|
|
\definecolor{iwmgreen}{RGB}{145,200,110}
|
|
\definecolor{iwmpurple}{RGB}{120,0,75}
|
|
|
|
\setbeamercolor{title}{fg=iwmorange}
|
|
\setbeamercolor{frametitle}{fg=iwmorange}
|
|
\setbeamercolor{structure}{fg=iwmorange}
|
|
\setbeamercolor{normal text}{fg=iwmgray}
|
|
\setbeamercolor{author}{fg=iwmgray}
|
|
\setbeamercolor{date}{fg=iwmgray}
|
|
|
|
\lstset{language = R,%
|
|
basicstyle = \ttfamily\color{iwmgray},
|
|
frame = single,
|
|
rulecolor = \color{iwmgray},
|
|
commentstyle = \slshape\color{iwmgreen},
|
|
keywordstyle = \bfseries\color{iwmgray},
|
|
identifierstyle = \color{iwmpurple},
|
|
stringstyle = \color{iwmblue},
|
|
numbers = none,%left,numberstyle = \tiny,
|
|
basewidth = {.5em, .4em},
|
|
showstringspaces = false,
|
|
emphstyle = \color{red!50!white}}
|
|
|
|
\title{Introduction to mixed-effects models}
|
|
\subtitle{(for longitudinal data)}
|
|
\author{Nora Wickelmaier}
|
|
\institute{\includegraphics[scale=.2]{../figures/iwm_logo_rgb}}
|
|
\date{2026-04-28}
|
|
|
|
\AtBeginSection[]{
|
|
\frame{
|
|
\tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}}
|
|
|
|
\setbeamertemplate{headline}{
|
|
\begin{beamercolorbox}{section in head}
|
|
\vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt
|
|
\end{beamercolorbox}
|
|
}
|
|
|
|
\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt}
|
|
|
|
\begin{document}
|
|
|
|
\begin{frame}{}
|
|
\thispagestyle{empty}
|
|
\titlepage
|
|
\end{frame}
|
|
|
|
\begin{frame}{Plan for today}
|
|
\begin{itemize}[<+->]
|
|
\item We will walk through an example for a longitudinal data set (6 time
|
|
points per subject)
|
|
\item I will explain the general concepts with the slides
|
|
\item We will switch to R and use the lme4 package to fit the models
|
|
\item You will use R to fit the model and try out different specifications
|
|
\item We will discuss the results
|
|
\item All the materials are here:
|
|
\url{https://gitea.iwm-tuebingen.de/nwickelmaier/lead_longitudinal}
|
|
\\~\\
|
|
\item[$\to$] Try to code along in R! Ask as many questions as possible
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Outline}
|
|
\tableofcontents
|
|
\end{frame}
|
|
|
|
\section[Introduction]{Introduction to longitudinal data}
|
|
|
|
\begin{frame}[fragile]{Longitudinal data}
|
|
\begin{itemize}
|
|
\item Consist of repeated measurements on the same subject taken over time
|
|
\item Are a frequent use case for mixed-effects models
|
|
\item Contain time as a predictor:
|
|
time trends within and between subjects are of interest
|
|
\end{itemize}
|
|
\begin{lstlisting}
|
|
library("lme4")
|
|
data("sleepstudy")
|
|
?sleepstudy
|
|
str(sleepstudy)
|
|
summary(sleepstudy)
|
|
head(sleepstudy)
|
|
\end{lstlisting}
|
|
\nocite{Bates10lme4, Bates15}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Sleep study}
|
|
\begin{itemize}
|
|
\item Average reaction time per day for subjects in a sleep deprivation
|
|
study
|
|
\item On day 0, the subjects had their normal amount of sleep
|
|
\item Starting that night they were restricted to 3 hours of sleep per
|
|
night
|
|
\item Observations represent the average reaction time on a series of
|
|
tests given each day to each subject
|
|
\end{itemize}
|
|
|
|
\vfill
|
|
A data frame with 180 observations on the following 3 variables\\~\\
|
|
|
|
\begin{tabular}{ll}
|
|
\hline
|
|
\texttt{Reaction} & Average reaction time (ms) \\
|
|
\texttt{Days} & Number of days of sleep deprivation \\
|
|
\texttt{Subject} & Subject number on which the observation was made \\
|
|
\hline
|
|
\end{tabular}
|
|
\end{frame}
|
|
|
|
|
|
\begin{frame}[fragile]{Visualization of data}
|
|
\begin{columns}
|
|
\begin{column}{.53\textwidth}
|
|
\includegraphics[scale=.8]{../figures/sleep_box}
|
|
\end{column}
|
|
\begin{column}{.55\textwidth}
|
|
\begin{lstlisting}
|
|
boxplot(Reaction ~ Days, sleepstudy)
|
|
\end{lstlisting}
|
|
\end{column}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{Visualization of individual data}
|
|
\begin{columns}
|
|
\begin{column}{.53\textwidth}
|
|
\includegraphics[scale=.5]{../figures/sleep_subjects}
|
|
\end{column}
|
|
\begin{column}{.55\textwidth}
|
|
\begin{lstlisting}
|
|
library("lattice")
|
|
|
|
xyplot(Reaction ~ Days | Subject,
|
|
data = sleepstudy,
|
|
type = c("g", "b"),
|
|
xlab = "Days of sleep deprivation",
|
|
ylab = "Average reaction time (ms)",
|
|
aspect = "xy")
|
|
\end{lstlisting}
|
|
\end{column}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Random intercept model}
|
|
\begin{columns}
|
|
\begin{column}{.5\textwidth}
|
|
\begin{itemize}
|
|
\item The random intercept model adds a random intercept for each subject
|
|
\[
|
|
y_{ij} = \beta_0 + \beta_1\,Days_{ij} + \upsilon_{0i} + \varepsilon_{ij}
|
|
\]
|
|
with $\upsilon_{0i} \overset{iid}{\sim} N(0, \sigma^2_{\upsilon})$,
|
|
$\varepsilon_{ij} \overset{iid}{\sim} N(0, \sigma^2_\varepsilon)$
|
|
\item The slope is identical for each subject (and the population)
|
|
\end{itemize}
|
|
\vspace{2.2cm}
|
|
\end{column}
|
|
\begin{column}{.5\textwidth}
|
|
\includegraphics[scale=.5]{../figures/sleep_random_intercept}
|
|
\end{column}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
|
|
\begin{frame}{Fixed and random effects for random intercept model}
|
|
% m1 <- lmer(Reaction ~ Days + (1 | Subject), sleepstudy)
|
|
% tab <- cbind(
|
|
% sleepstudy[, c("Subject", "Days", "Reaction")],
|
|
% yhat = fixef(m1)["(Intercept)"] +
|
|
% fixef(m1)["Days"] * sleepstudy[["Days"]] +
|
|
% ranef(m1)[["Subject"]][["(Intercept)"]][sleepstudy[["Subject"]]] +
|
|
% resid(m1),
|
|
% Intercept = fixef(m1)["(Intercept)"],
|
|
% Slope = fixef(m1)["Days"] * sleepstudy[["Days"]],
|
|
% SubInt = ranef(m1)[["Subject"]][["(Intercept)"]][sleepstudy[["Subject"]]],
|
|
% Resid = resid(m1)
|
|
% )
|
|
% tab <- cbind(
|
|
% sleepstudy[, c("Subject", "Days", "Reaction")],
|
|
% Intercept = fixef(m1)["(Intercept)"] |> unname(),
|
|
% Slope = fixef(m1)["Days"] |> unname(),
|
|
% SubInt = ranef(m1)[["Subject"]][["(Intercept)"]][sleepstudy[["Subject"]]],
|
|
% Resid = resid(m1)
|
|
% )
|
|
% print(
|
|
% xtable(tab[tab$Subject %in% c("308", "309", "310"), ],
|
|
% digits = c(NA, 0, 0, 0, 1, 1, 1, 1)),
|
|
% include.rownames = FALSE, math.style.negative = TRUE)
|
|
% )
|
|
% latex table generated in R 4.5.2 by xtable 1.8-8 package
|
|
% Thu Mar 5 11:18:32 2026
|
|
\centering\footnotesize
|
|
\begin{tabular}{ccccccc}
|
|
\hline
|
|
Subject & Days & Reaction & Fixed & & Random & \\
|
|
\cline{4-5}
|
|
\cline{6-7}
|
|
& & & Intercept & Slope & SubInt & Resid \\
|
|
\hline
|
|
308 & 0 & 250 & 251.4 & 10.5 & 40.8 & $-$42.6 \\
|
|
308 & 1 & 259 & 251.4 & 10.5 & 40.8 & $-$44.0 \\
|
|
308 & 2 & 251 & 251.4 & 10.5 & 40.8 & $-$62.3 \\
|
|
\vdots & \vdots & \vdots & \vdots & \vdots & \vdots & \vdots\\
|
|
308 & 8 & 431 & 251.4 & 10.5 & 40.8 & 54.7 \\
|
|
308 & 9 & 466 & 251.4 & 10.5 & 40.8 & 80.0 \\
|
|
\hline
|
|
309 & 0 & 223 & 251.4 & 10.5 & $-$77.8 & 49.2 \\
|
|
309 & 1 & 205 & 251.4 & 10.5 & $-$77.8 & 21.2 \\
|
|
309 & 2 & 203 & 251.4 & 10.5 & $-$77.8 & 8.5 \\
|
|
\vdots & \vdots & \vdots & \vdots & \vdots & \vdots & \vdots\\
|
|
\hline
|
|
310 & 0 & 199 & 251.4 & 10.5 & $-$63.1 & 10.8 \\
|
|
310 & 1 & 194 & 251.4 & 10.5 & $-$63.1 & $-$4.4 \\
|
|
310 & 2 & 234 & 251.4 & 10.5 & $-$63.1 & 25.1 \\
|
|
\vdots & \vdots & \vdots & \vdots & \vdots & \vdots & \vdots\\
|
|
\hline
|
|
& & & & & $\sigma^2_{\upsilon}$ & $\sigma^2_\varepsilon$\\
|
|
\end{tabular}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{Random slope model}
|
|
\begin{columns}
|
|
\begin{column}{.5\textwidth}
|
|
\begin{itemize}
|
|
\item The random slope model adds a random intercept and a random slope for each
|
|
subject
|
|
\[
|
|
y_{ij} = \beta_0 + \beta_1\,Days_{ij} + \upsilon_{0i} +
|
|
\upsilon_{1i}\,Days_{ij} + \varepsilon_{ij}
|
|
\]
|
|
with
|
|
\begin{align*}
|
|
\begin{pmatrix} \upsilon_{0i}\\ \upsilon_{1i} \end{pmatrix} &\overset{iid}{\sim}
|
|
N \left(\begin{pmatrix} 0\\ 0 \end{pmatrix}, \, \boldsymbol{\Sigma}_\upsilon =
|
|
\begin{pmatrix}
|
|
\sigma^2_{\upsilon_0} & \sigma_{\upsilon_0 \upsilon_1} \\
|
|
\sigma_{\upsilon_0 \upsilon_1} & \sigma^2_{\upsilon_1} \\
|
|
\end{pmatrix} \right)
|
|
\\
|
|
\varepsilon_{ij} & \overset{iid}{\sim} N(0, \sigma^2_\varepsilon)
|
|
\end{align*}
|
|
\vspace{-.5cm}
|
|
\item Individual slopes for each subject
|
|
\end{itemize}
|
|
\end{column}
|
|
\begin{column}{.5\textwidth}
|
|
\includegraphics[scale=.5]{../figures/sleep_random_slope}
|
|
\end{column}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
|
|
\begin{frame}{Fixed and random effects for random slope model}
|
|
% m2 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
|
|
% tab <- cbind(
|
|
% sleepstudy[, c("Subject", "Days", "Reaction")],
|
|
% yhat = fixef(m2)["(Intercept)"] +
|
|
% fixef(m2)["Days"] * sleepstudy[["Days"]] +
|
|
% ranef(m2)[["Subject"]][["(Intercept)"]][sleepstudy[["Subject"]]] +
|
|
% ranef(m2)[["Subject"]][["Days"]][sleepstudy[["Subject"]]] *
|
|
% sleepstudy[["Days"]] +
|
|
% resid(m2),
|
|
% Intercept = fixef(m2)["(Intercept)"],
|
|
% Slope = fixef(m2)["Days"] * sleepstudy[["Days"]],
|
|
% SubInt = ranef(m2)[["Subject"]][["(Intercept)"]][sleepstudy[["Subject"]]],
|
|
% SubSlp = ranef(m2)[["Subject"]][["Days"]][sleepstudy[["Subject"]]] *
|
|
% sleepstudy[["Days"]],
|
|
% Resid = resid(m2)
|
|
% )
|
|
% tab <- cbind(
|
|
% sleepstudy[, c("Subject", "Days", "Reaction")],
|
|
% Intercept = fixef(m2)["(Intercept)"] |> unname(),
|
|
% Slope = fixef(m2)["Days"] |> unname(),
|
|
% SubInt = ranef(m2)[["Subject"]][["(Intercept)"]][sleepstudy[["Subject"]]],
|
|
% SubSlp = ranef(m2)[["Subject"]][["Days"]][sleepstudy[["Subject"]]],
|
|
% Resid = resid(m2)
|
|
% )
|
|
% print(
|
|
% xtable(tab[tab$Subject %in% c("308", "309", "310"), ],
|
|
% digits = c(NA, 0, 0, 0, 1, 1, 1, 1, 1)),
|
|
% include.rownames = FALSE, math.style.negative = TRUE)
|
|
% )
|
|
% latex table generated in R 4.5.2 by xtable 1.8-8 package
|
|
% Thu Mar 5 11:18:32 2026
|
|
\centering\footnotesize
|
|
\begin{tabular}{cccccccc}
|
|
\hline
|
|
Subject & Days & Reaction & Fixed & & Random & & \\
|
|
\cline{4-5}
|
|
\cline{6-8}
|
|
& & & Intercept & Slope & SubInt & SubSlp & Resid \\
|
|
\hline
|
|
308 & 0 & 250 & 251.4 & 10.5 & 2.3 & 9.2 & $-$4.1 \\
|
|
308 & 1 & 259 & 251.4 & 10.5 & 2.3 & 9.2 & $-$14.6 \\
|
|
308 & 2 & 251 & 251.4 & 10.5 & 2.3 & 9.2 & $-$42.2 \\
|
|
\vdots & \vdots & \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
|
|
308 & 8 & 431 & 251.4 & 10.5 & 2.3 & 9.2 & 19.6 \\
|
|
308 & 9 & 466 & 251.4 & 10.5 & 2.3 & 9.2 & 35.7 \\
|
|
\hline
|
|
309 & 0 & 223 & 251.4 & 10.5 & $-$40.4 & $-$8.6 & 11.7 \\
|
|
309 & 1 & 205 & 251.4 & 10.5 & $-$40.4 & $-$8.6 & $-$7.6 \\
|
|
309 & 2 & 203 & 251.4 & 10.5 & $-$40.4 & $-$8.6 & $-$11.7 \\
|
|
\vdots & \vdots & \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
|
|
\hline
|
|
310 & 0 & 199 & 251.4 & 10.5 & $-$39.0 & $-$5.4 & $-$13.4 \\
|
|
310 & 1 & 194 & 251.4 & 10.5 & $-$39.0 & $-$5.4 & $-$23.1 \\
|
|
310 & 2 & 234 & 251.4 & 10.5 & $-$39.0 & $-$5.4 & 11.8 \\
|
|
\vdots & \vdots & \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\
|
|
\hline
|
|
&&&&& $\sigma^2_{\upsilon_0}$ & $\sigma^2_{\upsilon_1}$ &
|
|
$\sigma^2_\varepsilon$\\[-6pt]
|
|
&&&&& \multicolumn{2}{c}{$\sigma_{\upsilon_0\upsilon_1}$} & \\
|
|
\end{tabular}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Partial pooling}
|
|
\begin{columns}
|
|
\begin{column}{.5\textwidth}
|
|
\includegraphics[scale=.5]{../figures/sleep_shrinkfit}
|
|
\end{column}
|
|
\begin{column}{.5\textwidth}
|
|
\begin{itemize}
|
|
\item Within-subject regression line shows regression line fitted to
|
|
data for each individual
|
|
\item Population regression line shows fixed effects for mixed-effects
|
|
model
|
|
\item Mixed model regression line shows individual regression lines as
|
|
predicted by mixed-effects models
|
|
\end{itemize}
|
|
\end{column}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
\section[Growth curve model]{Example: Depression and Imipramin}
|
|
|
|
|
|
\begin{frame}{Quadratic time trends}
|
|
\begin{columns}
|
|
\begin{column}{.35\textwidth}
|
|
\begin{itemize}
|
|
\item A lot of times the assumption of a linear time trend is too
|
|
simple
|
|
\item Change does not continue linearly unchecked but flattens out
|
|
\end{itemize}
|
|
\end{column}
|
|
\begin{column}{.65\textwidth}
|
|
\includegraphics[width=9cm]{../figures/quad}
|
|
\end{column}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Quadratic time trends}
|
|
\begin{itemize}
|
|
\item Quadratic regression model
|
|
\begin{align*}
|
|
y_{ij} &= b_{0i} + b_{1i}\,t_{ij} + b_{2i}\,t^2_{ij} + \varepsilon_{ij}\\
|
|
&= b_{0i} + (b_{1i} + b_{2i}\,t_{ij}) t_{ij} + \varepsilon_{ij}
|
|
\end{align*}
|
|
\item The linear change depends on time $t$
|
|
\[
|
|
\frac{\partial y}{\partial t} = b_{1i} + 2b_{2i} \, t
|
|
\]
|
|
\item The turning point $t = -b_{1i}/(2 b_{2i})$ is the point in time when a
|
|
positive (negative) trend becomes negative (positive)
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Depression and Imipramin \citep{ReisbyGram77}}
|
|
\begin{itemize}
|
|
\item \citet{ReisbyGram77} studied the effect of Imipramin on 66
|
|
inpatients treated for depression
|
|
\item Depression was measured with the Hamilton depression rating scale
|
|
(HDRS)
|
|
\item Additionally, the concentration of Imipramin and its metabolite
|
|
Desipramin was measured in their blood plasma
|
|
\item Patients were classified as endogenously or non-endogenously
|
|
depressed
|
|
\item Depression was measured weekly for 6 time points; the effect of
|
|
the antidepressant was observed starting at week 2 for four weeks
|
|
\end{itemize}
|
|
\vfill
|
|
\end{frame}
|
|
|
|
\begin{frame}{Descriptive statistics}
|
|
\begin{columns}
|
|
\begin{column}{.55\textwidth}
|
|
\includegraphics[scale=.8]{../figures/hdrs-box}
|
|
\end{column}
|
|
\begin{column}{.6\textwidth}
|
|
HDRS score\\[1ex]
|
|
{\footnotesize
|
|
\begin{tabular}{rrrrrrr}
|
|
\hline
|
|
$t$ & W0 & W1 & W2 & W3 & W4 & W5 \\
|
|
\hline
|
|
$M$ & 23.44 & 21.84 & 18.31 & 16.42 & 13.62 & 11.95 \\
|
|
$SD$ & 4.53 & 4.70 & 5.49 & 6.42 & 6.97 & 7.22 \\
|
|
$n$ & 61 & 63 & 65 & 65 & 63 & 58 \\
|
|
\hline
|
|
\end{tabular}
|
|
}
|
|
|
|
\vspace{.5cm}
|
|
Empirical correlation matrix of HDRS score\\[1ex]
|
|
{\footnotesize
|
|
\begin{tabular}{rrrrrrr}
|
|
\hline
|
|
& W0 & W1 & W2 & W3 & W4 & W5 \\
|
|
\hline
|
|
Week 0 & 1 & .49 & .41 & .33 & .23 & .18 \\
|
|
Week 1 & .49 & 1 & .49 & .41 & .31 & .22 \\
|
|
Week 2 & .41 & .49 & 1 & .74 & .67 & .46 \\
|
|
Week 3 & .33 & .41 & .74 & 1 & .82 & .57 \\
|
|
Week 4 & .23 & .31 & .67 & .82 & 1 & .65 \\
|
|
Week 5 & .18 & .22 & .46 & .57 & .65 & 1 \\
|
|
\hline
|
|
\end{tabular}
|
|
}
|
|
|
|
\vspace{1cm}
|
|
\end{column}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Predictions random slope model}
|
|
\begin{columns}
|
|
\begin{column}{.6\textwidth}
|
|
\includegraphics[scale=.5]{../figures/hdrs-ind_pred-randomslope}
|
|
\end{column}
|
|
\begin{column}{.4\textwidth}
|
|
\[
|
|
y_{ij} = \beta_0 + \beta_1 time + \upsilon_{0i} + \upsilon_{1i} time +
|
|
\varepsilon_{ij}
|
|
\]
|
|
with
|
|
\begin{align*}
|
|
\begin{pmatrix} \upsilon_{0i}\\ \upsilon_{1i} \end{pmatrix} &\overset{iid}\sim
|
|
N \left(\begin{pmatrix} 0\\ 0 \end{pmatrix}, \, \boldsymbol{\Sigma}_\upsilon =
|
|
\begin{pmatrix}
|
|
\sigma^2_{\upsilon_0} & \sigma_{\upsilon_0 \upsilon_1} \\
|
|
\sigma_{\upsilon_0 \upsilon_1} & \sigma^2_{\upsilon_1} \\
|
|
\end{pmatrix} \right)\\
|
|
\boldsymbol{\varepsilon}_i &\overset{iid}\sim N(\mathbf{0}, \, \sigma^2
|
|
\mathbf{I}_{n_i})
|
|
\end{align*}
|
|
\end{column}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{}
|
|
\begin{block}{Exercise}
|
|
\begin{itemize}
|
|
\item What would be a possible extension of this model?\pause
|
|
\item Write down the model equations
|
|
\begin{itemize}
|
|
\item What changes for the fixed effects?
|
|
\item How do the variance components for the random effects change?
|
|
\end{itemize}\pause
|
|
\item How can we interpret the random quadratic effects for this model?\pause
|
|
\item How do we add (random) quadratic effects to a random slope model
|
|
using \texttt{lme4::lmer()}?\pause
|
|
\item Fit this growth curve model in R\pause
|
|
\end{itemize}
|
|
\end{block}
|
|
\begin{lstlisting}
|
|
dat <- read.table("data/reisby.txt", header = TRUE)
|
|
dat$id <- factor(dat$id)
|
|
dat$diag <- factor(dat$diag, levels = c("nonen", "endog"))
|
|
dat <- na.omit(dat) # drop missing values
|
|
\end{lstlisting}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{Model with quadratic trend}
|
|
\begin{itemize}
|
|
\item Model with quadratic individual and quadratic group trend
|
|
\[
|
|
y_{ij} = \beta_0 + \beta_1\,t_{ij} + \beta_2\,t^2_{ij} + \upsilon_{0i} +
|
|
\upsilon_{1i}\,t_{ij} + \upsilon_{2i}\,t^2_{ij} + \varepsilon_{ij}
|
|
\]
|
|
with
|
|
\begin{align*}
|
|
\begin{pmatrix}
|
|
\upsilon_{0i}\\
|
|
\upsilon_{1i}\\
|
|
\upsilon_{2i}
|
|
\end{pmatrix} &\overset{iid}{\sim}
|
|
N \left(\begin{pmatrix}
|
|
0\\ 0\\ 0
|
|
\end{pmatrix}, \,
|
|
\begin{pmatrix}
|
|
\sigma^2_{\upsilon_0} & \sigma_{\upsilon_0 \upsilon_1} & \sigma_{\upsilon_0 \upsilon_2}\\
|
|
\sigma_{\upsilon_0 \upsilon_1} & \sigma^2_{\upsilon_1} & \sigma_{\upsilon_1 \upsilon_2}\\
|
|
\sigma_{\upsilon_0 \upsilon_2} & \sigma_{\upsilon_1 \upsilon_2} & \sigma^2_{\upsilon_2}\\
|
|
\end{pmatrix} \right) \\
|
|
\boldsymbol{\varepsilon}_i &\overset{iid}{\sim} N(\mathbf{0}, \, \sigma^2 \mathbf{I}_{n_i})
|
|
\end{align*}
|
|
\end{itemize}
|
|
\vspace{-.5cm}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{Model with quadratic trend}
|
|
\begin{lstlisting}
|
|
library("lme4")
|
|
|
|
# random intercept model
|
|
lme1 <- lmer(hamd ~ week + (1 | id), data = dat, REML = FALSE)
|
|
|
|
# random slope model
|
|
lme2 <- lmer(hamd ~ week + (week | id), data = dat, REML = FALSE)
|
|
|
|
# model with quadratic time trend
|
|
lme3 <- lmer(hamd ~ week + I(week^2) + (week + I(week^2) | id),
|
|
data = dat, REML = FALSE)
|
|
\end{lstlisting}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Model predictions}
|
|
\begin{columns}
|
|
\begin{column}{6cm}
|
|
\includegraphics[width=6cm]{../figures/hdrs-quad}
|
|
\end{column}
|
|
%
|
|
\begin{column}{5cm}
|
|
\begin{itemize}
|
|
\item Averaged over persons an approximately linear trend is obtained,
|
|
$\hat{\beta}_1 = -2.63$, $\hat{\beta}_2 = 0.05$
|
|
\item Some of the predicted individual trends are strongly nonlinear
|
|
\end{itemize}
|
|
\end{column}
|
|
\end{columns}
|
|
\begin{itemize}
|
|
\item Test against a model without individual quadratic trends\\[2ex]
|
|
|
|
H$_0$: $\sigma^2_{\upsilon_2} = \sigma_{\upsilon_0 \upsilon_2} =
|
|
\sigma_{\upsilon_1 \upsilon_2} = 0$ \qquad
|
|
$G^2(3) = 10.98$, $p = .012$
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{Model predictions}
|
|
\begin{columns}
|
|
\begin{column}{8cm}
|
|
\includegraphics[scale=.5]{../figures/hdrs-ind_pred-quad}
|
|
\end{column}
|
|
\begin{column}{6cm}
|
|
\begin{lstlisting}
|
|
xyplot(
|
|
hamd + predict(lme3)
|
|
~ week | id,
|
|
data = dat,
|
|
type = c("p", "l", "g"),
|
|
pch = 16,
|
|
distribute.type = TRUE,
|
|
ylab = "HDRS score",
|
|
xlab = "Time (Week)")
|
|
\end{lstlisting}
|
|
\end{column}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
% \begin{frame}[fragile]{Implied marginal covariance matrix}
|
|
% Predicted
|
|
% \begin{align*}
|
|
% \mathbf{Z}_i \boldsymbol{\hat\Sigma}_\upsilon \mathbf{Z}'_i +
|
|
% \hat\sigma^2 \mathbf{I}_{n_i} &=
|
|
% \begin{pmatrix}
|
|
% 20.96 & 9.41 & 8.16 & 6.68 & 4.98 & 3.06 \\
|
|
% 9.41 & 23.86 & 15.57 & 16.08 & 14.88 & 11.97 \\
|
|
% 8.16 & 15.57 & 31.07 & 23.11 & 23.26 & 20.98 \\
|
|
% 6.68 & 16.08 & 23.11 & 38.31 & 30.12 & 30.09 \\
|
|
% 4.98 & 14.88 & 23.26 & 30.12 & 45.98 & 39.29 \\
|
|
% 3.06 & 11.97 & 20.98 & 30.09 & 39.29 & 59.11
|
|
% \end{pmatrix}\\
|
|
% %
|
|
% \intertext{Observed}
|
|
% %
|
|
% \widehat{Cov}(\mathbf{y}_i) &=
|
|
% \begin{pmatrix}
|
|
% 20.55 & 10.11 & 10.14 & 10.09 & 7.19 & 6.28 \\
|
|
% 10.11 & 22.07 & 12.28 & 12.55 & 10.26 & 7.72 \\
|
|
% 10.14 & 12.28 & 30.09 & 25.13 & 24.63 & 18.38 \\
|
|
% 10.09 & 12.55 & 25.13 & 41.15 & 37.34 & 23.99 \\
|
|
% 7.19 & 10.26 & 24.63 & 37.34 & 48.59 & 30.51 \\
|
|
% 6.28 & 7.72 & 18.38 & 23.99 & 30.51 & 52.12 \\
|
|
% \end{pmatrix}\\
|
|
% \end{align*}
|
|
% % \vspace{-1cm}
|
|
% % \begin{lstlisting}
|
|
% % getVarCov(lme3, type="marginal") # predicted cov. matrix
|
|
% %
|
|
% % var.d <- crossprod(getME(lme3,"Lambdat"))
|
|
% % Zt <- getME(lme3, "Zt")
|
|
% % vr <- sigma(lme3)^2
|
|
% %
|
|
% % var.b <- vr*(t(Zt) %*% var.d %*% Zt)
|
|
% % sI <- vr * Diagonal(nrow(dat2))
|
|
% % var.y <- var.b + sI
|
|
% %
|
|
% % \end{lstlisting}
|
|
% \end{frame}
|
|
|
|
\begin{frame}{Centering variables}
|
|
\begin{itemize}
|
|
\item If multiples of the time variables ($t$, $t^2$, $t^3$, etc.) are
|
|
entered into the regression equation, multicollinearity can become a
|
|
problem
|
|
\item For example, $t = 0, 1, 2, 3$ and $t^2 = 0, 1, 4, 9$ correlate almost
|
|
perfectly
|
|
\item By centering the variables, this problem can be diminished: $(t -
|
|
\bar{t}) = -1.5, -0.5, 0.5, 1.5$ and $(t - \bar{t})^2 = 2.25, 0.25, 0.25,
|
|
2.25$ are uncorrelated
|
|
\item By centering variables the interpretation of the intercept in a linear
|
|
model changes:
|
|
\begin{itemize}
|
|
\item Uncentered intercepts represent the difference to the first time
|
|
point ($t = 0$)
|
|
\item Centered intercepts represent the difference after half of the
|
|
time
|
|
\end{itemize}
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}{}
|
|
\begin{block}{Exercise}
|
|
\begin{itemize}
|
|
\item Center time\pause
|
|
\item Compare the correlation between time and time$^2$ with and without
|
|
centering\pause
|
|
\item Refit the model with quadratic effects when time is centered\pause
|
|
\item Which parameters change?
|
|
\begin{itemize}
|
|
\item What changes for the fixed effects?
|
|
\item How do the variance components for the random effects change?
|
|
\end{itemize}\pause
|
|
\item \emph{Why} do the variance components for the random effects change?
|
|
\end{itemize}
|
|
\end{block}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{Analysis with centered time variable}
|
|
\begin{lstlisting}
|
|
dat$week_c <- dat$week - mean(dat$week)
|
|
cor(dat$week, dat$week^2) # 0.96
|
|
cor(dat$week_c, dat$week_c^2) # 0.01
|
|
|
|
# random slope model
|
|
lme2c <- lmer(hamd ~ week_c + (week_c | id), data = dat, REML = FALSE)
|
|
|
|
# model with quadratic time trend
|
|
lme3c <- lmer(hamd ~ week_c + I(week_c^2) + (week_c + I(week_c^2)|id),
|
|
data = dat, REML = FALSE)
|
|
\end{lstlisting}
|
|
% \begin{itemize}
|
|
% \item When comparing the estimated parameters, it becomes obvious that not
|
|
% only the intercept changes but the estimates for the (co)variances do as
|
|
% well
|
|
% \item Why?\pause
|
|
% ~Be sure to make an informed choice when centering your
|
|
% variables!
|
|
% \end{itemize}
|
|
\nocite{Alday2025, Hedeker2006}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]{Investigating random effects structure}
|
|
\begin{itemize}
|
|
\item In order to get a better understanding of the necessary random effects
|
|
it might be a good idea to take a closer look at them
|
|
\item Two plots often used are the so-called caterpillar and shrinkage plots
|
|
\item Play around with different models and compare how, e.\,g., the
|
|
caterpillar plots change with and without covariances in the model!
|
|
\end{itemize}
|
|
\vfill
|
|
\begin{lstlisting}
|
|
library("lattice")
|
|
dotplot(ranef(lme3), scales = list( x = list(relation = "free")))$id
|
|
\end{lstlisting}
|
|
\end{frame}
|
|
|
|
|
|
\begin{frame}[fragile]%{Investigating random effects structure}
|
|
{Caterpillar plot}
|
|
\begin{columns}
|
|
\begin{column}{1.13\textwidth}
|
|
\includegraphics[scale=.31]{../figures/hdrs-caterpillar}
|
|
\end{column}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
\begin{frame}[fragile]%{Investigating random effects structure}
|
|
{Shrinkage plots}
|
|
\begin{columns}
|
|
\begin{column}{.35\textwidth}
|
|
\includegraphics[scale=.35]{../figures/hdrs_shrinkage_int-week}
|
|
\end{column}
|
|
\begin{column}{.35\textwidth}
|
|
\includegraphics[scale=.35]{../figures/hdrs_shrinkage_int-weeksq}
|
|
\end{column}
|
|
\begin{column}{.35\textwidth}
|
|
\includegraphics[scale=.35]{../figures/hdrs_shrinkage_week-weeksq}
|
|
\end{column}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
% \begin{frame}{Higher-order polynomials}
|
|
% \begin{itemize}
|
|
% \item Nonlinear time trends can be modelled in a flexible and parsimonious
|
|
% way by using higher-order polynomials
|
|
% \item For example, saddle or reversal points in a time trend can be
|
|
% described
|
|
% \item Polynomials have the advantage that the regression model stays linear
|
|
% in its parameters
|
|
% \item They have the disadvantage that extrapolated values can quickly be
|
|
% outside of a range that can still be interpreted in a meaningful way
|
|
% \end{itemize}
|
|
% \end{frame}
|
|
%
|
|
% \begin{frame}{Cubic time trends}
|
|
% \begin{center}
|
|
% \includegraphics[width=9cm]{../figures/cubic}
|
|
% \end{center}
|
|
% \end{frame}
|
|
%
|
|
% \begin{frame}{Polynomial regression: Extrapolation}
|
|
% \begin{center}
|
|
% \includegraphics[width=9cm]{../figures/cubic-gone-bad}
|
|
% \end{center}
|
|
% \end{frame}
|
|
|
|
\begin{frame}{What we learned today\dots}
|
|
{\dots and how to go on}
|
|
\pause
|
|
\begin{enumerate}[<+->]
|
|
\item We learned
|
|
\begin{itemize}
|
|
\item The basic concept of random effects and why to include them in a
|
|
model
|
|
\item How to model data collected over several time points
|
|
\item How to fit mixed-effects models with quadratic time trends in R
|
|
using \texttt{lmer()} from the lme4 package
|
|
\item How to interpret parameters in mixed-effects models with quadratic
|
|
effects
|
|
\end{itemize}
|
|
\item Next steps
|
|
\begin{itemize}
|
|
\item Do this exercise
|
|
\url{https://gitea.iwm-tuebingen.de/nwickelmaier/lead_longitudinal/src/branch/master/exercises/schizo.md}
|
|
\item It has a very similar structure to the depression dataset and
|
|
this will help you to generalize the concepts we learned today
|
|
\item You can send questions to me and even make an appointment with me
|
|
to go over your solution
|
|
\end{itemize}
|
|
\end{enumerate}
|
|
\end{frame}
|
|
|
|
\appendix
|
|
%\begin{frame}[allowframebreaks]{References}
|
|
\begin{frame}{References}
|
|
\printbibliography
|
|
\vfill
|
|
\end{frame}
|
|
|
|
\end{document}
|
|
|