% lead_lmm/slides/lead_lmm.tex

\documentclass[aspectratio=169]{beamer}

\usepackage{listings}
\usepackage[utf8]{inputenc}
\usepackage[style = apa, backend = biber, natbib = true]{biblatex}
\usepackage{tikz}
\addbibresource{lit.bib}

\makeatletter \def\newblock{\beamer@newblock} \makeatother

\beamertemplatenavigationsymbolsempty
\setbeamertemplate{itemize items}[circle]
\setbeamertemplate{section in toc}[circle]
\mode<beamer>{\setbeamercolor{math text displayed}{fg=iwmgray}}
\setbeamercolor{block body}{bg=iwmorange!50!white}
\setbeamercolor{block title}{fg=white, bg=iwmorange}

% Definitions for biblatex
\setbeamercolor{bibliography entry note}{fg=iwmgray}
\setbeamercolor{bibliography entry author}{fg=iwmgray}
\setbeamertemplate{bibliography item}{}

\definecolor{iwmorange}{RGB}{255,105,0}
\definecolor{iwmgray}{RGB}{67,79,79}
\definecolor{iwmblue}{RGB}{60,180,220}
\definecolor{iwmgreen}{RGB}{145,200,110}
\definecolor{iwmpurple}{RGB}{120,0,75}

\setbeamercolor{title}{fg=iwmorange}
\setbeamercolor{frametitle}{fg=iwmorange}
\setbeamercolor{structure}{fg=iwmorange}
\setbeamercolor{normal text}{fg=iwmgray}
\setbeamercolor{author}{fg=iwmgray}
\setbeamercolor{date}{fg=iwmgray}

\lstset{language = R,%
  basicstyle = \ttfamily\color{iwmgray},
  frame = single,
  rulecolor = \color{iwmgray},
  commentstyle = \slshape\color{iwmgreen},
  keywordstyle = \bfseries\color{iwmgray},
  identifierstyle = \color{iwmpurple},
  stringstyle = \color{iwmblue},
  numbers = none,%left,numberstyle = \tiny,
  basewidth = {.5em, .4em},
  showstringspaces = false,
  emphstyle = \color{red!50!white}}

\title{Introduction to mixed-effects models}
\subtitle{(for hierarchical data)}
\author{Nora Wickelmaier}
\institute{\includegraphics[scale=.2]{iwm_logo_rgb}}
\date{2025-06-24}

\AtBeginSection[]{
  \frame{
    \tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}}

\setbeamertemplate{headline}{
 \begin{beamercolorbox}{section in head}
   \vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt
 \end{beamercolorbox}
}

\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt}

\begin{document}

\begin{frame}{}
\thispagestyle{empty}
\titlepage
\end{frame}

\begin{frame}{Plan for today}
  \begin{itemize}[<+->]
    \item We will walk through an example of a hierarchical data set (students
      in schools)
    \item I will explain the general concepts with the slides
    \item We will switch to R and use the lme4 package to fit the models
    \item You will use R to fit an extension of the model
    \item We will discuss the results
    \item All the materials are here: \url{https://gitea.iwm-tuebingen.de/nwickelmaier/lead_lmm}
      \\~\\
    \item[$\to$] Try to go along in R! Ask as many questions as possible,
      including the ones you usually do not dare to ask (because you are
      supposed to know the answers already or something\dots)
  \end{itemize}
\end{frame}

\begin{frame}{Outline}
\tableofcontents
\end{frame}

\section[Introduction]{Introduction to random effects}

\begin{frame}{Hierarchical data}
  \begin{itemize}[<+->]
    \item Observations often do not come from a simple random sample, but result
      from a hierarchical structure
      \begin{itemize}
        \item Individuals are organized in groups (e.g., students nested in
          classes, or schools)
        \item Persons are observed multiple times (observations nested in
          persons, longitudinal data\nocite{Hedeker2006})
      \end{itemize}
    \item Statistical models for this kind of data are called multilevel models,
      mixed-effects models, random-effects models, covariance components models,
      or hierarchical models
  \end{itemize}
  \vfill
\end{frame}

\begin{frame}{Example: Mathematics achievement study}
\begin{itemize}
  \item The \texttt{hsbdataset.txt} file contains data from the National Center
    for Education Statistics' (NCES) ``High School \& Beyond'' national survey
    of U.S.\ public and Catholic high schools
    \citep{BrykRaudenbush2002}\pause
  \item The data set contains information on 7,185 students from 160 schools:
    their performance on a mathematics test and their socioeconomic
    status\pause
  \item Hierarchical data structure
  \begin{itemize}
    \item Students are organized in schools
    \begin{tabular}{ll}
        $y_{ij}$ & mathematics achievement of student $j$ in school $i$ \\
        $x_{ij}$ & (relative) socioeconomic status of student $j$ \\
                 &  in school $i$ (overall mean 0, centered)
    \end{tabular}\pause
    \item Two levels
    \begin{itemize}
      \item Level 1: Student attributes
      \item Level 2: School attributes
    \end{itemize}
  \end{itemize}
\end{itemize}
\end{frame}

\begin{frame}{Regression with random school effects}
  \begin{itemize}
    \item What is the mean math achievement of the students?
    \item How much do schools vary in mean math achievement?
  \end{itemize}\pause
  \vspace{.8cm}
    \begin{columns}
      \begin{column}{.4\textwidth}
        \begin{tikzpicture}[>=stealth, y=.1cm, x=.4cm, font=\footnotesize]
        \draw[->] (0,0) -- coordinate (x axis mid) (10,0);
        \draw[->] (0,0) -- coordinate (y axis mid) (0,30);
        \node[below=0.1cm] at (x axis mid) {socioeconomic status $x$};
        \node[rotate=90, above=0.0cm] at (y axis mid) {math performance $y$};
        %
        \draw[dashed] (0, 15) -- (10, 15) node [right] {$\beta_0$};
        \draw (0.5, 10) -- (4.5, 10);
        \draw plot[only marks, mark size=0.5pt, mark=*]
            coordinates {(1, 9) (2, 12) (3, 13) (4, 8)};
        \draw (5.5, 20) -- (9.5, 20);
        \draw[<->, shorten <= 1pt, shorten >= 1pt] (7.5, 15) -- (7.5, 17.5) node [right] {$\upsilon_{0i}$} -- (7.5, 20);
        \draw plot[only marks, mark size=0.5pt, mark=*]
            coordinates {(6, 22) (7, 18) (8, 24) (9, 19)};
        \draw[<->, shorten <= 1pt, shorten >= 1pt] (8, 20) -- (8, 22) node [right] {$\varepsilon_{ij}$} -- (8, 24);
        \end{tikzpicture}
      \end{column}\pause
      \begin{column}{.6\textwidth}
        Model equation
    \begin{align*}
    \text{(Level 1)} ~\quad y_{ij} &= b_{0i} + \varepsilon_{ij}\\
    \text{(Level 2)}  \quad b_{0i} &= \beta_0 + \upsilon_{0i}\\
    \text{(2) in (1)} \quad y_{ij} &= \beta_0 + \upsilon_{0i} + \varepsilon_{ij}
    \end{align*}
    with $\upsilon_{0i} \sim N(0, \sigma^2_{\upsilon})$ i.i.d,
    $\varepsilon_{ij} \sim N(0, \sigma^2)$ i.i.d,\\
        $\upsilon_{0i}$ and $\varepsilon_{ij}$ independent
      \end{column}
    \end{columns}
\end{frame}

% \begin{frame}{Models with random school effects}
% \begin{itemize}
%   \item Model equation
%     \begin{align*}
%     \text{(Level 1)}  \quad y_{ij} &= b_{0i} + \varepsilon_{ij}\\
%     \text{(Level 2)}  \quad b_{0i} &= \beta_0 + \upsilon_{0i}\\
%     \text{(2) in (1)} \quad y_{ij} &= \beta_0 + \upsilon_{0i} + \varepsilon_{ij}
%     \end{align*}
%     with $\upsilon_{0i} \sim N(0, \sigma^2_{\upsilon})$ i.i.d,
%     $\varepsilon_{ij} \sim N(0, \sigma^2)$ i.i.d, $\upsilon_{0i}$ and
%     $\varepsilon_{ij}$ independent\pause
%   \item Interpretation\\[1ex]
%     \begin{tabular}{lp{9.5cm}}
%         $\beta_0$ & mean math achievement\\
%         $\upsilon_{0i}$ & random school effect, mean deviation of school $i$
%         from math achievement\\
%         $\varepsilon_{ij}$ & residual of student $j$ in school $i$
%     \end{tabular}
% \end{itemize}
% \end{frame}

\begin{frame}{Null model with random intercepts}
  {Subset of 9 schools}
  \centering
  \vspace{-.5cm}
  \includegraphics[scale = .6]{../figures/hsb_model1}
\end{frame}

\begin{frame}{Random effects}
  \begin{itemize}[<+->]
  \item The problem of observations being grouped in schools, and the
    dependencies this induces, is solved by introducing school effects
  \item For many schools this calls for (too) many parameters
  \item School effects are therefore modeled as random effects (random
    variables) $\upsilon_{0i}$
  \item Only their variance $\sigma^2_\upsilon$ has to be estimated in the model
  \item The total variance of $y_{ij}$ is decomposed into the variance between
    schools $\sigma^2_\upsilon$ and within schools $\sigma^2$
\end{itemize}
\vfill
\end{frame}

\begin{frame}{Results}
  \begin{itemize}[<+->]
  \item The research questions posed above can be answered based on the
    parameter estimates $\hat\beta_0$, $\hat\sigma^2_\upsilon$, and
    $\hat\sigma^2$
  \begin{itemize}
    \item The estimated mean math achievement of students is $\hat\beta_0$
    \item The estimated variance of schools in mean math achievement is
      $\hat\sigma^2_\upsilon$
    \item The proportion of the total variance accounted for by the variance
      between schools is
    \[
      \text{ICC} = \frac{\sigma^2_\upsilon}{\sigma^2_\upsilon + \sigma^2}
    \]
    (Intra-class correlation)
  \end{itemize}

\end{itemize}
\end{frame}


\begin{frame}{Adding socioeconomic status as a predictor}
  \begin{itemize}
    \item How strong is the relationship between students' socioeconomic status
      and their math achievement on average?
    \item How much do schools vary in mean math achievement for students with
      average socioeconomic status?
  \end{itemize}\pause
  \vspace{.4cm}
    \begin{columns}
      \begin{column}{.4\textwidth}
        \begin{tikzpicture}[>=stealth, y=.1cm, x=.4cm, font=\footnotesize]
        \draw[->] (0,0) -- coordinate (x axis mid) (10,0);
        \draw[->] (0,0) -- coordinate (y axis mid) (0,30);
        \node[below=0.1cm] at (x axis mid) {socioeconomic status $x$};
        \node[rotate=90, above=0.0cm] at (y axis mid) {math performance $y$};
        %
        \draw[dashed] (0, 10) node [above right = -0.1cm] {$\beta_0$} -- (10, 20);
        \draw (1.5, 4.5) -- (5.5, 8.5);
        \draw plot[only marks, mark size=0.5pt, mark=*]
            coordinates {(2, 3.5) (3, 7.5) (4, 9.5) (5, 6.5)};
        \draw (5.5, 22.5) -- (9.5, 26.5);
        \draw[<->, shorten <= 1pt, shorten >= 1pt] (7.5, 17.5) -- (7.5, 22) node [right] {$\upsilon_{0i}$} -- (7.5, 24.5);
        \draw plot[only marks, mark size=0.5pt, mark=*]
            coordinates {(6, 24) (7, 21) (8, 29) (9.5, 25)};
        \draw[<->, shorten <= 1pt, shorten >= 1pt] (8, 25) -- (8, 28) node [right] {$\varepsilon_{ij}$} -- (8, 29);
        \draw (3, 13) -- (6, 13) -- (6, 14) node [right] {$\beta_1$} -- (6, 16);
        \end{tikzpicture}
      \end{column}\pause
      \begin{column}{.6\textwidth}
        Model equation
    \begin{align*}
    \text{(Level 1)} ~\quad y_{ij} &= b_{0i} + b_{1i}\,x_{ij} + \varepsilon_{ij}\\
    \text{(Level 2)}  \quad b_{0i} &= \beta_0 + \upsilon_{0i}\\
                      \quad b_{1i} &= \beta_1\\
    \text{(2) in (1)} \quad y_{ij} &= \beta_0 + \beta_1\,x_{ij} +
                                      \upsilon_{0i} + \varepsilon_{ij}
    \end{align*}
    with $\upsilon_{0i} \sim N(0, \sigma^2_{\upsilon})$ i.i.d,
    $\varepsilon_{ij} \sim N(0, \sigma^2)$ i.i.d,\\
        $\upsilon_{0i}$ and $\varepsilon_{ij}$ independent
      \end{column}
    \end{columns}
\end{frame}

% \begin{frame}{Models with random school effects}
% \begin{itemize}
%   \item Regression with random school effects
%     \begin{align*}
%     \text{(Level 1)}  \quad y_{ij} &= b_{0i} + b_{1i}\,x_{ij} + \varepsilon_{ij}\\
%     \text{(Level 2)}  \quad b_{0i} &= \beta_0 + \upsilon_{0i}\\
%                       \quad b_{1i} &= \beta_1\\
%     \text{(2) in (1)} \quad y_{ij} &= \beta_0 + \beta_1\,x_{ij} +
%                                       \upsilon_{0i} + \varepsilon_{ij}
%     \end{align*}
%     with $\upsilon_{0i} \sim N(0, \sigma^2_{\upsilon})$ independent,
%     $\varepsilon_{ij} \sim N(0, \sigma^2)$ independent, $\upsilon_{0i}$ and
%     $\varepsilon_{ij}$ independent
%   \item Interpretation\\[1ex]
%     \begin{tabular}{lp{10cm}}
%         $\beta_0$ & mean mathematics performance for $x_{ij} = 0$, intercept\\
%         $\beta_1$ & mean change in mathematics performance for unit change in
%         socioeconomic status\\
%         $\upsilon_{0i}$ & school-specific deviation from the mean y-intercept\\
%         $\varepsilon_{ij}$ & residual of student $j$ in school $i$
%     \end{tabular}
% \end{itemize}
% \end{frame}

\begin{frame}{Model with covariate and random intercepts}
  {Subset of 9 schools}
  \centering
  \vspace{-.5cm}
  \includegraphics[scale = .6]{../figures/hsb_model2}
\end{frame}

\begin{frame}{}
  \begin{block}{Exercise}
    \begin{itemize}
      \item What would be the next possible extension of this model?\pause
      \item Write down the model equations
        \begin{itemize}
          \item What changes for the fixed effects?
          \item How do the variance components for the random effects change?
        \end{itemize}\pause
      \item How can we interpret the random slopes for this model?\pause
      \item How do we add random slopes to a random intercept model using
        \texttt{lme4::lmer()}?\pause
      \item Fit a model with random slopes for socioeconomic status in R
    \end{itemize}
  \end{block}
\end{frame}

\begin{frame}{Model with covariate and random slopes}
  {Subset of 9 schools}
  \centering
  \vspace{-.5cm}
  \includegraphics[scale = .6]{../figures/hsb_model3}
\end{frame}


% \begin{frame}{Mixed-effects models}
% Possible extensions of the models used so far
% \begin{itemize}
%     \item Random effects
%     \begin{itemize}
%       \item School specific slopes
%       \item Correlations between intercept and slope
%     \end{itemize}
%     \item Fixed effects
%     \begin{itemize}
%       \item At school level: type of school (public vs.\ catholic)
%       \item At student level: gender, intelligence
%     \end{itemize}
%     \item Grouping, levels
%     \begin{itemize}
%       \item Classes, school districts or states (hierarchical)
%       \item Teacher effect within schools (crossed)
%     \end{itemize}
% \end{itemize}
% \end{frame}

% TODO: Include profiling at the end? Maybe to provide some "new" input for
% people who are already more advanced with LMMs?

\section{Hierarchical modeling}

\begin{frame}{HSB data set}
  \centering
  \begin{tabular}{llp{10cm}}
    \hline
    Level & Variable & Description \\
    \hline
    1 & \texttt{mathach} & Performance in mathematics test \\
    1 & \texttt{ses} & (Relative) socioeconomic status (overall mean 0) \\
    2 & \texttt{meanses} & Mean socioeconomic status of the school (overall mean 0) \\
    1 & \texttt{cses} & Centered socioeconomic status of the student (mean for each school 0, difference \texttt{ses - meanses}) \\
    2 & \texttt{school} & School ID \\
    2 & \texttt{sector} & Public (0) or Catholic High School (1) \\
    \hline
  \end{tabular}
\end{frame}

\begin{frame}{Hierarchical regression model}
  Model equation
  \begin{align*}
    \text{(Level 1)} ~\quad y_{ij} =&~b_{0i} + b_{1i}\,cses_{ij} + \varepsilon_{ij}\\
    \text{(Level 2)}  \quad b_{0i} =&~\beta_0 + \beta_2 meanses_i + \beta_4 sector_i + \upsilon_{0i}\\
                      \quad b_{1i} =&~\beta_1 + \beta_3 meanses_i + \beta_5 sector_i + \upsilon_{1i}\\
    \text{(2) in (1)} \quad y_{ij} =&~\beta_0 + \beta_1\,cses_{ij} + \beta_2 meanses_i + \beta_4 sector_i\\
                                   & + \beta_3 (cses_{ij} \times meanses_i) + \beta_5 (cses_{ij} \times sector_i) \\
                                   & + \upsilon_{0i} + \upsilon_{1i}cses_{ij} + \varepsilon_{ij}
    \end{align*}
with
\begin{align*}
  \begin{pmatrix} \upsilon_{0i}\\ \upsilon_{1i} \end{pmatrix} & \sim
    N \left(\begin{pmatrix} 0\\ 0 \end{pmatrix}, \, \boldsymbol{\Sigma}_\upsilon =
      \begin{pmatrix}
        \sigma^2_{\upsilon_0} & \sigma_{\upsilon_0 \upsilon_1} \\
        \sigma_{\upsilon_0 \upsilon_1} & \sigma^2_{\upsilon_1} \\
      \end{pmatrix} \right)~\text{i.i.d} \\
  \boldsymbol{\varepsilon}_i & \sim N(\mathbf{0}, \, \sigma^2
  \mathbf{I}_{n_i})~\text{i.i.d}
\end{align*}
\end{frame}

\begin{frame}{Decomposing socioeconomic status}
  \begin{itemize}[<+->]
    \item In this model, by decomposing the socioeconomic status according to
      the equation
      \[
        ses = cses + meanses
      \]
      its differential effectiveness is considered at each of the levels
    \item At the same time, the effect of the type of school is examined via the
      variable \texttt{sector}
    \item Notice that the formulation of the model assumes dependencies of the
      slope $b_{1i}$ on both mean socioeconomic status and school type, which is
      captured by the interactions of \texttt{cses} with \texttt{meanses} and
      \texttt{sector}, respectively
  \end{itemize}
\end{frame}

\begin{frame}{}
  \begin{block}{Exercise}
    \begin{enumerate}
      \item Compute the model in R using \texttt{lme4::lmer()}
        {\scriptsize
          \begin{align*}
    \text{(Level 1)} ~\quad y_{ij} =&~b_{0i} + b_{1i}\,cses_{ij} + \varepsilon_{ij}\\
    \text{(Level 2)}  \quad b_{0i} =&~\beta_0 + \beta_2 meanses_i + \beta_4 sector_i + \upsilon_{0i}\\
                      \quad b_{1i} =&~\beta_1 + \beta_3 meanses_i + \beta_5 sector_i + \upsilon_{1i}\\
    \text{(2) in (1)} \quad y_{ij} =&~\beta_0 + \beta_1\,cses_{ij} + \beta_2 meanses_i + \beta_4 sector_i
                                    + \beta_3 (cses_{ij} \times meanses_i) + \beta_5 (cses_{ij} \times sector_i) \\
                                   & + \upsilon_{0i} + cses_{ij}\upsilon_{1i} + \varepsilon_{ij}
    \end{align*}
with
$
  \begin{pmatrix} \upsilon_{0i}\\ \upsilon_{1i} \end{pmatrix} \sim
    N \left(\begin{pmatrix} 0\\ 0 \end{pmatrix}, \, \boldsymbol{\Sigma}_\upsilon =
      \begin{pmatrix}
        \sigma^2_{\upsilon_0} & \sigma_{\upsilon_0 \upsilon_1} \\
        \sigma_{\upsilon_0 \upsilon_1} & \sigma^2_{\upsilon_1} \\
      \end{pmatrix} \right)~\text{i.i.d},
  \boldsymbol{\varepsilon}_i \sim N(\mathbf{0}, \, \sigma^2
  \mathbf{I}_{n_i})~\text{i.i.d}
        $ }
      \item Interpret the parameters
    \end{enumerate}
  \end{block}
\end{frame}

\begin{frame}{Results}
  {Fixed effects}
  \begin{itemize}[<+->]
    \item Mean math achievement (i.e., for a student with a mean \texttt{cses}
      score in a school with a mean \texttt{meanses} score) is 12.11 in Public
      High Schools and 13.33 in Catholic High Schools
    \item Effects of socioeconomic status at the two levels
      \begin{itemize}
        \item The effect at the student level depends on the type of school:
          math achievement increases by 2.94 points in Public High Schools and
          by $2.94 - 1.64 = 1.30$ points in Catholic High Schools for a unit
          increase in \texttt{cses}
        \item Higher math achievements are obtained in schools with higher mean
          socioeconomic status
        \item In addition, the dependence of math achievement on \texttt{cses}
          scores is more pronounced in schools with higher \texttt{meanses}
          scores (estimated interaction $> 0$)
      \end{itemize}
  \end{itemize}
  \vfill
\end{frame}

\begin{frame}{Results}
  {Random effects}
  \begin{itemize}[<+->]
    \item The estimate $\hat\sigma^2_{\upsilon_0} = 2.32$ of the variance of
      mean school performance provides room for improving prediction by
      including additional predictors
    \item However, there is virtually no variation in the dependence of math
      achievement on \texttt{cses} across schools ($\hat\sigma^2_{\upsilon_1} =
      0.07$), which should also be noted when interpreting the reported
      correlation of 0.48
    \item  The corresponding covariance has an estimated value of
      $\hat\sigma_{\upsilon_0\upsilon_1} = 0.48\cdot
      \hat\sigma_{\upsilon_0}\cdot\hat\sigma_{\upsilon_1} = 0.19$
    \item These results suggest a simplified model of the dependence of math
      achievement on \texttt{cses}, where the intercept, but not the slope,
      varies across schools
  \end{itemize}
  \vfill
\end{frame}

\begin{frame}{Summary}
  \begin{itemize}
    \item Regression models with fixed and random effects\pause
      \begin{itemize}
        \item allow for adequately modeling hierarchical data structures
          \begin{itemize}
            \item longitudinal data
            \item individuals organized in groups (e.g., students in classes, or
              schools)
          \end{itemize}\pause
        \item allow for adequately modeling the sources of error occurring in
          this context\pause
        \item offer an optimal trade-off between individual and aggregate data
          analysis
          \begin{itemize}
            \item while individual differences are modeled, information
              aggregated over the sample is exploited, too
          \end{itemize}\pause
      \end{itemize}
      \item Therefore, linear mixed-effects models allow for integrating
        differential and general psychological aspects within a common
        theoretical framework
  \end{itemize}
  \vfill
\end{frame}

\begin{frame}{What we learned today\dots}
  {\dots and how to go on}
  \pause
  \begin{enumerate}[<+->]
    \item We learned
      \begin{itemize}
        \item The basic concept of random effects and why to include them in a
          model
        \item How to compute a linear mixed-effects model in R using
          \texttt{lmer()} from the lme4 package
        \item How to use a hierarchical model to separate individual and school
          differences
        \item How to interpret parameters in a linear mixed-effects model
      \end{itemize}
    \item Next steps
      \begin{itemize}
        \item Do this exercise \url{xxx} using the JSP data set in R
        \item It has a very similar structure to the HSB data set and this
          will help you to generalize the concepts we learned today
        \item You can send questions to me and even make an appointment with me
          to go over your solution
      \end{itemize}
  \end{enumerate}
\end{frame}

\appendix
%\begin{frame}[allowframebreaks]{References}
\begin{frame}{References}
  \printbibliography
  \vfill
\end{frame}

\end{document}