Initialize repository

2025-06-20 10:42:07 +02:00 · 2025-06-20 10:42:07 +02:00 · 72bd3fcaa7
commit 72bd3fcaa7
11 changed files with 7956 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,30 @@
+# Abstract
+
+This workshop will give a short introduction to linear mixed-effects models with
+an example from educational science. We will look at a hierarchical data set
+containing students in schools. The first part will be a basic introduction to
+the concept of random effects and how they extend linear regression. This is
+meant to get everybody on the same page and introduce some notation. We will fit
+these simple models together in R using the lme4 package. In the second part, a
+more complex hierarchical model will be introduced. We will try to understand
+how models like these can be used to answer research questions concerning
+different levels of the data. Again, we will use R to fit this model. This
+course is suited for novices to mixed-effects models who want to understand the
+basic concepts, but also for people with a bit more expertise using hierarchical
+models who want to dig into more details and deepen their understanding of
+parameter interpretation.
+
+# Instructor / speaker
+
+Dr. Nora Wickelmaier, statistics consultant at the Leibniz-Institut für
+Wissensmedien (IWM), Tübingen
+
+# Prerequisites
+
+Participants will need to have installed:
+
+* a current R version (https://cran.r-project.org/)
+* an IDE for R (like RStudio or VSCode) or a text editor with syntax
+  highlighting (like Vim or Notepad++)
+* the R package lme4 (https://cran.r-project.org/package=lme4)
+
--- a/code/hsb.R
+++ b/code/hsb.R
@ -0,0 +1,94 @@
+library(lme4)
+library(lattice)
+
+# Load the High School & Beyond data set
+dat <- read.table(file = "data/hsbdataset.txt", header = TRUE)
+
+# Attach the grand mean and the school-wise mean of mathach to every row
+dat[["gmmath"]] <- mean(dat[["mathach"]])
+dat[["meanmath"]] <- ave(dat[["mathach"]], dat[["school"]])
+
+# Visual check of ses - meanses against the cses column
+plot(x = dat$ses - dat$meanses, y = dat$cses)
+
+
+# mathach against cses, all schools pooled in one panel
+xyplot(mathach ~ cses, data = dat)
+
+# One panel per school: mathach as points ("p"), meanmath and gmmath as
+# regression lines ("r"); distribute.type assigns one type per series
+xyplot(mathach + meanmath + gmmath ~ cses | factor(school),
+       data = dat,
+       distribute.type = TRUE,
+       type = c("p", "r", "r"),
+       col = c("#91C86E", "#91C86E", "#78004B"))
+
+# Lines only: gmmath and meanmath per school
+xyplot(gmmath + meanmath ~ cses | factor(school), data = dat, type = "r")
+
+
+# Settings shared by the per-school fit plots below: observed values as
+# points, the two prediction series as regression lines
+fit_types <- c("p", "r", "r")
+fit_cols <- c("#91C86E", "#91C86E", "#78004B")
+
+
+# m1: intercept-only model with a random intercept for school
+m1 <- lmer(mathach ~ 1 + (1 | school), dat)
+
+# predict(m1) includes the random effects; re.form = NA leaves them out
+xyplot(mathach + predict(m1) + predict(m1, re.form = NA) ~ cses | factor(school),
+       data = dat, col = fit_cols, type = fit_types, distribute.type = TRUE)
+
+# ICC: school variance relative to school variance plus residual variance
+school_var_m1 <- VarCorr(m1)[[1]]
+school_var_m1 / (school_var_m1 + sigma(m1)^2)
+
+sjPlot::tab_model(m1)
+
+
+# m2: adds cses as a fixed effect, still random intercepts only
+m2 <- lmer(mathach ~ cses + (1 | school), dat)
+
+xyplot(mathach + predict(m2) + predict(m2, re.form = NA) ~ cses | factor(school),
+       data = dat, col = fit_cols, type = fit_types, distribute.type = TRUE)
+
+# ICC, computed in the same way from the variance components of m2
+school_var_m2 <- VarCorr(m2)[[1]]
+school_var_m2 / (school_var_m2 + sigma(m2)^2)
+
+sjPlot::tab_model(m1, m2)
+
+
+# m3: random intercept and random cses slope for school
+m3 <- lmer(mathach ~ cses + (cses | school), dat)
+
+xyplot(mathach + predict(m3) + predict(m3, re.form = NA) ~ cses | factor(school),
+       data = dat, col = fit_cols, type = fit_types, distribute.type = TRUE)
+
+sjPlot::tab_model(m1, m2, m3)
+
+
+# m4: adds sector as a further fixed effect
+m4 <- lmer(mathach ~ cses + sector + (cses | school), data = dat)
+
+sjPlot::tab_model(m1, m2, m3, m4)
+
+# m5: cses * sector, i.e. both main effects plus their interaction
+m5 <- lmer(mathach ~ cses * sector + (cses | school), data = dat)
+
+sjPlot::tab_model(m1, m2, m3, m4, m5)
+
+# Pooled scatter plot with points and a regression line per sector value
+xyplot(mathach ~ cses, data = dat, type = c("p", "r"), groups = sector)
+
+
+
+# lmm.1: cross-level interactions (meanses x cses, sector x cses) with random
+# intercepts only; fitted by maximum likelihood (REML = FALSE)
+lmm.1 <- lmer(mathach ~ meanses*cses + sector*cses + (1 | school), data = dat,
+              REML = FALSE)
+
+# lmm.2: same fixed effects, plus a random cses slope for school
+lmm.2 <- lmer(mathach ~ meanses*cses + sector*cses + (1 + cses | school),
+              data = dat, REML = FALSE)
+
+# Prediction grid over cses and meanses. The grid vectors get descriptive
+# names so that base::c() is not masked by a variable called `c`, and
+# length.out is spelled out instead of relying on partial argument matching.
+cses_grid <- seq(-2, 2, length.out = 51)
+meanses_grid <- seq(-1, 1, length.out = 26)
+
+# expand.grid() varies its first argument fastest, so the predictions below
+# fill a matrix with one row per cses value and one column per meanses value
+ndat <- expand.grid(cses = cses_grid, meanses = meanses_grid)
+
+# Hold sector at its level "0" for all grid points
+# NOTE(review): sector is supplied as a factor here; if dat$sector is a numeric
+# 0/1 column, a plain numeric 0 would match the fitted column type - confirm
+ndat$sector <- factor(0, levels = c("0", "1"))
+
+# Fixed-effects-only predictions (re.form = NA) of lmm.2 on the grid
+z <- matrix(predict(lmm.2, newdata = ndat, re.form = NA),
+            nrow = length(cses_grid))
+
+persp(cses_grid, meanses_grid, z, theta = 40, phi = 20, col = "lightblue",
+      ltheta = 60, shade = .9,
+      xlab = "cses", ylab = "meanses", zlab = "mathach", main = "Model 2")
+
+# lmm.3: drops the meanses x cses interaction, keeps the meanses main effect
+lmm.3 <- lmer(mathach ~ meanses + sector*cses + (1 + cses | school),
+              data = dat, REML = FALSE)
+
+z <- matrix(predict(lmm.3, newdata = ndat, re.form = NA),
+            nrow = length(cses_grid))
+
+persp(cses_grid, meanses_grid, z, theta = 40, phi = 20, col = "lightblue",
+      ltheta = 60, shade = .9,
+      xlab = "cses", ylab = "meanses", zlab = "mathach", main = "Model 3")
+
+
+# TODO: Add profiling to show instability of parameter estimation?
+
--- a/data/hsbdataset.txt
+++ b/data/hsbdataset.txt
--- a/figures/hsb_model.R
+++ b/figures/hsb_model.R
@ -0,0 +1,73 @@
+library(lme4)
+library(lattice)
+
+dat <- read.table("data/hsbdataset.txt", header = TRUE)
+
+# Draw one model figure for the slides and write it to a PDF file.
+#
+# model:   fitted lmer() model for mathach with grouping factor school
+# file:    path of the PDF file to create
+# plotdat: rows to plot; must contain the columns mathach, cses and school
+#
+# Each panel (one per school) shows the observed scores as gray points, the
+# prediction including the school effects as a green line, and the prediction
+# from the fixed effects only (re.form = NA) as a purple line.
+# Returns `file` invisibly.
+save_model_plot <- function(model, file, plotdat) {
+  # Compute both prediction series up front so that the plot formula only
+  # refers to plain columns of plotdat
+  plotdat$pred_school <- predict(model, newdata = plotdat)
+  plotdat$pred_fixed <- predict(model, re.form = NA, newdata = plotdat)
+
+  pdf(file, height = 5, width = 5)
+  # Close the device even if plotting fails, so no PDF device is left open
+  on.exit(dev.off(), add = TRUE)
+
+  # lattice plots are only drawn when they are printed. Inside a function (and
+  # when the script is run via source()) nothing is auto-printed, so print()
+  # has to be called explicitly; otherwise the PDF stays empty.
+  print(
+    xyplot(mathach + pred_school + pred_fixed ~ cses | factor(school),
+           plotdat,
+           pch = 16,
+           cex = 0.4,
+           type = c("p", "r", "r"),
+           lwd = 2,
+           xlab = "socioeconomic status",
+           ylab = "math performance",
+           distribute.type = TRUE,
+           par.strip.text = list(cex = 0.8),
+           col = c("gray80", "#91C86E", "#78004B"))
+  )
+
+  invisible(file)
+}
+
+# Model 1 - null model with random intercepts
+m1 <- lmer(mathach ~ 1 + (1 | school), dat)
+
+# Plot with subsample of schools
+# (fixed seed so that the same 9 schools are drawn on every run)
+set.seed(1427)
+subdat <- subset(dat, dat$school %in% sample(unique(dat$school), size = 9))
+
+save_model_plot(m1, "figures/hsb_model1.pdf", subdat)
+
+# Model 2 - random intercept model
+m2 <- lmer(mathach ~ cses + (1 | school), dat)
+
+# Plot for all schools on the current device (not written to the slide PDFs)
+xyplot(mathach + predict(m2) + predict(m2, re.form = NA) ~ cses | factor(school),
+       dat, type = c("p", "r", "r"), distribute.type = TRUE,
+       col = c("#91C86E", "#91C86E", "#78004B"))
+
+
+save_model_plot(m2, "figures/hsb_model2.pdf", subdat)
+
+# Model 3 - random slope model
+m3 <- lmer(mathach ~ cses + (cses | school), dat)
+
+# Plot for all schools on the current device (not written to the slide PDFs)
+xyplot(mathach + predict(m3) + predict(m3, re.form = NA) ~ cses | factor(school),
+       dat, type = c("p", "r", "r"), distribute.type = TRUE,
+       col = c("#91C86E", "#91C86E", "#78004B"))
+
+
+save_model_plot(m3, "figures/hsb_model3.pdf", subdat)
+
--- a/figures/hsb_model1.pdf
+++ b/figures/hsb_model1.pdf
--- a/figures/hsb_model2.pdf
+++ b/figures/hsb_model2.pdf
--- a/figures/hsb_model3.pdf
+++ b/figures/hsb_model3.pdf
--- a/slides/iwm_logo_rgb.pdf
+++ b/slides/iwm_logo_rgb.pdf
--- a/slides/lead_lmm.pdf
+++ b/slides/lead_lmm.pdf
--- a/slides/lead_lmm.tex
+++ b/slides/lead_lmm.tex
@ -0,0 +1,551 @@
+\documentclass[aspectratio=169]{beamer}
+
+\usepackage{listings}
+\usepackage[utf8,latin1]{inputenc}
+\usepackage[style = apa, backend = biber, natbib = true]{biblatex}
+\usepackage{tikz}
+\addbibresource{lit.bib}
+
+\makeatletter \def\newblock{\beamer@newblock} \makeatother
+
+\beamertemplatenavigationsymbolsempty
+\setbeamertemplate{itemize items}[circle]
+\setbeamertemplate{section in toc}[circle]
+\mode<beamer>{\setbeamercolor{math text displayed}{fg=iwmgray}}
+\setbeamercolor{block body}{bg=iwmorange!50!white}
+\setbeamercolor{block title}{fg=white, bg=iwmorange}
+
+% Definitions for biblatex
+\setbeamercolor{bibliography entry note}{fg=iwmgray}
+\setbeamercolor{bibliography entry author}{fg=iwmgray}
+\setbeamertemplate{bibliography item}{}
+
+\definecolor{iwmorange}{RGB}{255,105,0}
+\definecolor{iwmgray}{RGB}{67,79,79}
+\definecolor{iwmblue}{RGB}{60,180,220}
+\definecolor{iwmgreen}{RGB}{145,200,110}
+\definecolor{iwmpurple}{RGB}{120,0,75}
+
+\setbeamercolor{title}{fg=iwmorange}
+\setbeamercolor{frametitle}{fg=iwmorange}
+\setbeamercolor{structure}{fg=iwmorange}
+\setbeamercolor{normal text}{fg=iwmgray}
+\setbeamercolor{author}{fg=iwmgray}
+\setbeamercolor{date}{fg=iwmgray}
+
+\lstset{language = R,%
+  basicstyle = \ttfamily\color{iwmgray},
+  frame = single,
+  rulecolor = \color{iwmgray},
+  commentstyle = \slshape\color{iwmgreen},
+  keywordstyle = \bfseries\color{iwmgray},
+  identifierstyle = \color{iwmpurple},
+  stringstyle = \color{iwmblue},
+  numbers = none,%left,numberstyle = \tiny,
+  basewidth = {.5em, .4em},
+  showstringspaces = false,
+  emphstyle = \color{red!50!white}}
+
+\title{Introduction to mixed-effects models}
+\subtitle{(for hierarchical data)}
+\author{Nora Wickelmaier}
+\institute{\includegraphics[scale=.2]{iwm_logo_rgb}}
+\date{2025-06-24}
+
+\AtBeginSection[]{
+  \frame{
+    \tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}}
+
+\setbeamertemplate{headline}{
+ \begin{beamercolorbox}{section in head}
+   \vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt
+ \end{beamercolorbox}
+}
+
+\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt}
+
+\begin{document}
+
+\begin{frame}{}
+\thispagestyle{empty}
+\titlepage
+\end{frame}
+
+\begin{frame}{Plan for today}
+  \begin{itemize}[<+->]
+    \item We will walk through an example for a hierarchical data set (students
+      in schools)
+    \item I will explain the general concepts with the slides
+    \item We will switch to R and use the lme4 package to fit the models
+    \item You will use R to fit an extension of the model
+    \item We will discuss the results\\~\\
+    \item[$\to$] Try to go along in R! Ask as many questions as possible, also
+      the ones you usually do not dare to ask (because you are supposed to know
+      them already or something\dots)
+  \end{itemize}
+\end{frame}
+
+\begin{frame}{Outline}
+\tableofcontents
+\end{frame}
+
+\section[Introduction]{Introduction to random effects}
+
+\begin{frame}{Hierarchical data}
+  \begin{itemize}[<+->]
+    \item Observations often do not come from a simple random sample, but result
+      from a hierarchical structure
+      \begin{itemize}
+        \item Individuals are organized in groups (e.g., students nested in
+          classes, or schools)
+        \item Persons are observed multiple times (observations nested in
+          persons, longitudinal data\nocite{Hedeker2006})
+      \end{itemize}
+    \item Statistical models for this kind of data are called multilevel models,
+      mixed-effects models, random-effects models, covariance components models,
+      or hierarchical models
+  \end{itemize}
+  \vfill
+\end{frame}
+
+\begin{frame}{Example: Mathematics achievement study}
+\begin{itemize}
+  \item The \texttt{hsbdataset.txt} file contains data from the National Center
+    for Education Statistics' (NCES) ``High School \& Beyond'' national survey
+    of U.S.\ public and Catholic high schools
+    \citep{BrykRaudenbush2002}\pause
+  \item The data set consists of information on 7,185 students from 160 schools
+    on student performance on a mathematics test and information concerning
+    their socioeconomic status\pause
+  \item Hierarchical data structure
+  \begin{itemize}
+    \item Students are organized in schools
+    \begin{tabular}{ll}
+        $y_{ij}$ & mathematics achievement of student $j$ in school $i$ \\
+        $x_{ij}$ & (relative) socioeconomic status of student $j$ \\
+                 &  in school $i$ (overall mean 0, centered)
+    \end{tabular}\pause
+    \item Two levels
+    \begin{itemize}
+      \item Level 1: Student attributes
+      \item Level 2: School attributes
+    \end{itemize}
+  \end{itemize}
+\end{itemize}
+\end{frame}
+
+\begin{frame}{Regression with random school effects}
+  \begin{itemize}
+    \item What is the mean math achievement of the students?
+    \item How much do schools vary in mean math achievement?
+  \end{itemize}\pause
+  \vspace{.8cm}
+    \begin{columns}
+      \begin{column}{.4\textwidth}
+        \begin{tikzpicture}[>=stealth, y=.1cm, x=.4cm, font=\footnotesize]
+        \draw[->] (0,0) -- coordinate (x axis mid) (10,0);
+        \draw[->] (0,0) -- coordinate (y axis mid) (0,30);
+        \node[below=0.1cm] at (x axis mid) {socioeconomic status $x$};
+        \node[rotate=90, above=0.0cm] at (y axis mid) {math performance $y$};
+        %
+        \draw[dashed] (0, 15) -- (10, 15) node [right] {$\beta_0$};
+        \draw (0.5, 10) -- (4.5, 10);
+        \draw plot[only marks, mark size=0.5pt, mark=*]
+            coordinates {(1, 9) (2, 12) (3, 13) (4, 8)};
+        \draw (5.5, 20) -- (9.5, 20);
+        \draw[<->, shorten <= 1pt, shorten >= 1pt] (7.5, 15) -- (7.5, 17.5) node [right] {$\upsilon_{0i}$} -- (7.5, 20);
+        \draw plot[only marks, mark size=0.5pt, mark=*]
+            coordinates {(6, 22) (7, 18) (8, 24) (9, 19)};
+        \draw[<->, shorten <= 1pt, shorten >= 1pt] (8, 20) -- (8, 22) node [right] {$\varepsilon_{ij}$} -- (8, 24);
+        \end{tikzpicture}
+      \end{column}\pause
+      \begin{column}{.6\textwidth}
+        Model equation
+    \begin{align*}
+    \text{(Level 1)}  \quad y_{ij} &= b_{0i} + \varepsilon_{ij}\\
+    \text{(Level 2)}  \quad b_{0i} &= \beta_0 + \upsilon_{0i}\\
+    \text{(2) in (1)} \quad y_{ij} &= \beta_0 + \upsilon_{0i} + \varepsilon_{ij}
+    \end{align*}
+    with $\upsilon_{0i} \sim N(0, \sigma^2_{\upsilon})$ i.i.d,
+    $\varepsilon_{ij} \sim N(0, \sigma^2)$ i.i.d,\\
+        $\upsilon_{0i}$ and $\varepsilon_{ij}$ independent
+      \end{column}
+    \end{columns}
+\end{frame}
+
+% \begin{frame}{Models with random school effects}
+% \begin{itemize}
+%   \item Model equation
+%     \begin{align*}
+%     \text{(Level 1)}  \quad y_{ij} &= b_{0i} + \varepsilon_{ij}\\
+%     \text{(Level 2)}  \quad b_{0i} &= \beta_0 + \upsilon_{0i}\\
+%     \text{(2) in (1)} \quad y_{ij} &= \beta_0 + \upsilon_{0i} + \varepsilon_{ij}
+%     \end{align*}
+%     with $\upsilon_{0i} \sim N(0, \sigma^2_{\upsilon})$ i.i.d,
+%     $\varepsilon_{ij} \sim N(0, \sigma^2)$ i.i.d, $\upsilon_{0i}$ and
+%     $\varepsilon_{ij}$ independent\pause
+%   \item Interpretation\\[1ex]
+%     \begin{tabular}{lp{9.5cm}}
+%         $\beta_0$ & mean math achievement\\
+%         $\upsilon_{0i}$ & random school effect, mean deviation of school $i$
+%         from math achievement\\
+%         $\varepsilon_{ij}$ & residual of student $j$ in school $i$
+%     \end{tabular}
+% \end{itemize}
+% \end{frame}
+
+\begin{frame}{Null model with random intercepts}
+  {Subset of 9 schools}
+  \centering
+  \vspace{-.5cm}
+  \includegraphics[scale = .6]{../figures/hsb_model1}
+\end{frame}
+
+\begin{frame}{Random effects}
+  \begin{itemize}[<+->]
+  \item Grouping observations in schools induces dependencies among them; this
+    problem is solved by introducing school effects
+  \item For many schools this calls for (too) many parameters
+  \item School effects are therefore modeled as random effects (random
+    variables) $\upsilon_{0i}$
+  \item Only their variance $\sigma^2_\upsilon$ has to be estimated in the model
+  \item The total variance of $y_{ij}$ is decomposed into the variance between
+    schools $\sigma^2_\upsilon$ and within schools $\sigma^2$
+\end{itemize}
+\vfill
+\end{frame}
+
+\begin{frame}{Results}
+  \begin{itemize}[<+->]
+  \item The research questions posed above can be answered based on the
+    parameter estimates $\hat\beta_0$, $\hat\sigma^2_\upsilon$ and
+    $\hat\sigma^2$
+  \begin{itemize}
+    \item The estimated mean math achievement of students is $\hat\beta_0$
+    \item The estimated variance of schools in mean math achievement is
+      $\hat\sigma^2_\upsilon$
+    \item The proportion of the total variance accounted for by the variance
+      between schools is
+    \[
+      \text{ICC} = \frac{\sigma^2_\upsilon}{\sigma^2_\upsilon + \sigma^2}
+    \]
+    (Intra-class correlation)
+  \end{itemize}
+
+\end{itemize}
+\end{frame}
+
+
+\begin{frame}{Regression with random school effects}
+  \begin{itemize}
+    \item How strong is the relationship between students' socioeconomic status
+      and their math achievement on average?
+    \item How much do schools vary in mean math achievement for students with
+      average socioeconomic status?
+  \end{itemize}\pause
+  \vspace{.4cm}
+    \begin{columns}
+      \begin{column}{.4\textwidth}
+        \begin{tikzpicture}[>=stealth, y=.1cm, x=.4cm, font=\footnotesize]
+        \draw[->] (0,0) -- coordinate (x axis mid) (10,0);
+        \draw[->] (0,0) -- coordinate (y axis mid) (0,30);
+        \node[below=0.1cm] at (x axis mid) {socioeconomic status $x$};
+        \node[rotate=90, above=0.0cm] at (y axis mid) {math performance $y$};
+        %
+        \draw[dashed] (0, 10) node [above right = -0.1cm] {$\beta_0$} -- (10, 20);
+        \draw (1.5, 4.5) -- (5.5, 8.5);
+        \draw plot[only marks, mark size=0.5pt, mark=*]
+            coordinates {(2, 3.5) (3, 7.5) (4, 9.5) (5, 6.5)};
+        \draw (5.5, 22.5) -- (9.5, 26.5);
+        \draw[<->, shorten <= 1pt, shorten >= 1pt] (7.5, 17.5) -- (7.5, 22) node [right] {$\upsilon_{0i}$} -- (7.5, 24.5);
+        \draw plot[only marks, mark size=0.5pt, mark=*]
+            coordinates {(6, 24) (7, 21) (8, 29) (9.5, 25)};
+        \draw[<->, shorten <= 1pt, shorten >= 1pt] (8, 25) -- (8, 28) node [right] {$\varepsilon_{ij}$} -- (8, 29);
+        \draw (3, 13) -- (6, 13) -- (6, 14) node [right] {$\beta_1$} -- (6, 16);
+        \end{tikzpicture}
+      \end{column}\pause
+      \begin{column}{.6\textwidth}
+        Model equation
+    \begin{align*}
+    \text{(Level 1)}  \quad y_{ij} &= b_{0i} + b_{1i}\,x_{ij} + \varepsilon_{ij}\\
+    \text{(Level 2)}  \quad b_{0i} &= \beta_0 + \upsilon_{0i}\\
+                      \quad b_{1i} &= \beta_1\\
+    \text{(2) in (1)} \quad y_{ij} &= \beta_0 + \beta_1\,x_{ij} +
+                                      \upsilon_{0i} + \varepsilon_{ij}
+    \end{align*}
+    with $\upsilon_{0i} \sim N(0, \sigma^2_{\upsilon})$ i.i.d,
+    $\varepsilon_{ij} \sim N(0, \sigma^2)$ i.i.d,\\
+        $\upsilon_{0i}$ and $\varepsilon_{ij}$ independent
+      \end{column}
+    \end{columns}
+\end{frame}
+
+% \begin{frame}{Models with random school effects}
+% \begin{itemize}
+%   \item Regression with random school effects
+%     \begin{align*}
+%     \text{(Level 1)}  \quad y_{ij} &= b_{0i} + b_{1i}\,x_{ij} + \varepsilon_{ij}\\
+%     \text{(Level 2)}  \quad b_{0i} &= \beta_0 + \upsilon_{0i}\\
+%                       \quad b_{1i} &= \beta_1\\
+%     \text{(2) in (1)} \quad y_{ij} &= \beta_0 + \beta_1\,x_{ij} +
+%                                       \upsilon_{0i} + \varepsilon_{ij}
+%     \end{align*}
+%     with $\upsilon_{0i} \sim N(0, \sigma^2_{\upsilon})$ independent,
+%     $\varepsilon_{ij} \sim N(0, \sigma^2)$ independent, $\upsilon_{0i}$ and
+%     $\varepsilon_{ij}$ independent
+%   \item Interpretation\\[1ex]
+%     \begin{tabular}{lp{10cm}}
+%         $\beta_0$ & mean mathematics performance for $x_{ij} = 0$, intercept\\
+%         $\beta_1$ & mean change in mathematics performance for unit change in
+%         socioeconomic status\\
+%         $\upsilon_{0i}$ & school-specific deviation from the mean y-intercept\\
+%         $\varepsilon_{ij}$ & residual of student $j$ in school $i$
+%     \end{tabular}
+% \end{itemize}
+% \end{frame}
+
+\begin{frame}{Model with covariate and random intercepts}
+  {Subset of 9 schools}
+  \centering
+  \vspace{-.5cm}
+  \includegraphics[scale = .6]{../figures/hsb_model2}
+\end{frame}
+
+\begin{frame}{}
+  \begin{block}{Exercise}
+    \begin{itemize}
+      \item What would be the next possible extension of this model?\pause
+      \item Write down the model equations
+        \begin{itemize}
+          \item What changes for the fixed effects?
+          \item How do the variance components for the random effects change?
+        \end{itemize}\pause
+      \item How can we interpret the random slopes for this model?\pause
+      \item How do we add random slopes to a random intercept model using
+        \texttt{lme4::lmer()}?
+      \item Fit a model with random slopes for socioeconomic status in R
+    \end{itemize}
+  \end{block}
+\end{frame}
+
+\begin{frame}{Model with covariate and random slopes}
+  {Subset of 9 schools}
+  \centering
+  \vspace{-.5cm}
+  \includegraphics[scale = .6]{../figures/hsb_model3}
+\end{frame}
+
+
+% \begin{frame}{Mixed-effects models}
+% Possible extensions of the models used so far
+% \begin{itemize}
+%     \item Random effects
+%     \begin{itemize}
+%       \item School specific slopes
+%       \item Correlations between intercept and slope
+%     \end{itemize}
+%     \item Fixed effects
+%     \begin{itemize}
+%       \item At school level: type of school (public vs.\ catholic)
+%       \item At student level: gender, intelligence
+%     \end{itemize}
+%     \item Grouping, levels
+%     \begin{itemize}
+%       \item Classes, school districts or states (hierarchical)
+%       \item Teacher effect within schools (crossed)
+%     \end{itemize}
+% \end{itemize}
+% \end{frame}
+
+% TODO: Include profiling at the end? Maybe to provide some "new" input for
+% people who are already more advanced with LMMs?
+
+\section{Hierarchical modeling}
+
+\begin{frame}{HSB data set}
+  \centering
+  \begin{tabular}{llp{10cm}}
+    \hline
+    Level & Variable & Description \\
+    \hline
+    1 & \texttt{mathach} & Performance in mathematics test \\
+    1 & \texttt{ses} & (relative) socioeconomic status (overall mean 0) \\
+    2 & \texttt{meanses} & mean socioeconomic status of the school (overall mean 0) \\
+    1 & \texttt{cses} & Centered socioeconomic status of the student (mean for each school 0, difference \texttt{ses - meanses}) \\
+    2 & \texttt{school} & school ID \\
+    2 & \texttt{sector} & Public (0) or Catholic High School (1) \\
+    \hline
+  \end{tabular}
+\end{frame}
+
+\begin{frame}{Hierarchical regression model}
+  Model equation
+  \begin{align*}
+    \text{(Level 1)}  \quad y_{ij} =&~b_{0i} + b_{1i}\,cses_{ij} + \varepsilon_{ij}\\
+    \text{(Level 2)}  \quad b_{0i} =&~\beta_0 + \beta_2 meanses_i + \beta_4 sector_i + \upsilon_{0i}\\
+                      \quad b_{1i} =&~\beta_1 + \beta_3 meanses_i + \beta_5 sector_i + \upsilon_{1i}\\
+    \text{(2) in (1)} \quad y_{ij} =&~\beta_0 + \beta_1\,cses_{ij} + \beta_2 meanses_i + \beta_4 sector_i\\
+                                   & + \beta_3 (cses_{ij} \times meanses_i) + \beta_5 (cses_{ij} \times sector_i) \\
+                                   & + \upsilon_{0i} + cses_{ij}\upsilon_{1i} + \varepsilon_{ij}
+    \end{align*}
+with
+\begin{align*}
+  \begin{pmatrix} \upsilon_{0i}\\ \upsilon_{1i} \end{pmatrix} & \sim
+    N \left(\begin{pmatrix} 0\\ 0 \end{pmatrix}, \, \boldsymbol{\Sigma}_\upsilon =
+      \begin{pmatrix}
+        \sigma^2_{\upsilon_0} & \sigma_{\upsilon_0 \upsilon_1} \\
+        \sigma_{\upsilon_0 \upsilon_1} & \sigma^2_{\upsilon_1} \\
+      \end{pmatrix} \right)~\text{i.i.d} \\
+  \boldsymbol{\varepsilon}_i & \sim N(\mathbf{0}, \, \sigma^2
+  \mathbf{I}_{n_i})~\text{i.i.d}
+\end{align*}
+\end{frame}
+
+\begin{frame}{Decomposing socioeconomic status}
+  \begin{itemize}[<+->]
+    \item In this model, by decomposing the socioeconomic status according to
+      the equation
+      \[
+        ses = cses + meanses
+      \]
+      its differential effectiveness is considered at each of the levels
+    \item At the same time, the effect of the type of school is examined via the
+      variable \texttt{sector}
+    \item Notice that the formulation of the model assumes dependencies of the
+      slope $b_{1i}$ on both mean socioeconomic status and school type, which is
+      captured by the interactions of \texttt{cses} with \texttt{meanses} and
+      \texttt{sector}, respectively
+  \end{itemize}
+\end{frame}
+
+\begin{frame}{}
+  \begin{block}{Exercise}
+    \begin{enumerate}
+      \item Compute the model in R using \texttt{lme4::lmer()}
+        {\scriptsize
+          \begin{align*}
+    \text{(Level 1)}  \quad y_{ij} =&~b_{0i} + b_{1i}\,cses_{ij} + \varepsilon_{ij}\\
+    \text{(Level 2)}  \quad b_{0i} =&~\beta_0 + \beta_2 meanses_i + \beta_4 sector_i + \upsilon_{0i}\\
+                      \quad b_{1i} =&~\beta_1 + \beta_3 meanses_i + \beta_5 sector_i + \upsilon_{1i}\\
+    \text{(2) in (1)} \quad y_{ij} =&~\beta_0 + \beta_1\,cses_{ij} + \beta_2 meanses_i + \beta_4 sector_i
+                                    + \beta_3 (cses_{ij} \times meanses_i) + \beta_5 (cses_{ij} \times sector_i) \\
+                                   & + \upsilon_{0i} + cses_{ij}\upsilon_{1i} + \varepsilon_{ij}
+    \end{align*}
+with
+$
+  \begin{pmatrix} \upsilon_{0i}\\ \upsilon_{1i} \end{pmatrix} \sim
+    N \left(\begin{pmatrix} 0\\ 0 \end{pmatrix}, \, \boldsymbol{\Sigma}_\upsilon =
+      \begin{pmatrix}
+        \sigma^2_{\upsilon_0} & \sigma_{\upsilon_0 \upsilon_1} \\
+        \sigma_{\upsilon_0 \upsilon_1} & \sigma^2_{\upsilon_1} \\
+      \end{pmatrix} \right)~\text{i.i.d}, 
+  \boldsymbol{\varepsilon}_i \sim N(\mathbf{0}, \, \sigma^2
+  \mathbf{I}_{n_i})~\text{i.i.d}
+        $ }
+      \item Interpret the parameters
+    \end{enumerate}
+  \end{block}
+\end{frame}
+
+\begin{frame}{Results}
+  {Fixed effects}
+  \begin{itemize}[<+->]
+    \item Mean math achievement (i.e., for a student with a mean \texttt{cses}
+      score in a school with a mean \texttt{meanses} score) is 12.11 in Public
+      High Schools and 13.33 in Catholic High Schools
+    \item Effects of socioeconomic status at the two levels
+      \begin{itemize}
+        \item The effect at the student level depends on the type of school:
+          math achievement increases by 2.94 points in Public High Schools and
+          by $2.94 - 1.64 = 1.30$ points in Catholic High Schools for a unit
+          increase in \texttt{cses}
+        \item Higher math achievements are obtained in schools with higher mean
+          socioeconomic status
+        \item In addition, the dependence of math achievement on \texttt{cses}
+          scores is more pronounced in schools with higher \texttt{meanses}
+          scores (estimated interaction $> 0$)
+      \end{itemize}
+  \end{itemize}
+  \vfill
+\end{frame}
+
+\begin{frame}{Results}
+  {Random effects}
+  \begin{itemize}[<+->]
+    \item The estimate $\hat\sigma^2_{\upsilon_0} = 2.32$ of the variance of
+      mean school performance provides room for improving prediction by
+      including additional predictors
+    \item However, there is virtually no variation in the dependence of math
+      achievement on \texttt{cses} across schools ($\hat\sigma^2_{\upsilon_1} =
+      0.07$), which should also be noted when interpreting the reported
+      correlation of 0.48
+    \item  The corresponding covariance has an estimated value of
+      $\hat\sigma_{\upsilon_0\upsilon_1} = 0.48\cdot
+      \hat\sigma_{\upsilon_0}\cdot\hat\sigma_{\upsilon_1} = 0.19$
+    \item These results suggest a simplified model of the dependence of math
+      achievement on \texttt{cses}, where the intercept, but not the slope
+      varies across schools
+  \end{itemize}
+  \vfill
+\end{frame}
+
+\begin{frame}{Summary}
+  \begin{itemize}
+    \item Regression models with fixed and random effects\pause
+      \begin{itemize}
+        \item allow for adequately modeling hierarchical data structures
+          \begin{itemize}
+            \item longitudinal data
+            \item individuals organized in groups (e.g., students in classes, or
+              schools)
+          \end{itemize}\pause
+        \item allow for adequately modeling the sources of error occurring in
+          this context\pause
+        \item offer an optimal trade-off between individual and aggregate data
+          analysis
+          \begin{itemize}
+            \item while individual differences are modeled, information
+              aggregated over the sample is exploited, too
+          \end{itemize}\pause
+      \end{itemize}
+      \item Therefore, linear mixed-effects models allow for integrating
+        differential and general psychological aspects within a common
+        theoretical framework
+  \end{itemize}
+  \vfill
+\end{frame}
+
+\begin{frame}{What we learned today\dots}
+  {\dots and how to go on}
+  \pause
+  \begin{enumerate}[<+->]
+    \item We learned
+      \begin{itemize}
+        \item The basic concept of random effects and why to include them in a
+          model
+        \item How to compute a linear mixed-effects model in R using
+          \texttt{lmer()} from the lme4 package
+        \item How to use a hierarchical model to separate individual and school
+          differences
+        \item How to interpret parameters in a linear mixed-effects model
+      \end{itemize}
+    \item Next steps
+      \begin{itemize}
+        \item Do this exercise \url{xxx} using the JSP data set in R
+        \item Its structure is very similar to that of the HSB data set, which
+          will help you to generalize the concepts we learned today
+        \item You can send questions to me and even make an appointment with me
+          to go over your solution
+      \end{itemize}
+  \end{enumerate}
+\end{frame}
+
+\appendix
+%\begin{frame}[allowframebreaks]{References}
+\begin{frame}{References}
+  \printbibliography
+  \vfill
+\end{frame}
+
+\end{document}
+
--- a/slides/lit.bib
+++ b/slides/lit.bib
@ -0,0 +1,17 @@
+@book{BrykRaudenbush2002,
+  title     = {Hierarchical linear models: {A}pplications and data analysis methods},
+  author    = {Raudenbush, Stephen W and Bryk, Anthony S},
+  volume    = {1},
+  year      = {2002},
+  publisher = {Sage}
+}
+
+
+@book{Hedeker2006,
+  author    = {Hedeker, D. R. and Gibbons, R. D.},
+  title     = {Longitudinal data analysis},
+  year      = {2006},
+  address   = {Hoboken},
+  publisher = {John Wiley}
+}
+