Initialize repository
This commit is contained in:
commit
72bd3fcaa7
30
README.md
Normal file
30
README.md
Normal file
@ -0,0 +1,30 @@
|
||||
# Abstract
|
||||
|
||||
This workshop will give a short introduction to linear mixed-effects models with
|
||||
an example from educational science. We will look at a hierarchical data set
|
||||
containing students in schools. The first part will be a basic introduction to
|
||||
the concept of random effects and how they extend linear regression. This is
|
||||
meant to get everybody on the same page and introduce some notation. We will fit
|
||||
these simple models together in R using the lme4 package. In the second part, a
|
||||
more complex hierarchical model will be introduced. We will try to understand
|
||||
how models like these can be used to answer research questions concerning
|
||||
different levels of the data. Again, we will use R to fit this model. This
|
||||
course is suited for novices to mixed-effects models who want to understand the
|
||||
basic concepts, but also for people with a bit more expertise using hierarchical
|
||||
models who want to dig into more details and deepen their understanding of
|
||||
parameter interpretation.
|
||||
|
||||
# Instructor / speaker
|
||||
|
||||
Dr. Nora Wickelmaier, statistics consultant at the Leibniz-Institut für
|
||||
Wissensmedien (IWM), Tübingen
|
||||
|
||||
# Prerequisites
|
||||
|
||||
Participants will need to have installed:
|
||||
|
||||
* a current R version (https://cran.r-project.org/)
|
||||
* an IDE for R (like RStudio or VSCode) or a text editor with syntax
|
||||
highlighting (like Vim or Notepad++)
|
||||
* the R package lme4 (https://cran.r-project.org/package=lme4)
|
||||
|
94
code/hsb.R
Normal file
94
code/hsb.R
Normal file
@ -0,0 +1,94 @@
|
||||
library(lme4)
|
||||
library(lattice)
|
||||
|
||||
# Read the High School & Beyond data set; the first line of the file holds
# the column names.
dat <- read.table(file = "data/hsbdataset.txt", header = TRUE)

# Grand mean of math achievement and per-school mean, both stored on every
# student row so they can be drawn as reference lines below.
dat$gmmath <- mean(dat$mathach)
dat$meanmath <- ave(dat$mathach, dat$school)

# Visual check: cses plotted against ses - meanses
plot(dat$ses - dat$meanses, dat$cses)

# All students pooled
xyplot(mathach ~ cses, data = dat)

# One panel per school: raw scores as points, school mean and grand mean as
# (flat) regression lines
xyplot(
  mathach + meanmath + gmmath ~ cses | factor(school),
  data = dat,
  type = c("p", "r", "r"),
  distribute.type = TRUE,
  col = c("#91C86E", "#91C86E", "#78004B")
)

# Same panels, reference lines only
xyplot(gmmath + meanmath ~ cses | factor(school), data = dat, type = "r")
|
||||
|
||||
|
||||
# NOTE: sjPlot::tab_model() is called via its namespace, so the sjPlot package
# has to be installed even though it is not attached with library().

# Model 1: null model with a random intercept per school
m1 <- lmer(mathach ~ 1 + (1 | school), data = dat)

# Points: observed scores. Lines: predictions including the school effects
# (predict(m1)) and predictions from the fixed effects only (re.form = NA).
xyplot(
  mathach + predict(m1) + predict(m1, re.form = NA) ~ cses | factor(school),
  data = dat,
  type = c("p", "r", "r"),
  distribute.type = TRUE,
  col = c("#91C86E", "#91C86E", "#78004B")
)

# ICC: school variance component relative to school + residual variance
vc_school_m1 <- VarCorr(m1)[[1]]
vc_school_m1 / (vc_school_m1 + sigma(m1)^2)

sjPlot::tab_model(m1)


# Model 2: cses as fixed effect, random intercept per school
m2 <- lmer(mathach ~ cses + (1 | school), data = dat)

xyplot(
  mathach + predict(m2) + predict(m2, re.form = NA) ~ cses | factor(school),
  data = dat,
  type = c("p", "r", "r"),
  distribute.type = TRUE,
  col = c("#91C86E", "#91C86E", "#78004B")
)

# ICC (same ratio as above, now computed from the model that includes cses)
vc_school_m2 <- VarCorr(m2)[[1]]
vc_school_m2 / (vc_school_m2 + sigma(m2)^2)

sjPlot::tab_model(m1, m2)


# Model 3: random intercept and random cses slope per school
m3 <- lmer(mathach ~ cses + (cses | school), data = dat)

xyplot(
  mathach + predict(m3) + predict(m3, re.form = NA) ~ cses | factor(school),
  data = dat,
  type = c("p", "r", "r"),
  distribute.type = TRUE,
  col = c("#91C86E", "#91C86E", "#78004B")
)

sjPlot::tab_model(m1, m2, m3)
|
||||
|
||||
|
||||
# Model 4: adds the school-level predictor sector as a main effect
m4 <- lmer(
  mathach ~ cses + sector + (cses | school),
  data = dat
)

sjPlot::tab_model(m1, m2, m3, m4)

# Model 5: additionally lets the cses slope differ by sector (interaction)
m5 <- lmer(
  mathach ~ cses * sector + (cses | school),
  data = dat
)

sjPlot::tab_model(m1, m2, m3, m4, m5)

# Pooled scatter plot with one regression line per sector
xyplot(
  mathach ~ cses,
  data = dat,
  groups = sector,
  type = c("p", "r")
)
|
||||
|
||||
|
||||
|
||||
# Hierarchical models: the cses slope is allowed to depend on the school-level
# predictors meanses and sector. All three are fitted by maximum likelihood
# (REML = FALSE) -- presumably so that models differing in their fixed effects
# can be compared; confirm before relying on that.

# Random intercepts only
lmm.1 <- lmer(mathach ~ meanses * cses + sector * cses + (1 | school),
              data = dat, REML = FALSE)

# Random intercepts and random cses slopes
lmm.2 <- lmer(mathach ~ meanses * cses + sector * cses + (1 + cses | school),
              data = dat, REML = FALSE)

# Prediction grid. The grids get descriptive names instead of `c` and `m`, so
# that base::c() is not shadowed, and `length.out` is spelled out rather than
# relying on partial argument matching.
cses_grid <- seq(-2, 2, length.out = 51)
meanses_grid <- seq(-1, 1, length.out = 26)

# expand.grid() varies its first factor fastest, so filling a matrix with
# length(cses_grid) rows puts cses on the rows and meanses on the columns.
ndat <- expand.grid(cses = cses_grid, meanses = meanses_grid)

# Predictions are made for sector 0 only.
# NOTE(review): sector is supplied as a factor here; confirm that this matches
# the type of dat$sector that the models were fitted with.
ndat$sector <- factor(0, levels = c("0", "1"))

# Fixed-effects-only predictions (re.form = NA) for Model 2
z <- matrix(predict(lmm.2, newdata = ndat, re.form = NA),
            nrow = length(cses_grid))

persp(cses_grid, meanses_grid, z, theta = 40, phi = 20, col = "lightblue",
      ltheta = 60, shade = .9,
      xlab = "cses", ylab = "meanses", zlab = "mathach", main = "Model 2")

# Model 3: same as Model 2 but without the meanses-by-cses interaction
lmm.3 <- lmer(mathach ~ meanses + sector * cses + (1 + cses | school),
              data = dat, REML = FALSE)

z <- matrix(predict(lmm.3, newdata = ndat, re.form = NA),
            nrow = length(cses_grid))

persp(cses_grid, meanses_grid, z, theta = 40, phi = 20, col = "lightblue",
      ltheta = 60, shade = .9,
      xlab = "cses", ylab = "meanses", zlab = "mathach", main = "Model 3")
|
||||
|
||||
|
||||
# TODO: Add profiling to show instability of parameter estimation?
|
||||
|
7191
data/hsbdataset.txt
Normal file
7191
data/hsbdataset.txt
Normal file
File diff suppressed because it is too large
Load Diff
73
figures/hsb_model.R
Normal file
73
figures/hsb_model.R
Normal file
@ -0,0 +1,73 @@
|
||||
library(lme4)
|
||||
library(lattice)
|
||||
|
||||
dat <- read.table("data/hsbdataset.txt", header = TRUE)

# Write one 5 x 5 inch PDF showing, for each school in `school_sample`, the
# observed scores (points), the school-specific predictions and the
# fixed-effects-only predictions (re.form = NA) of `fitted_model`.
#
# fitted_model  model fitted with lmer()
# school_sample data frame with the rows (schools) to be plotted
# file          path of the PDF to create
#
# The lattice object is print()ed explicitly: lattice plots are only drawn
# automatically at top level, so without print() the PDF would be empty when
# this script is run through source(). dev.off() is registered with on.exit()
# so the device is closed even if plotting fails.
save_model_figure <- function(fitted_model, school_sample, file) {
  pdf(file, height = 5, width = 5)
  on.exit(dev.off(), add = TRUE)
  fig <- xyplot(
    mathach + predict(fitted_model, newdata = school_sample) +
      predict(fitted_model, re.form = NA, newdata = school_sample) ~
      cses | factor(school),
    data = school_sample,
    pch = 16,
    cex = 0.4,
    type = c("p", "r", "r"),
    lwd = 2,
    xlab = "socioeconomic status",
    ylab = "math performance",
    distribute.type = TRUE,
    par.strip.text = list(cex = 0.8),
    col = c("gray80", "#91C86E", "#78004B")
  )
  print(fig)
  invisible(file)
}

# Model 1 - null model with random intercepts
m1 <- lmer(mathach ~ 1 + (1 | school), dat)

# Plot with subsample of schools (fixed seed, so the same 9 schools are drawn
# on every run)
set.seed(1427)
subdat <- subset(dat, dat$school %in% sample(unique(dat$school), size = 9))

save_model_figure(m1, subdat, "figures/hsb_model1.pdf")

# Model 2 - random intercept model
m2 <- lmer(mathach ~ cses + (1 | school), dat)

# Overview of all schools (drawn on the current device, not saved)
xyplot(mathach + predict(m2) + predict(m2, re.form = NA) ~ cses | factor(school),
       dat, type = c("p", "r", "r"), distribute.type = TRUE,
       col = c("#91C86E", "#91C86E", "#78004B"))

save_model_figure(m2, subdat, "figures/hsb_model2.pdf")

# Model 3 - random slope model
m3 <- lmer(mathach ~ cses + (cses | school), dat)

# Overview of all schools (drawn on the current device, not saved)
xyplot(mathach + predict(m3) + predict(m3, re.form = NA) ~ cses | factor(school),
       dat, type = c("p", "r", "r"), distribute.type = TRUE,
       col = c("#91C86E", "#91C86E", "#78004B"))

save_model_figure(m3, subdat, "figures/hsb_model3.pdf")
|
||||
|
BIN
figures/hsb_model1.pdf
Normal file
BIN
figures/hsb_model1.pdf
Normal file
Binary file not shown.
BIN
figures/hsb_model2.pdf
Normal file
BIN
figures/hsb_model2.pdf
Normal file
Binary file not shown.
BIN
figures/hsb_model3.pdf
Normal file
BIN
figures/hsb_model3.pdf
Normal file
Binary file not shown.
BIN
slides/iwm_logo_rgb.pdf
Normal file
BIN
slides/iwm_logo_rgb.pdf
Normal file
Binary file not shown.
BIN
slides/lead_lmm.pdf
Normal file
BIN
slides/lead_lmm.pdf
Normal file
Binary file not shown.
551
slides/lead_lmm.tex
Normal file
551
slides/lead_lmm.tex
Normal file
@ -0,0 +1,551 @@
|
||||
\documentclass[aspectratio=169]{beamer}
|
||||
|
||||
\usepackage{listings}
|
||||
\usepackage[utf8]{inputenc}
|
||||
\usepackage[style = apa, backend = biber, natbib = true]{biblatex}
|
||||
\usepackage{tikz}
|
||||
\addbibresource{lit.bib}
|
||||
|
||||
\makeatletter \def\newblock{\beamer@newblock} \makeatother
|
||||
|
||||
\beamertemplatenavigationsymbolsempty
|
||||
\setbeamertemplate{itemize items}[circle]
|
||||
\setbeamertemplate{section in toc}[circle]
|
||||
\mode<beamer>{\setbeamercolor{math text displayed}{fg=iwmgray}}
|
||||
\setbeamercolor{block body}{bg=iwmorange!50!white}
|
||||
\setbeamercolor{block title}{fg=white, bg=iwmorange}
|
||||
|
||||
% Definitions for biblatex
|
||||
\setbeamercolor{bibliography entry note}{fg=iwmgray}
|
||||
\setbeamercolor{bibliography entry author}{fg=iwmgray}
|
||||
\setbeamertemplate{bibliography item}{}
|
||||
|
||||
\definecolor{iwmorange}{RGB}{255,105,0}
|
||||
\definecolor{iwmgray}{RGB}{67,79,79}
|
||||
\definecolor{iwmblue}{RGB}{60,180,220}
|
||||
\definecolor{iwmgreen}{RGB}{145,200,110}
|
||||
\definecolor{iwmpurple}{RGB}{120,0,75}
|
||||
|
||||
\setbeamercolor{title}{fg=iwmorange}
|
||||
\setbeamercolor{frametitle}{fg=iwmorange}
|
||||
\setbeamercolor{structure}{fg=iwmorange}
|
||||
\setbeamercolor{normal text}{fg=iwmgray}
|
||||
\setbeamercolor{author}{fg=iwmgray}
|
||||
\setbeamercolor{date}{fg=iwmgray}
|
||||
|
||||
\lstset{language = R,%
|
||||
basicstyle = \ttfamily\color{iwmgray},
|
||||
frame = single,
|
||||
rulecolor = \color{iwmgray},
|
||||
commentstyle = \slshape\color{iwmgreen},
|
||||
keywordstyle = \bfseries\color{iwmgray},
|
||||
identifierstyle = \color{iwmpurple},
|
||||
stringstyle = \color{iwmblue},
|
||||
numbers = none,%left,numberstyle = \tiny,
|
||||
basewidth = {.5em, .4em},
|
||||
showstringspaces = false,
|
||||
emphstyle = \color{red!50!white}}
|
||||
|
||||
\title{Introduction to mixed-effects models}
|
||||
\subtitle{(for hierarchical data)}
|
||||
\author{Nora Wickelmaier}
|
||||
\institute{\includegraphics[scale=.2]{iwm_logo_rgb}}
|
||||
\date{2025-06-24}
|
||||
|
||||
\AtBeginSection[]{
|
||||
\frame{
|
||||
\tableofcontents[sectionstyle=show/hide, subsectionstyle=show/show/hide]}}
|
||||
|
||||
\setbeamertemplate{headline}{
|
||||
\begin{beamercolorbox}{section in head}
|
||||
\vskip5pt\insertsectionnavigationhorizontal{\paperwidth}{}{}\vskip2pt
|
||||
\end{beamercolorbox}
|
||||
}
|
||||
|
||||
\setbeamertemplate{footline}{\vskip-2pt\hfill\insertframenumber$\;$\vskip2pt}
|
||||
|
||||
\begin{document}
|
||||
|
||||
\begin{frame}{}
|
||||
\thispagestyle{empty}
|
||||
\titlepage
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Plan for today}
|
||||
\begin{itemize}[<+->]
|
||||
\item We will walk through an example for a hierarchical data set (students
|
||||
in schools)
|
||||
\item I will explain the general concepts with the slides
|
||||
\item We will switch to R and use the lme4 package to fit the models
|
||||
\item You will use R to fit an extension of the model
|
||||
\item We will discuss the results\\~\\
|
||||
\item[$\to$] Try to go along in R! Ask as many questions as possible, also
|
||||
the ones you usually do not dare to ask (because you are supposed to know
|
||||
them already or something\dots)
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Outline}
|
||||
\tableofcontents
|
||||
\end{frame}
|
||||
|
||||
\section[Introduction]{Introduction to random effects}
|
||||
|
||||
\begin{frame}{Hierarchical data}
|
||||
\begin{itemize}[<+->]
|
||||
\item Observations often do not come from a simple random sample, but result
|
||||
from a hierarchical structure
|
||||
\begin{itemize}
|
||||
\item Individuals are organized in groups (e.g., students nested in
|
||||
classes, or schools)
|
||||
\item Persons are observed multiple times (observations nested in
|
||||
persons, longitudinal data\nocite{Hedeker2006})
|
||||
\end{itemize}
|
||||
\item Statistical models for this kind of data are called multilevel models,
|
||||
mixed-effects models, random-effects models, covariance components models,
|
||||
or hierarchical models
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Example: Mathematics achievement study}
|
||||
\begin{itemize}
|
||||
\item The \texttt{hsbdataset.txt} file contains data from the National Center
|
||||
for Education Statistics' (NCES) ``High School \& Beyond'' national survey
|
||||
of U.S.\ public and Catholic high schools
|
||||
\citep{BrykRaudenbush2002}\pause
|
||||
\item The data set consists of information on 7,185 students from 160 schools
|
||||
on student performance on a mathematics test and information concerning
|
||||
their socioeconomic status\pause
|
||||
\item Hierarchical data structure
|
||||
\begin{itemize}
|
||||
\item Students are organized in schools
|
||||
\begin{tabular}{ll}
|
||||
$y_{ij}$ & mathematics achievement of student $j$ in school $i$ \\
|
||||
$x_{ij}$ & (relative) socioeconomic status of student $j$ \\
|
||||
& in school $i$ (overall mean 0, centered)
|
||||
\end{tabular}\pause
|
||||
\item Two levels
|
||||
\begin{itemize}
|
||||
\item Level 1: Student attributes
|
||||
\item Level 2: School attributes
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Regression with random school effects}
|
||||
\begin{itemize}
|
||||
\item What is the mean math achievement of the students?
|
||||
\item How much do schools vary in mean math achievement?
|
||||
\end{itemize}\pause
|
||||
\vspace{.8cm}
|
||||
\begin{columns}
|
||||
\begin{column}{.4\textwidth}
|
||||
\begin{tikzpicture}[>=stealth, y=.1cm, x=.4cm, font=\footnotesize]
|
||||
\draw[->] (0,0) -- coordinate (x axis mid) (10,0);
|
||||
\draw[->] (0,0) -- coordinate (y axis mid) (0,30);
|
||||
\node[below=0.1cm] at (x axis mid) {socioeconomic status $x$};
|
||||
\node[rotate=90, above=0.0cm] at (y axis mid) {math performance $y$};
|
||||
%
|
||||
\draw[dashed] (0, 15) -- (10, 15) node [right] {$\beta_0$};
|
||||
\draw (0.5, 10) -- (4.5, 10);
|
||||
\draw plot[only marks, mark size=0.5pt, mark=*]
|
||||
coordinates {(1, 9) (2, 12) (3, 13) (4, 8)};
|
||||
\draw (5.5, 20) -- (9.5, 20);
|
||||
\draw[<->, shorten <= 1pt, shorten >= 1pt] (7.5, 15) -- (7.5, 17.5) node [right] {$\upsilon_{0i}$} -- (7.5, 20);
|
||||
\draw plot[only marks, mark size=0.5pt, mark=*]
|
||||
coordinates {(6, 22) (7, 18) (8, 24) (9, 19)};
|
||||
\draw[<->, shorten <= 1pt, shorten >= 1pt] (8, 20) -- (8, 22) node [right] {$\varepsilon_{ij}$} -- (8, 24);
|
||||
\end{tikzpicture}
|
||||
\end{column}\pause
|
||||
\begin{column}{.6\textwidth}
|
||||
Model equation
|
||||
\begin{align*}
|
||||
\text{(Level 1)} \quad y_{ij} &= b_{0i} + \varepsilon_{ij}\\
|
||||
\text{(Level 2)} \quad b_{0i} &= \beta_0 + \upsilon_{0i}\\
|
||||
\text{(2) in (1)} \quad y_{ij} &= \beta_0 + \upsilon_{0i} + \varepsilon_{ij}
|
||||
\end{align*}
|
||||
with $\upsilon_{0i} \sim N(0, \sigma^2_{\upsilon})$ i.i.d.,
|
||||
$\varepsilon_{ij} \sim N(0, \sigma^2)$ i.i.d.,\\
|
||||
$\upsilon_{0i}$ and $\varepsilon_{ij}$ independent
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
% \begin{frame}{Models with random school effects}
|
||||
% \begin{itemize}
|
||||
% \item Model equation
|
||||
% \begin{align*}
|
||||
% \text{(Level 1)} \quad y_{ij} &= b_{0i} + \varepsilon_{ij}\\
|
||||
% \text{(Level 2)} \quad b_{0i} &= \beta_0 + \upsilon_{0i}\\
|
||||
% \text{(2) in (1)} \quad y_{ij} &= \beta_0 + \upsilon_{0i} + \varepsilon_{ij}
|
||||
% \end{align*}
|
||||
% with $\upsilon_{0i} \sim N(0, \sigma^2_{\upsilon})$ i.i.d,
|
||||
% $\varepsilon_{ij} \sim N(0, \sigma^2)$ i.i.d, $\upsilon_{0i}$ and
|
||||
% $\varepsilon_{ij}$ independent\pause
|
||||
% \item Interpretation\\[1ex]
|
||||
% \begin{tabular}{lp{9.5cm}}
|
||||
% $\beta_0$ & mean math achievement\\
|
||||
% $\upsilon_{0i}$ & random school effect, mean deviation of school $i$
|
||||
% from math achievement\\
|
||||
% $\varepsilon_{ij}$ & residual of student $j$ in school $i$
|
||||
% \end{tabular}
|
||||
% \end{itemize}
|
||||
% \end{frame}
|
||||
|
||||
\begin{frame}{Null model with random intercepts}
|
||||
{Subset of 9 schools}
|
||||
\centering
|
||||
\vspace{-.5cm}
|
||||
\includegraphics[scale = .6]{../figures/hsb_model1}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Random effects}
|
||||
\begin{itemize}[<+->]
|
||||
\item The problem of grouping observations in schools and the thereby induced
|
||||
dependencies is solved by introducing school effects
|
||||
\item For many schools this calls for (too) many parameters
|
||||
\item School effects are therefore modeled as random effects (random
|
||||
variables) $\upsilon_{0i}$
|
||||
\item Only their variance $\sigma^2_\upsilon$ has to be estimated in the model
|
||||
\item The total variance of $y_{ij}$ is decomposed into the variance between
|
||||
schools $\sigma^2_\upsilon$ and within schools $\sigma^2$
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Results}
|
||||
\begin{itemize}[<+->]
|
||||
\item The research questions posed above can be answered based on the
|
||||
parameter estimates $\hat\beta_0$, $\hat\sigma^2_\upsilon$ and
|
||||
$\hat\sigma^2$
|
||||
\begin{itemize}
|
||||
\item The estimated mean math achievement of students is $\hat\beta_0$
|
||||
\item The estimated variance of schools in mean math achievement is
|
||||
$\hat\sigma^2_\upsilon$
|
||||
\item The proportion of the total variance accounted for by the variance
|
||||
between schools is
|
||||
\[
|
||||
\text{ICC} = \frac{\sigma^2_\upsilon}{\sigma^2_\upsilon + \sigma^2}
|
||||
\]
|
||||
(Intra-class correlation)
|
||||
\end{itemize}
|
||||
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
|
||||
\begin{frame}{Regression with random school effects}
|
||||
\begin{itemize}
|
||||
\item How strong is the relationship between students' socioeconomic status
|
||||
and their math achievement on average?
|
||||
\item How much do schools vary in mean math achievement for students with
|
||||
average socioeconomic status?
|
||||
\end{itemize}\pause
|
||||
\vspace{.4cm}
|
||||
\begin{columns}
|
||||
\begin{column}{.4\textwidth}
|
||||
\begin{tikzpicture}[>=stealth, y=.1cm, x=.4cm, font=\footnotesize]
|
||||
\draw[->] (0,0) -- coordinate (x axis mid) (10,0);
|
||||
\draw[->] (0,0) -- coordinate (y axis mid) (0,30);
|
||||
\node[below=0.1cm] at (x axis mid) {socioeconomic status $x$};
|
||||
\node[rotate=90, above=0.0cm] at (y axis mid) {math performance $y$};
|
||||
%
|
||||
\draw[dashed] (0, 10) node [above right = -0.1cm] {$\beta_0$} -- (10, 20);
|
||||
\draw (1.5, 4.5) -- (5.5, 8.5);
|
||||
\draw plot[only marks, mark size=0.5pt, mark=*]
|
||||
coordinates {(2, 3.5) (3, 7.5) (4, 9.5) (5, 6.5)};
|
||||
\draw (5.5, 22.5) -- (9.5, 26.5);
|
||||
\draw[<->, shorten <= 1pt, shorten >= 1pt] (7.5, 17.5) -- (7.5, 22) node [right] {$\upsilon_{0i}$} -- (7.5, 24.5);
|
||||
\draw plot[only marks, mark size=0.5pt, mark=*]
|
||||
coordinates {(6, 24) (7, 21) (8, 29) (9.5, 25)};
|
||||
\draw[<->, shorten <= 1pt, shorten >= 1pt] (8, 25) -- (8, 28) node [right] {$\varepsilon_{ij}$} -- (8, 29);
|
||||
\draw (3, 13) -- (6, 13) -- (6, 14) node [right] {$\beta_1$} -- (6, 16);
|
||||
\end{tikzpicture}
|
||||
\end{column}\pause
|
||||
\begin{column}{.6\textwidth}
|
||||
Model equation
|
||||
\begin{align*}
|
||||
\text{(Level 1)} \quad y_{ij} &= b_{0i} + b_{1i}\,x_{ij} + \varepsilon_{ij}\\
|
||||
\text{(Level 2)} \quad b_{0i} &= \beta_0 + \upsilon_{0i}\\
|
||||
\quad b_{1i} &= \beta_1\\
|
||||
\text{(2) in (1)} \quad y_{ij} &= \beta_0 + \beta_1\,x_{ij} +
|
||||
\upsilon_{0i} + \varepsilon_{ij}
|
||||
\end{align*}
|
||||
with $\upsilon_{0i} \sim N(0, \sigma^2_{\upsilon})$ i.i.d.,
|
||||
$\varepsilon_{ij} \sim N(0, \sigma^2)$ i.i.d.,\\
|
||||
$\upsilon_{0i}$ and $\varepsilon_{ij}$ independent
|
||||
\end{column}
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
|
||||
% \begin{frame}{Models with random school effects}
|
||||
% \begin{itemize}
|
||||
% \item Regression with random school effects
|
||||
% \begin{align*}
|
||||
% \text{(Level 1)} \quad y_{ij} &= b_{0i} + b_{1i}\,x_{ij} + \varepsilon_{ij}\\
|
||||
% \text{(Level 2)} \quad b_{0i} &= \beta_0 + \upsilon_{0i}\\
|
||||
% \quad b_{1i} &= \beta_1\\
|
||||
% \text{(2) in (1)} \quad y_{ij} &= \beta_0 + \beta_1\,x_{ij} +
|
||||
% \upsilon_{0i} + \varepsilon_{ij}
|
||||
% \end{align*}
|
||||
% with $\upsilon_{0i} \sim N(0, \sigma^2_{\upsilon})$ independent,
|
||||
% $\varepsilon_{ij} \sim N(0, \sigma^2)$ independent, $\upsilon_{0i}$ and
|
||||
% $\varepsilon_{ij}$ independent
|
||||
% \item Interpretation\\[1ex]
|
||||
% \begin{tabular}{lp{10cm}}
|
||||
% $\beta_0$ & mean mathematics performance for $x_{ij} = 0$, intercept\\
|
||||
% $\beta_1$ & mean change in mathematics performance for unit change in
|
||||
% socioeconomic status\\
|
||||
% $\upsilon_{0i}$ & school-specific deviation from the mean y-intercept\\
|
||||
% $\varepsilon_{ij}$ & residual of student $j$ in school $i$
|
||||
% \end{tabular}
|
||||
% \end{itemize}
|
||||
% \end{frame}
|
||||
|
||||
\begin{frame}{Model with covariate and random intercepts}
|
||||
{Subset of 9 schools}
|
||||
\centering
|
||||
\vspace{-.5cm}
|
||||
\includegraphics[scale = .6]{../figures/hsb_model2}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{}
|
||||
\begin{block}{Exercise}
|
||||
\begin{itemize}
|
||||
\item What would be the next possible extension of this model?\pause
|
||||
\item Write down the model equations
|
||||
\begin{itemize}
|
||||
\item What changes for the fixed effects?
|
||||
\item How do the variance components for the random effects change?
|
||||
\end{itemize}\pause
|
||||
\item How can we interpret the random slopes for this model?\pause
|
||||
\item How do we add random slopes to a random intercept model using
|
||||
\texttt{lme4::lmer()}?
|
||||
\item Fit a model with random slopes for socioeconomic status in R
|
||||
\end{itemize}
|
||||
\end{block}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Model with covariate and random slopes}
|
||||
{Subset of 9 schools}
|
||||
\centering
|
||||
\vspace{-.5cm}
|
||||
\includegraphics[scale = .6]{../figures/hsb_model3}
|
||||
\end{frame}
|
||||
|
||||
|
||||
% \begin{frame}{Mixed-effects models}
|
||||
% Possible extensions of the models used so far
|
||||
% \begin{itemize}
|
||||
% \item Random effects
|
||||
% \begin{itemize}
|
||||
% \item School specific slopes
|
||||
% \item Correlations between intercept and slope
|
||||
% \end{itemize}
|
||||
% \item Fixed effects
|
||||
% \begin{itemize}
|
||||
% \item At school level: type of school (public vs.\ catholic)
|
||||
% \item At student level: gender, intelligence
|
||||
% \end{itemize}
|
||||
% \item Grouping, levels
|
||||
% \begin{itemize}
|
||||
% \item Classes, school districts or states (hierarchical)
|
||||
% \item Teacher effect within schools (crossed)
|
||||
% \end{itemize}
|
||||
% \end{itemize}
|
||||
% \end{frame}
|
||||
|
||||
% TODO: Include profiling at the end? Maybe to provide some "new" input for
|
||||
% people who are already more advanced with LMMs?
|
||||
|
||||
\section{Hierarchical modeling}
|
||||
|
||||
\begin{frame}{HSB data set}
|
||||
\centering
|
||||
\begin{tabular}{llp{10cm}}
|
||||
\hline
|
||||
Level & Variable & Description \\
|
||||
\hline
|
||||
1 & \texttt{mathach} & Performance in mathematics test \\
|
||||
1 & \texttt{ses} & (relative) socioeconomic status (overall mean 0) \\
|
||||
2 & \texttt{meanses} & mean socioeconomic status of the school (overall mean 0) \\
|
||||
1 & \texttt{cses} & Centered socioeconomic status of the student (mean for each school 0, difference \texttt{ses - meanses}) \\
|
||||
2 & \texttt{school} & school ID \\
|
||||
2 & \texttt{sector} & Public (0) or Catholic High School (1) \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Hierarchical regression model}
|
||||
Model equation
|
||||
\begin{align*}
|
||||
\text{(Level 1)} \quad y_{ij} =&~b_{0i} + b_{1i}\,cses_{ij} + \varepsilon_{ij}\\
|
||||
\text{(Level 2)} \quad b_{0i} =&~\beta_0 + \beta_2 meanses_i + \beta_4 sector_i + \upsilon_{0i}\\
|
||||
\quad b_{1i} =&~\beta_1 + \beta_3 meanses_i + \beta_5 sector_i + \upsilon_{1i}\\
|
||||
\text{(2) in (1)} \quad y_{ij} =&~\beta_0 + \beta_1\,cses_{ij} + \beta_2 meanses_i + \beta_4 sector_i\\
|
||||
& + \beta_3 (cses_{ij} \times meanses_i) + \beta_5 (cses_{ij} \times sector_i) \\
|
||||
& + \upsilon_{0i} + cses_{ij}\upsilon_{1i} + \varepsilon_{ij}
|
||||
\end{align*}
|
||||
with
|
||||
\begin{align*}
|
||||
\begin{pmatrix} \upsilon_{0i}\\ \upsilon_{1i} \end{pmatrix} & \sim
|
||||
N \left(\begin{pmatrix} 0\\ 0 \end{pmatrix}, \, \boldsymbol{\Sigma}_\upsilon =
|
||||
\begin{pmatrix}
|
||||
\sigma^2_{\upsilon_0} & \sigma_{\upsilon_0 \upsilon_1} \\
|
||||
\sigma_{\upsilon_0 \upsilon_1} & \sigma^2_{\upsilon_1} \\
|
||||
\end{pmatrix} \right)~\text{i.i.d.} \\
|
||||
\boldsymbol{\varepsilon}_i & \sim N(\mathbf{0}, \, \sigma^2
|
||||
\mathbf{I}_{n_i})~\text{i.i.d.}
|
||||
\end{align*}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Decomposing socioeconomic status}
|
||||
\begin{itemize}[<+->]
|
||||
\item In this model, by decomposing the socioeconomic status according to
|
||||
the equation
|
||||
\[
|
||||
ses = cses + meanses
|
||||
\]
|
||||
its differential effectiveness is considered at each of the levels
|
||||
\item At the same time, the effect of the type of school is examined via the
|
||||
variable \texttt{sector}
|
||||
\item Notice that the formulation of the model assumes dependencies of the
|
||||
slope $b_{1i}$ on both mean socioeconomic status and school type, which is
|
||||
captured by the interactions of \texttt{cses} with \texttt{meanses} and
|
||||
\texttt{sector}, respectively
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{}
|
||||
\begin{block}{Exercise}
|
||||
\begin{enumerate}
|
||||
\item Compute the model in R using \texttt{lme4::lmer()}
|
||||
{\scriptsize
|
||||
\begin{align*}
|
||||
\text{(Level 1)} \quad y_{ij} =&~b_{0i} + b_{1i}\,cses_{ij} + \varepsilon_{ij}\\
|
||||
\text{(Level 2)} \quad b_{0i} =&~\beta_0 + \beta_2 meanses_i + \beta_4 sector_i + \upsilon_{0i}\\
|
||||
\quad b_{1i} =&~\beta_1 + \beta_3 meanses_i + \beta_5 sector_i + \upsilon_{1i}\\
|
||||
\text{(2) in (1)} \quad y_{ij} =&~\beta_0 + \beta_1\,cses_{ij} + \beta_2 meanses_i + \beta_4 sector_i
|
||||
+ \beta_3 (cses_{ij} \times meanses_i) + \beta_5 (cses_{ij} \times sector_i) \\
|
||||
& + \upsilon_{0i} + cses_{ij}\upsilon_{1i} + \varepsilon_{ij}
|
||||
\end{align*}
|
||||
with
|
||||
$
|
||||
\begin{pmatrix} \upsilon_{0i}\\ \upsilon_{1i} \end{pmatrix} \sim
|
||||
N \left(\begin{pmatrix} 0\\ 0 \end{pmatrix}, \, \boldsymbol{\Sigma}_\upsilon =
|
||||
\begin{pmatrix}
|
||||
\sigma^2_{\upsilon_0} & \sigma_{\upsilon_0 \upsilon_1} \\
|
||||
\sigma_{\upsilon_0 \upsilon_1} & \sigma^2_{\upsilon_1} \\
|
||||
\end{pmatrix} \right)~\text{i.i.d.},
|
||||
\boldsymbol{\varepsilon}_i \sim N(\mathbf{0}, \, \sigma^2
|
||||
\mathbf{I}_{n_i})~\text{i.i.d.}
|
||||
$ }
|
||||
\item Interpret the parameters
|
||||
\end{enumerate}
|
||||
\end{block}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Results}
|
||||
{Fixed effects}
|
||||
\begin{itemize}[<+->]
|
||||
\item Mean math achievement (i.e., for a student with a mean \texttt{cses}
|
||||
score in a school with a mean \texttt{meanses} score) is 12.11 in Public
|
||||
High Schools and 13.33 in Catholic High Schools
|
||||
\item Effects of socioeconomic status at the two levels
|
||||
\begin{itemize}
|
||||
\item The effect at the student level depends on the type of school:
|
||||
math achievement increases by 2.94 points in Public High Schools and
|
||||
by $2.94 - 1.64 = 1.30$ points in Catholic High Schools for a unit
|
||||
increase in \texttt{cses}
|
||||
\item Higher math achievements are obtained in schools with higher mean
|
||||
socioeconomic status
|
||||
\item In addition, the dependence of math achievement on \texttt{cses}
|
||||
scores is more pronounced in schools with higher \texttt{meanses}
|
||||
scores (estimated interaction $> 0$)
|
||||
\end{itemize}
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Results}
|
||||
{Random effects}
|
||||
\begin{itemize}[<+->]
|
||||
\item The estimate $\hat\sigma^2_{\upsilon_0} = 2.32$ of the variance of
|
||||
mean school performance provides room for improving prediction by
|
||||
including additional predictors
|
||||
\item However, there is virtually no variation in the dependence of math
|
||||
achievement on \texttt{cses} across schools ($\hat\sigma^2_{\upsilon_1} =
|
||||
0.07$), which should also be noted when interpreting the reported
|
||||
correlation of 0.48
|
||||
\item The corresponding covariance has an estimated value of
|
||||
$\hat\sigma_{\upsilon_0\upsilon_1} = 0.48\cdot
|
||||
\hat\sigma_{\upsilon_0}\cdot\hat\sigma_{\upsilon_1} = 0.19$
|
||||
\item These results suggest a simplified model of the dependence of math
|
||||
achievement on \texttt{cses}, where the intercept, but not the slope
|
||||
varies across schools
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Summary}
|
||||
\begin{itemize}
|
||||
\item Regression models with fixed and random effects\pause
|
||||
\begin{itemize}
|
||||
\item allow for adequately modeling hierarchical data structures
|
||||
\begin{itemize}
|
||||
\item longitudinal data
|
||||
\item individuals organized in groups (e.g., students in classes, or
|
||||
schools)
|
||||
\end{itemize}\pause
|
||||
\item allow for adequately modeling the sources of error occurring in
|
||||
this context\pause
|
||||
\item offer an optimal trade-off between individual and aggregate data
|
||||
analysis
|
||||
\begin{itemize}
|
||||
\item while individual differences are modeled, information
|
||||
aggregated over the sample is exploited, too
|
||||
\end{itemize}\pause
|
||||
\end{itemize}
|
||||
\item Therefore, linear mixed-effects models allow for integrating
|
||||
differential and general psychological aspects within a common
|
||||
theoretical framework
|
||||
\end{itemize}
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{What we learned today\dots}
|
||||
{\dots and how to go on}
|
||||
\pause
|
||||
\begin{enumerate}[<+->]
|
||||
\item We learned
|
||||
\begin{itemize}
|
||||
\item The basic concept of random effects and why to include them in a
|
||||
model
|
||||
\item How to compute a linear mixed-effects model in R using
|
||||
\texttt{lmer()} from the lme4 package
|
||||
\item How to use a hierarchical model to separate individual and school
|
||||
differences
|
||||
\item How to interpret parameters in a linear mixed-effects model
|
||||
\end{itemize}
|
||||
\item Next steps
|
||||
\begin{itemize}
|
||||
\item Do this exercise \url{xxx} using the JSP data set in R
|
||||
\item It has a very similar structure to the HSB data set and this
|
||||
will help you to generalize the concepts we learned today
|
||||
\item You can send questions to me and even make an appointment with me
|
||||
to go over your solution
|
||||
\end{itemize}
|
||||
\end{enumerate}
|
||||
\end{frame}
|
||||
|
||||
\appendix
|
||||
%\begin{frame}[allowframebreaks]{References}
|
||||
\begin{frame}{References}
|
||||
\printbibliography
|
||||
\vfill
|
||||
\end{frame}
|
||||
|
||||
\end{document}
|
||||
|
17
slides/lit.bib
Normal file
17
slides/lit.bib
Normal file
@ -0,0 +1,17 @@
|
||||
@book{BrykRaudenbush2002,
|
||||
title = {Hierarchical linear models: {A}pplications and data analysis methods},
|
||||
author = {Raudenbush, Stephen W and Bryk, Anthony S},
|
||||
volume = {1},
|
||||
year = {2002},
|
||||
publisher = {Sage}
|
||||
}
|
||||
|
||||
|
||||
@book{Hedeker2006,
|
||||
author = {Hedeker, D. R. and Gibbons, R. D.},
|
||||
title = {Longitudinal data analysis},
|
||||
year = {2006},
|
||||
address = {Hoboken},
|
||||
publisher = {John Wiley}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user