--- title: "Lecture 5" subtitle: "Random Effects Models" date: "9/19/2018" fontsize: 11pt output: beamer_presentation: theme: metropolis highlight: pygments fig_caption: false latex_engine: xelatex includes: in_header: ../settings.tex --- ```{r setup, include=FALSE} library(dplyr) library(ggplot2) ``` ```{r config, include=FALSE} knitr::opts_chunk$set( collapse = TRUE, fig.width=7, fig.height=4.5, out.width="\\textwidth", fig.align="center", echo=TRUE, warning=FALSE ) ggplot2::theme_set(ggplot2::theme_bw()) ``` # Random Effects Models ## Sleep Study Data {.t} \small The average reaction time per day for subjects in a sleep deprivation study. On day 0 the subjects had their normal amount of sleep . Starting that night they were restricted to 3 hours of sleep per night. The observations represent the average reaction time on a series of tests given each day to each subject. ```{r message=FALSE} sleep = lme4::sleepstudy %>% tbl_df() sleep ``` ## EDA ```{r echo=FALSE} ggplot(sleep, aes(x=Days, y=Reaction, color=Subject)) + geom_point() ``` ## EDA (small multiples) ```{r echo=FALSE} ggplot(sleep, aes(x=Days, y=Reaction)) + geom_point() + facet_wrap(~Subject) ``` ## Bayesian Linear Model {.t} ```{r} sleep_lm = "model{ # Likelihood for(i in 1:length(y)){ y[i] ~ dnorm(mu[i], tau) mu[i] = beta[1] + beta[2]*x[i] y_pred[i] ~ dnorm(mu[i],tau) } # Prior for beta beta[1] ~ dnorm(0,1/10000) beta[2] ~ dnorm(0,1/10000) # Prior for sigma / tau sigma ~ dunif(0, 100) tau = 1/(sigma*sigma) }" ``` ```{r echo=FALSE, include=FALSE} nburn=1000; niter=5000 m = rjags::jags.model( textConnection(sleep_lm), data = list(x = sleep$Days, y = sleep$Reaction), quiet = TRUE, n.chains = 2 ) update(m, n.iter=nburn, progress.bar="none") sleep_lm_samp = rjags::coda.samples( m, variable.names=c("beta", "sigma", "mu", "y_pred"), n.iter=niter, progress.bar="none" ) ``` ## MCMC Diagnostics ```{r echo=FALSE} sleep_lm_samp %>% window(thin=10) %>% tidybayes::gather_samples(beta[i], sigma) %>% ungroup() %>% mutate(term = ifelse(is.na(i), term, paste0(term,"[",i,"]"))) %>% ggplot(aes(x=.iteration, y=estimate, color=as.factor(.chain))) + geom_line(alpha=0.5) + facet_grid(term~., scale="free_y") + labs(color="chain", x="iteration") ``` ## Model fit ```{r echo=FALSE, message=FALSE} sleep_lm_pred = sleep_lm_samp[1] %>% tidybayes::spread_samples(mu[i], y_pred[i]) %>% ungroup() %>% left_join( mutate(sleep, i = 1:n()) ) %>% mutate(resid = Reaction - mu) ``` ```{r echo=FALSE} ggplot(sleep, aes(x=Days, y=Reaction)) + tidybayes::stat_lineribbon(data=sleep_lm_pred, aes(y=y_pred), alpha=0.5) + geom_point(data=sleep) + facet_wrap(~Subject) ``` ## Residuals ```{r echo=FALSE} sleep_lm_pred %>% group_by(Days, Subject) %>% tidybayes::mean_hdi(resid) %>% ggplot(aes(x=Days, y=resid, color=Subject)) + geom_hline(yintercept=0, color="grey") + geom_point() + guides(color=FALSE) ``` ## Residuals by subject ```{r echo=FALSE} sleep_lm_pred %>% group_by(Days, Subject) %>% tidybayes::mean_hdi(resid) %>% ggplot(aes(x=Days, y=resid, color=Subject)) + geom_hline(yintercept=0, color="grey") + geom_point() + facet_wrap(~Subject) + guides(color=FALSE) ``` # Random Intercept Model ## Coding {.t} ```{r} sleep = sleep %>% mutate(Subject_index = as.integer(Subject)) sleep[c(1:2,11:12,21:22,31:32),] ``` ## Random Intercept Model {.t} Let $i$ represent each observation and $j(i)$ be subject in oberservation $i$ then $$ y_i = \alpha_{j(i)}+ \beta \times \text{Day}_i + \epsilon_i $$ $$ \begin{aligned} \alpha_j &\sim \mathcal{N}(\beta_\alpha,~\sigma^2_\alpha) \\ \epsilon_i &\sim \mathcal{N}(0,~\sigma^2) \\ \\ \beta_{\alpha} &\sim \mathcal{N}(0, 10^4)\\ \beta &\sim \mathcal{N}(0, 10^4)\\ \sigma, \sigma_\alpha &\sim \text{Unif}(0,10^2) \end{aligned} $$ ## Random Intercept Model - JAGS \footnoteoutput ```{r} sleep_ri = "model{ for(i in 1:length(Reaction)) { Reaction[i] ~ dnorm(mu[i],tau) mu[i] = alpha[Subject_index[i]] + beta*Days[i] y_pred[i] ~ dnorm(mu[i],tau) } for(j in 1:18) { alpha[j] ~ dnorm(beta_alpha, tau_alpha) } beta_alpha ~ dnorm(0,1/10000) sigma_alpha ~ dunif(0, 100) tau_alpha = 1/(sigma_alpha*sigma_alpha) beta ~ dnorm(0,1/10000) sigma ~ dunif(0, 100) tau = 1/(sigma*sigma) }" ``` ```{r echo=FALSE, include=FALSE, message=FALSE} nburn=1000; niter=5000 m = rjags::jags.model( textConnection(sleep_ri), data = sleep, quiet = TRUE, n.chains = 2 ) update(m, n.iter=nburn, progress.bar="none") sleep_ri_samp = rjags::coda.samples( m, n.iter=niter, progress.bar="none", variable.names=c( "y_pred", "mu", "beta", "sigma", "alpha", "beta_alpha"," sigma_alpha") ) ``` ## MCMC Diagnostics ```{r echo=FALSE} sleep_ri_samp %>% window(thin=10) %>% tidybayes::gather_samples(beta, sigma, beta_alpha, sigma_alpha) %>% ungroup() %>% ggplot(aes(x=.iteration, y=estimate, color=as.factor(.chain))) + geom_line(alpha=0.5) + facet_grid(term~., scale="free_y") + labs(color="chain", x="iteration") ``` ## Model fit ```{r echo=FALSE, message=FALSE} sleep_ri_pred = sleep_ri_samp[1] %>% tidybayes::spread_samples(y_pred[i], mu[i]) %>% ungroup() %>% left_join( mutate(sleep, i = 1:n()) ) %>% mutate(resid = Reaction - mu) ``` ```{r echo=FALSE} ggplot(sleep, aes(x=Days, y=Reaction)) + tidybayes::stat_lineribbon(data=sleep_ri_pred, aes(y=y_pred), alpha=0.5) + geom_point(data=sleep) + facet_wrap(~Subject) ``` ## Model fit - lines ```{r echo=FALSE} sleep_ri_pred %>% group_by(Days, Subject) %>% tidybayes::mean_hdi(mu) %>% ungroup() %>% ggplot(aes(x=Days, y=mu, color=Subject)) + geom_line() + guides(color=FALSE) + labs(y="Reaction") ``` ## Random Effects ```{r echo=FALSE} sleep_ri_samp[1] %>% tidybayes::gather_samples(alpha[j]) %>% ungroup() %>% ggplot(aes(x=j, group=j, y=estimate)) + geom_boxplot() ``` ## Residuals ```{r echo=FALSE} sleep_ri_pred %>% group_by(Days, Subject) %>% tidybayes::mean_hdi(resid) %>% ggplot(aes(x=Days, y=resid, color=Subject)) + geom_hline(yintercept=0, color="grey") + geom_point() + guides(color=FALSE) ``` ## Residuals by subject ```{r echo=FALSE} sleep_ri_pred %>% group_by(Days, Subject) %>% tidybayes::mean_hdi(resid) %>% ggplot(aes(x=Days, y=resid, color=Subject)) + geom_hline(yintercept=0, color="grey") + geom_point() + facet_wrap(~Subject) + guides(color=FALSE) ``` ## Why not a fixed effect for Subject? \small We are not going to bother with the Bayesian model here to avoid the headache of dummy coding and the resulting $\beta$s. . . . \tinyoutput ```{r} l = lm(Reaction ~ Days + Subject - 1, data=sleep) summary(l) ``` ## Comparing Model fit ```{r echo=FALSE} bind_rows( sleep_ri_pred %>% group_by(Days, Subject) %>% tidybayes::mean_hdi(mu) %>% ungroup() %>% select(Days, Subject, pred = mu) %>% mutate(model="Random Intercept"), sleep %>% modelr::add_predictions(l) %>% select(-Subject_index, Reaction) %>% mutate(model="Fixed Effect") ) %>% ggplot(aes(x=Days, y=pred, color=model)) + geom_point(data=sleep, aes(y=Reaction), color="black") + geom_line() + facet_wrap(~Subject) + labs(y="Reaction") ``` ## Random effects vs fixed effects ```{r echo=FALSE} fix_eff = data_frame( estimate = coefficients(l)[-1], term = paste0("alpha[",1:18,"]") %>% forcats::as_factor() ) sleep_ri_samp[1] %>% tidybayes::gather_samples(alpha[i]) %>% ungroup() %>% mutate(term = paste0(term,"[",i,"]") %>% forcats::as_factor()) %>% ggplot(aes(x=term, y=estimate)) + geom_boxplot() + geom_point(data=fix_eff, color="red") + geom_hline(yintercept = mean(sleep_ri_samp[[1]][,"beta_alpha"]), color="blue") ``` ## Random Intercept Model (strong prior for $\sigma_\alpha$) \footnoteoutput ```{r} sleep_ri2 = "model{ for(i in 1:length(Reaction)) { Reaction[i] ~ dnorm(mu[i],tau) mu[i] = alpha[Subject_index[i]] + beta*Days[i] y_pred[i] ~ dnorm(mu[i],tau) } for(j in 1:18) { alpha[j] ~ dnorm(beta_alpha, tau_alpha) } beta_alpha ~ dnorm(0,1/10000) sigma_alpha ~ dunif(0, 10) tau_alpha = 1/(sigma_alpha*sigma_alpha) beta ~ dnorm(0,1/10000) sigma ~ dunif(0, 100) tau = 1/(sigma*sigma) }" ``` ```{r echo=FALSE, include=FALSE} nburn=1000; niter=5000 m = rjags::jags.model( textConnection(sleep_ri2), data = sleep, quiet = TRUE, n.chains = 1 ) update(m, n.iter=nburn, progress.bar="none") sleep_ri2_samp = rjags::coda.samples( m, n.iter=niter, progress.bar="none", variable.names=c( "y_pred", "mu", "beta", "sigma", "alpha", "beta_alpha"," sigma_alpha") ) ``` ```{r echo=FALSE, message=FALSE} sleep_ri2_pred = sleep_ri2_samp[1] %>% tidybayes::spread_samples(y_pred[i], mu[i]) %>% ungroup() %>% left_join( mutate(sleep, i = 1:n()) ) %>% mutate(resid = Reaction - mu) ``` ## Comparing Model fit ```{r echo=FALSE} bind_rows( sleep_ri_pred %>% group_by(Days, Subject) %>% tidybayes::mean_hdi(mu) %>% ungroup() %>% select(Days, Subject, pred = mu) %>% mutate(model="Random Intercept (weak)"), sleep %>% modelr::add_predictions(l) %>% select(-Subject_index, Reaction) %>% mutate(model="Fixed Effect"), sleep_ri2_pred %>% group_by(Days, Subject) %>% tidybayes::mean_hdi(mu) %>% ungroup() %>% select(Days, Subject, pred = mu) %>% mutate(model="Random Intercept (strong") ) %>% ggplot(aes(x=Days, y=pred, color=model)) + geom_point(data=sleep, aes(y=Reaction), color="black") + geom_line() + facet_wrap(~Subject) + labs(y="Reaction") ``` ## Prior Effect on $\alpha$ ```{r echo=FALSE} bind_rows( data_frame( estimate = coefficients(l)[-1], term = paste0("alpha[",1:18,"]") %>% forcats::as_factor(), model = "Fixed Effect" ), sleep_ri_samp[1] %>% tidybayes::gather_samples(alpha[i]) %>% ungroup() %>% mutate(term = paste0(term,"[",i,"]") %>% forcats::as_factor()) %>% select(term, estimate) %>% mutate(model = "Random Effect (weak)"), sleep_ri2_samp[1] %>% tidybayes::gather_samples(alpha[i]) %>% ungroup() %>% mutate(term = paste0(term,"[",i,"]") %>% forcats::as_factor()) %>% select(term, estimate) %>% mutate(model = "Random Effect (strong)") ) %>% mutate(model = forcats::as_factor(model)) %>% ggplot(aes(x=term, y=estimate, color=model)) + geom_hline(yintercept = mean(sleep_ri_samp[[1]][,"beta_alpha"]), color="#00BA38") + geom_hline(yintercept = mean(sleep_ri2_samp[[1]][,"beta_alpha"]), color="#619CFF") + geom_boxplot() ``` ## Some Distribution Theory # Random intercept and slope model ## Model Let $i$ represent each observation and $j(i)$ be the subject in oberservation $i$ then $$ Y_i = \alpha_{j(i)} + \beta_{j(i)} \times \text{Days} + \epsilon_i $$ $$ \begin{aligned} \alpha_j &\sim \mathcal{N}(\beta_0,~\sigma^2_\alpha) \\ \beta_j &\sim \mathcal{N}(\beta_1,~\sigma^2_\beta) \\ \epsilon_i &\sim \mathcal{N}(0,~\sigma^2) \\ \\ \beta_\alpha, \beta_\beta &\sim \mathcal{N}(0, 10000)\\ \sigma, \sigma_\alpha, \sigma_\beta &\sim \text{Unif}(0,100) \end{aligned} $$ ## Model - JAGS \footnoteoutput ```{r} sleep_ris = "model{ for(i in 1:length(Reaction)) { Reaction[i] ~ dnorm(mu[i],tau) mu[i] = alpha[Subject_index[i]] + beta[Subject_index[i]] * Days[i] y_pred[i] ~ dnorm(mu[i], tau) } sigma ~ dunif(0, 100) tau = 1/(sigma*sigma) for(j in 1:18) { alpha[j] ~ dnorm(beta_alpha, tau_alpha) beta[j] ~ dnorm(beta_beta, tau_beta) } beta_alpha ~ dnorm(0,1/10000) beta_beta ~ dnorm(0,1/10000) sigma_alpha ~ dunif(0, 100) tau_alpha = 1/(sigma_alpha*sigma_alpha) sigma_beta ~ dunif(0, 100) tau_beta = 1/(sigma_beta*sigma_beta) }" ``` ```{r echo=FALSE, include=FALSE} nburn=1000; niter=5000 m = rjags::jags.model( textConnection(sleep_ris), data = sleep, quiet = TRUE, n.chains = 2 ) update(m, n.iter=nburn, progress.bar="none") sleep_ris_samp = rjags::coda.samples( m, variable.names=c("alpha", "beta", "beta_alpha", "beta_beta", "sigma", "sigma_alpha", "sigma_beta", "y_pred", "mu"), n.iter=niter, progress.bar="none" ) ``` ## MCMC Diagnostics ```{r echo=FALSE} sleep_ris_samp %>% window(thin=10) %>% tidybayes::gather_samples(sigma, beta_alpha, sigma_alpha, beta_beta, sigma_beta) %>% ungroup() %>% ggplot(aes(x=.iteration, y=estimate, color=as.factor(.chain))) + geom_line(alpha=0.5) + facet_grid(term~., scale="free_y") + labs(color="chain", x="iteration") ``` ## MCMC Diagnostics - $\alpha$ ```{r echo=FALSE} sleep_ris_samp %>% window(thin=10) %>% tidybayes::gather_samples(alpha[i]) %>% ungroup() %>% mutate(term = paste0(term,"[",i,"]")) %>% ggplot(aes(x=.iteration, y=estimate, color=forcats::as_factor(as.character(.chain)))) + geom_line(alpha=0.5) + facet_wrap(~term) + labs(color="chain", x="iteration") ``` ## MCMC Diagnostics - $\beta$ ```{r echo=FALSE} sleep_ris_samp %>% window(thin=10) %>% tidybayes::gather_samples(beta[i]) %>% ungroup() %>% mutate(term = paste0(term,"[",i,"]")) %>% ggplot(aes(x=.iteration, y=estimate, color=forcats::as_factor(as.character(.chain)))) + geom_line(alpha=0.5) + facet_wrap(~term) + labs(color="chain", x="iteration") ``` ## Model fit ```{r echo=FALSE, message=FALSE} sleep_ris_pred = sleep_ri_samp[1] %>% tidybayes::spread_samples(y_pred[i], mu[i]) %>% ungroup() %>% left_join( mutate(sleep, i = 1:n()) ) %>% mutate(resid = Reaction - mu) ``` ```{r echo=FALSE} ggplot(sleep, aes(x=Days, y=Reaction)) + tidybayes::stat_lineribbon(data=sleep_ris_pred, aes(y=y_pred), alpha=0.5) + geom_point(data=sleep) + facet_wrap(~Subject) ``` ## Model fit - lines ```{r echo=FALSE} sleep_ris_pred %>% group_by(Days, Subject) %>% tidybayes::mean_hdi(mu) %>% ungroup() %>% ggplot(aes(x=Days, y=mu, color=Subject)) + geom_line() + guides(color=FALSE) + labs(y="Reaction") ``` ## Random Effects ```{r echo=FALSE} sleep_ris_samp[1] %>% tidybayes::gather_samples(alpha[i],beta[i]) %>% ungroup() %>% mutate(full_term = paste0(term,"[",i,"]") %>% forcats::as_factor()) %>% ggplot(aes(group=i, x=i, y=estimate)) + geom_boxplot() + facet_grid(term~., scale="free_y") ``` ## Residuals by subject ```{r echo=FALSE} sleep_ris_pred %>% group_by(Days, Subject) %>% tidybayes::mean_hdi(resid) %>% ggplot(aes(x=Days, y=resid, color=Subject)) + geom_hline(yintercept=0, color="grey") + geom_point() + facet_wrap(~Subject) + guides(color=FALSE) ```