library(tidyverse)
library(broom)
library(knitr)
library(patchwork)
Lecture 11: Multilevel models
<- read_csv("data/musicdata.csv") |>
music mutate(orchestra = factor(if_else(instrument == "orchestral instrument",
1, 0)),
large_ensemble = factor(if_else(perform_type == "Large Ensemble",
1, 0))
)
Part 1: Univariate EDA
Below are plots of the distribution of the response variable na
for (1) individual performances and (2) aggregated by musician (id
)
<- ggplot(data = music, aes(x = na)) +
p1 geom_histogram(color = "white", binwidth = 2) +
labs(x = "NA",
title = "Individual performances")
<- music |>
p2 group_by(id) |>
summarise(mean_na = mean(na)) |>
ggplot(aes(x = mean_na)) +
geom_histogram(color = "white", binwidth = 2) +
labs(x = "Average NA",
title = "Aggregated by musician")
+ p2 + plot_annotation(title = "Negative affect scores") p1
How are the plots similar? How do they differ?
What are some advantages of each plot? What are some disadvantages?
Part 2: Bivariate EDA
Make a single scatterplot of the negative affect versus number of previous performances (previous
) using the individual observations. Use geom_smooth()
to add a linear regression line to the plot.
# add code
Make a separate scatterplot of negative affect versus number of previous performances (previous
) for each musician (id
). Use geom_smooth()
to add a linear regression line to each plot.
# add code
How are the plots similar? How do they differ?
What are some advantages of each plot? What are some disadvantages?
Part 3: Level One Models
Fit and display the Level One model for musician 22.
|>
music filter(id == 22) |>
lm(na ~ large_ensemble, data = _) |>
tidy() |>
kable(digits = 3)
term | estimate | std.error | statistic | p.value |
---|---|---|---|---|
(Intercept) | 24.500 | 1.96 | 12.503 | 0.000 |
large_ensemble1 | -7.833 | 2.53 | -3.097 | 0.009 |
Fit the Level One models to obtain the fitted slope, intercept, and \(R^2\) value for each musician.
# set up tibble for fitted values
<- tibble(slopes = rep(NA,37),
model_stats intercepts = rep(NA,37),
r.squared = rep(NA, 37))
<- music |> distinct(id) |> pull()
ids
# fit the model and score the releveant statistcs
for(i in 1:length(ids)){
<- music |>
level_one_data filter(id == ids[i])
# check if more than one performance type
<- level_one_data |>
num_perform_types distinct(large_ensemble) |>
nrow()
if(num_perform_types > 1){
#if more than one performance type, calculate all model statistics
<- lm(na ~ large_ensemble,
level_one_model data = level_one_data)
<- tidy(level_one_model)
level_one_model_tidy
$slopes[i] <- level_one_model_tidy$estimate[2]
model_stats$intercepts[i] <- level_one_model_tidy$estimate[1]
model_stats$r.squared[i] <- glance(level_one_model)$r.squared
model_statselse{
}# if only one performance type, calculate intercept and R^2
$intercepts[i] <- mean(level_one_data$na)
model_stats$r.squared[i] <- 0
model_stats
} }
Plot the estimated intercepts, slopes and, \(R^2\) values.
<- ggplot(model_stats, aes(x = intercepts)) +
level_one_int geom_histogram(color = "white", binwidth = 2) +
labs(x = "",
title = "Fitted intercepts",
subtitle = "for 37 musicians")
<- ggplot(model_stats, aes(x = slopes)) +
level_one_slope geom_histogram(color = "white", binwidth = 2) +
labs(x = "",
title = "Fitted slopes",
subtitle = "for 37 musicians")
+ level_one_slope level_one_int
ggplot(model_stats, aes(x = r.squared)) +
geom_histogram(color = "white", binwidth = 0.05) +
labs(x = "",
title = "Fitted R-squared values",
subtitle = "for 37 musicians")
Part 4: Level Two Models
# Make a Level Two data set
<- music |>
musicians distinct(id, orchestra) |>
bind_cols(model_stats)
Model for intercepts
<- lm(intercepts ~ orchestra, data = musicians)
a tidy(a) |>
kable(digits = 3)
term | estimate | std.error | statistic | p.value |
---|---|---|---|---|
(Intercept) | 16.283 | 0.671 | 24.249 | 0.000 |
orchestra1 | 1.411 | 0.991 | 1.424 | 0.163 |
Model for slopes
<- lm(slopes ~ orchestra, data = musicians)
b tidy(b) |>
kable(digits = 3)
term | estimate | std.error | statistic | p.value |
---|---|---|---|---|
(Intercept) | -0.771 | 0.851 | -0.906 | 0.373 |
orchestra1 | -1.406 | 1.203 | -1.168 | 0.253 |