heart_data <- read_csv("https://raw.githubusercontent.com/sta210-sp20/datasets/master/framingham.csv") %>%
drop_na() %>% #remove observations with missing values
mutate(education = case_when(
education == 1 ~ "Some HS",
education == 2 ~ "HS or GED",
education == 3 ~ "Some College",
education == 4 ~ "College"
),
TenYearCHD = as.factor(TenYearCHD),
currentSmoker = as.factor(currentSmoker),
ageCent = age - mean(age),
totCholCent = totChol - mean(totChol),
BPMeds = as.factor(BPMeds),
prevalentStroke = as.factor(prevalentStroke),
prevalentHyp = as.factor(prevalentHyp),
male = as.factor(male),
diabetes = as.factor(diabetes)
)
Interpreting interaction terms
risk_m <- glm(TenYearCHD ~ ageCent + currentSmoker + totCholCent +
education + ageCent*currentSmoker + currentSmoker*education,
data = heart_data, family = binomial)
tidy(risk_m, exponentiate = FALSE) %>%
select(term, estimate) %>%
kable(format = "markdown", digits = 3)
(Intercept) |
-1.881 |
ageCent |
0.088 |
currentSmoker1 |
0.020 |
totCholCent |
0.003 |
educationHS or GED |
-0.374 |
educationSome College |
-0.595 |
educationSome HS |
-0.186 |
ageCent:currentSmoker1 |
-0.017 |
currentSmoker1:educationHS or GED |
0.424 |
currentSmoker1:educationSome College |
0.746 |
currentSmoker1:educationSome HS |
0.572 |
Odds to predicted probabilities
x0 <- tibble(ageCent = (60 - 49.552),
totCholCent = (263 - 236.848),
currentSmoker = as.factor(0),
education = "College")
x0
## # A tibble: 1 x 4
## ageCent totCholCent currentSmoker education
## <dbl> <dbl> <fct> <chr>
## 1 10.4 26.2 0 College
Predicted log-odds
(pred_log_odds <- predict(risk_m, x0))
## 1
## -0.891849
predicted odds
(pred_odds <- exp(pred_log_odds))
## 1
## 0.4098972
predicted probability
(pred_prob <- pred_odds / (1 + pred_odds))
## 1
## 0.2907284
Using confidence interval to determine if significant
Model in terms of log-odds
tidy(risk_m, conf.int = TRUE, exponentiate = FALSE) %>%
kable(format = "markdown", digits = 3)
(Intercept) |
-1.881 |
0.203 |
-9.249 |
0.000 |
-2.297 |
-1.497 |
ageCent |
0.088 |
0.009 |
9.579 |
0.000 |
0.070 |
0.106 |
currentSmoker1 |
0.020 |
0.291 |
0.070 |
0.944 |
-0.554 |
0.591 |
totCholCent |
0.003 |
0.001 |
2.504 |
0.012 |
0.001 |
0.005 |
educationHS or GED |
-0.374 |
0.247 |
-1.516 |
0.130 |
-0.853 |
0.118 |
educationSome College |
-0.595 |
0.274 |
-2.172 |
0.030 |
-1.133 |
-0.056 |
educationSome HS |
-0.186 |
0.224 |
-0.831 |
0.406 |
-0.614 |
0.264 |
ageCent:currentSmoker1 |
-0.017 |
0.012 |
-1.379 |
0.168 |
-0.041 |
0.007 |
currentSmoker1:educationHS or GED |
0.424 |
0.347 |
1.222 |
0.222 |
-0.255 |
1.109 |
currentSmoker1:educationSome College |
0.746 |
0.385 |
1.937 |
0.053 |
-0.007 |
1.504 |
currentSmoker1:educationSome HS |
0.572 |
0.320 |
1.785 |
0.074 |
-0.055 |
1.204 |
Model in terms of odds (exponentiated coefficients)
tidy(risk_m, conf.int = TRUE, exponentiate = TRUE) %>%
kable(format = "markdown", digits = 3)
(Intercept) |
0.152 |
0.203 |
-9.249 |
0.000 |
0.101 |
0.224 |
ageCent |
1.092 |
0.009 |
9.579 |
0.000 |
1.073 |
1.112 |
currentSmoker1 |
1.021 |
0.291 |
0.070 |
0.944 |
0.575 |
1.805 |
totCholCent |
1.003 |
0.001 |
2.504 |
0.012 |
1.001 |
1.005 |
educationHS or GED |
0.688 |
0.247 |
-1.516 |
0.130 |
0.426 |
1.125 |
educationSome College |
0.552 |
0.274 |
-2.172 |
0.030 |
0.322 |
0.946 |
educationSome HS |
0.831 |
0.224 |
-0.831 |
0.406 |
0.541 |
1.303 |
ageCent:currentSmoker1 |
0.983 |
0.012 |
-1.379 |
0.168 |
0.960 |
1.007 |
currentSmoker1:educationHS or GED |
1.529 |
0.347 |
1.222 |
0.222 |
0.775 |
3.032 |
currentSmoker1:educationSome College |
2.108 |
0.385 |
1.937 |
0.053 |
0.993 |
4.500 |
currentSmoker1:educationSome HS |
1.772 |
0.320 |
1.785 |
0.074 |
0.946 |
3.332 |
Practice exam #3 and #4