heart_data <- read_csv("https://raw.githubusercontent.com/sta210-sp20/datasets/master/framingham.csv") %>%
  drop_na() %>% #remove observations with missing values
  mutate(education = case_when(
    education == 1 ~ "Some HS", 
    education == 2 ~ "HS or GED", 
    education == 3 ~ "Some College", 
    education == 4 ~ "College"
  ),
  TenYearCHD = as.factor(TenYearCHD), 
  currentSmoker = as.factor(currentSmoker),
  ageCent = age - mean(age), 
  totCholCent = totChol - mean(totChol), 
  BPMeds = as.factor(BPMeds), 
  prevalentStroke = as.factor(prevalentStroke), 
  prevalentHyp = as.factor(prevalentHyp), 
  male = as.factor(male), 
  diabetes = as.factor(diabetes)
  )

Interpreting interaction terms

risk_m <- glm(TenYearCHD ~ ageCent + currentSmoker + totCholCent + 
                education + ageCent*currentSmoker + currentSmoker*education, 
              data = heart_data, family = binomial)
tidy(risk_m, exponentiate = FALSE) %>%
  select(term, estimate) %>%
  kable(format = "markdown", digits = 3)
term estimate
(Intercept) -1.881
ageCent 0.088
currentSmoker1 0.020
totCholCent 0.003
educationHS or GED -0.374
educationSome College -0.595
educationSome HS -0.186
ageCent:currentSmoker1 -0.017
currentSmoker1:educationHS or GED 0.424
currentSmoker1:educationSome College 0.746
currentSmoker1:educationSome HS 0.572

Odds to predicted probabilities

x0 <- tibble(ageCent = (60 - 49.552), 
                 totCholCent = (263 - 236.848), 
currentSmoker = as.factor(0), 
education = "College")
x0
## # A tibble: 1 x 4
##   ageCent totCholCent currentSmoker education
##     <dbl>       <dbl> <fct>         <chr>    
## 1    10.4        26.2 0             College

Predicted log-odds

(pred_log_odds <- predict(risk_m, x0)) 
##         1 
## -0.891849

predicted odds

(pred_odds <- exp(pred_log_odds))
##         1 
## 0.4098972

predicted probability

(pred_prob <- pred_odds / (1 + pred_odds))
##         1 
## 0.2907284

Using confidence interval to determine if significant

Model in terms of log-odds

tidy(risk_m, conf.int = TRUE, exponentiate = FALSE) %>%
  kable(format = "markdown", digits = 3)
term estimate std.error statistic p.value conf.low conf.high
(Intercept) -1.881 0.203 -9.249 0.000 -2.297 -1.497
ageCent 0.088 0.009 9.579 0.000 0.070 0.106
currentSmoker1 0.020 0.291 0.070 0.944 -0.554 0.591
totCholCent 0.003 0.001 2.504 0.012 0.001 0.005
educationHS or GED -0.374 0.247 -1.516 0.130 -0.853 0.118
educationSome College -0.595 0.274 -2.172 0.030 -1.133 -0.056
educationSome HS -0.186 0.224 -0.831 0.406 -0.614 0.264
ageCent:currentSmoker1 -0.017 0.012 -1.379 0.168 -0.041 0.007
currentSmoker1:educationHS or GED 0.424 0.347 1.222 0.222 -0.255 1.109
currentSmoker1:educationSome College 0.746 0.385 1.937 0.053 -0.007 1.504
currentSmoker1:educationSome HS 0.572 0.320 1.785 0.074 -0.055 1.204

Model in terms of odds (exponentiated coefficients)

tidy(risk_m, conf.int = TRUE, exponentiate = TRUE) %>%
  kable(format = "markdown", digits = 3)
term estimate std.error statistic p.value conf.low conf.high
(Intercept) 0.152 0.203 -9.249 0.000 0.101 0.224
ageCent 1.092 0.009 9.579 0.000 1.073 1.112
currentSmoker1 1.021 0.291 0.070 0.944 0.575 1.805
totCholCent 1.003 0.001 2.504 0.012 1.001 1.005
educationHS or GED 0.688 0.247 -1.516 0.130 0.426 1.125
educationSome College 0.552 0.274 -2.172 0.030 0.322 0.946
educationSome HS 0.831 0.224 -0.831 0.406 0.541 1.303
ageCent:currentSmoker1 0.983 0.012 -1.379 0.168 0.960 1.007
currentSmoker1:educationHS or GED 1.529 0.347 1.222 0.222 0.775 3.032
currentSmoker1:educationSome College 2.108 0.385 1.937 0.053 0.993 4.500
currentSmoker1:educationSome HS 1.772 0.320 1.785 0.074 0.946 3.332

Practice exam #3 and #4