Exam 02 review

heart_data <- read_csv("https://raw.githubusercontent.com/sta210-sp20/datasets/master/framingham.csv") %>%
  drop_na() %>% #remove observations with missing values
  mutate(education = case_when(
    education == 1 ~ "Some HS", 
    education == 2 ~ "HS or GED", 
    education == 3 ~ "Some College", 
    education == 4 ~ "College"
  ),
  TenYearCHD = as.factor(TenYearCHD), 
  currentSmoker = as.factor(currentSmoker),
  ageCent = age - mean(age), 
  totCholCent = totChol - mean(totChol), 
  BPMeds = as.factor(BPMeds), 
  prevalentStroke = as.factor(prevalentStroke), 
  prevalentHyp = as.factor(prevalentHyp), 
  male = as.factor(male), 
  diabetes = as.factor(diabetes)
  )

Interpreting interaction terms

risk_m <- glm(TenYearCHD ~ ageCent + currentSmoker + totCholCent + 
                education + ageCent*currentSmoker + currentSmoker*education, 
              data = heart_data, family = binomial)
tidy(risk_m, exponentiate = FALSE) %>%
  select(term, estimate) %>%
  kable(format = "markdown", digits = 3)

term	estimate
(Intercept)	-1.881
ageCent	0.088
currentSmoker1	0.020
totCholCent	0.003
educationHS or GED	-0.374
educationSome College	-0.595
educationSome HS	-0.186
ageCent:currentSmoker1	-0.017
currentSmoker1:educationHS or GED	0.424
currentSmoker1:educationSome College	0.746
currentSmoker1:educationSome HS	0.572

Odds to predicted probabilities

x0 <- tibble(ageCent = (60 - 49.552), 
                 totCholCent = (263 - 236.848), 
currentSmoker = as.factor(0), 
education = "College")

x0

## # A tibble: 1 x 4
##   ageCent totCholCent currentSmoker education
##     <dbl>       <dbl> <fct>         <chr>    
## 1    10.4        26.2 0             College

Predicted log-odds

(pred_log_odds <- predict(risk_m, x0))

##         1 
## -0.891849

predicted odds

(pred_odds <- exp(pred_log_odds))

##         1 
## 0.4098972

predicted probability

(pred_prob <- pred_odds / (1 + pred_odds))

##         1 
## 0.2907284

Using confidence interval to determine if significant

Model in terms of log-odds

tidy(risk_m, conf.int = TRUE, exponentiate = FALSE) %>%
  kable(format = "markdown", digits = 3)

term	estimate	std.error	statistic	p.value	conf.low	conf.high
(Intercept)	-1.881	0.203	-9.249	0.000	-2.297	-1.497
ageCent	0.088	0.009	9.579	0.000	0.070	0.106
currentSmoker1	0.020	0.291	0.070	0.944	-0.554	0.591
totCholCent	0.003	0.001	2.504	0.012	0.001	0.005
educationHS or GED	-0.374	0.247	-1.516	0.130	-0.853	0.118
educationSome College	-0.595	0.274	-2.172	0.030	-1.133	-0.056
educationSome HS	-0.186	0.224	-0.831	0.406	-0.614	0.264
ageCent:currentSmoker1	-0.017	0.012	-1.379	0.168	-0.041	0.007
currentSmoker1:educationHS or GED	0.424	0.347	1.222	0.222	-0.255	1.109
currentSmoker1:educationSome College	0.746	0.385	1.937	0.053	-0.007	1.504
currentSmoker1:educationSome HS	0.572	0.320	1.785	0.074	-0.055	1.204

Model in terms of odds (exponentiated coefficients)

tidy(risk_m, conf.int = TRUE, exponentiate = TRUE) %>%
  kable(format = "markdown", digits = 3)

term	estimate	std.error	statistic	p.value	conf.low	conf.high
(Intercept)	0.152	0.203	-9.249	0.000	0.101	0.224
ageCent	1.092	0.009	9.579	0.000	1.073	1.112
currentSmoker1	1.021	0.291	0.070	0.944	0.575	1.805
totCholCent	1.003	0.001	2.504	0.012	1.001	1.005
educationHS or GED	0.688	0.247	-1.516	0.130	0.426	1.125
educationSome College	0.552	0.274	-2.172	0.030	0.322	0.946
educationSome HS	0.831	0.224	-0.831	0.406	0.541	1.303
ageCent:currentSmoker1	0.983	0.012	-1.379	0.168	0.960	1.007
currentSmoker1:educationHS or GED	1.529	0.347	1.222	0.222	0.775	3.032
currentSmoker1:educationSome College	2.108	0.385	1.937	0.053	0.993	4.500
currentSmoker1:educationSome HS	1.772	0.320	1.785	0.074	0.946	3.332

Exam 02 review

2020-04-15

Interpreting interaction terms

Odds to predicted probabilities

Predicted log-odds

predicted odds

predicted probability

Using confidence interval to determine if significant

Model in terms of log-odds

Model in terms of odds (exponentiated coefficients)

Practice exam #3 and #4