bike <- read.csv("bikecrash_agg.csv")HW 11 Solutions
STA 211 Spring 2023 (Jiang)
The log-likelihood function for logistic regression is
\[\begin{align*} \mathcal{L}(p) &= \prod_{i = 1}^n f(y_i)\\ &= \prod_{i = 1}^n p^{y_i}(1 - p)^{1 - y_i}\\ \log\mathcal{L}(p) &= \sum_{i = 1}^n y_i\log(p) + \log(1 - p) - y_i\log(1 - p)\\ \log\mathcal{L}(p) &= \sum_{i = 1}^n y_i \log\left(\frac{p}{1 - p} \right) + \log(1 - p) \end{align*}\]
In logistic regression, we have \(\log\left(\frac{p}{1 - p} \right) = \mathbf{X}\boldsymbol\beta\), so the log-likelihood function is
\[\begin{align*} \log\mathcal{L}(p) &= \sum_{i = 1}^n y_i\mathbf{X}_i\boldsymbol\beta - \log(1 + \exp(\mathbf{X}_i\boldsymbol\beta)). \end{align*}\]
Differentiating with respect to \(\boldsymbol\beta\):
\[\begin{align*} \nabla_{\boldsymbol\beta} \log\mathcal{L}(p) &= \sum_{i = 1}^n y_i\mathbf{X}_i - \frac{\exp(\mathbf{X}_i^T\boldsymbol\beta)}{1 + \exp(\mathbf{X}_i^T\boldsymbol\beta)}\mathbf{X}_i\\ \nabla^2_{\boldsymbol\beta} \log\mathcal{L}(p) &= -\sum_{i = 1}^n \left(\frac{1}{1 + \exp(\mathbf{X}_i^T\boldsymbol\beta)}\right)\left(\frac{\exp(\mathbf{X}_i^T\boldsymbol\beta)}{1 + \exp(\mathbf{X}_i^T\boldsymbol\beta)}\right)\mathbf{X}_i\mathbf{X}_i^T. \end{align*}\]
d1func <- function(beta, X, y){
d1 <- rep(0, length(beta))
for(i in 1:length(y)){
d1 <- d1 + (y[i] - exp(X[i,] %*% beta)) %*% X[i,]
}
return(colSums(d1))
}
d2func <- function(beta, X, y){
d2 <- matrix(0, nrow = length(beta), ncol = length(beta))
for(i in 1:length(y)){
d2 <- d2 - t((exp(X[i,] %*% beta)) %*% X[i,]) %*% (X[i,])
}
return(d2)
}
beta <- c(mean(log(bike$crashes)), 0, 0)
X <- cbind(1, bike$traffic_vol, bike$pop/100000)
y <- bike$crashes
iter <- 1
delta <- 1
temp <- matrix(0, nrow = 500, ncol = 3)
while(delta > 0.000001 & iter < 500){
old <- beta
beta <- old - solve(d2func(beta = beta, X = X, y = y)) %*%
d1func(beta = beta, X = X, y = y)
temp[iter,] <- beta
delta <- sqrt(sum((beta - old)^2))
iter <- iter + 1
}
round(beta, 6) [,1]
[1,] 3.211257
[2,] 0.005554
[3,] 0.179782
Compare to built-in function from R:
m1 <- glm(crashes ~ traffic_vol + I(pop/100000),
data = bike, family = "poisson")
round(m1$coef, 6) (Intercept) traffic_vol I(pop/1e+05)
3.211257 0.005554 0.179782
table(bike$crashes/bike$pop*100000 > 50)
FALSE TRUE
55 45
d1func <- function(beta, X, y){
d1 <- rep(0, length(beta))
for(i in 1:length(y)){
d1 <- d1 + as.numeric(y[i] - exp(X[i,] %*% beta)/(1 + exp(X[i,] %*% beta))) %*% X[i,]
}
return(colSums(d1))
}
d2func <- function(beta, X, y){
d2 <- matrix(0, nrow = length(beta), ncol = length(beta))
for(i in 1:length(y)){
d2 <- d2 + as.numeric(1/(1 + exp(X[i,] %*% beta)))*
as.numeric((exp(X[i,] %*% beta)/(1 + exp(X[i,] %*% beta)))) *
(X[i,]) %*% t(X[i,])
}
return(-1 * d2)
}
beta <- c(0, 0, 0)
X <- cbind(1, bike$traffic_vol, bike$pop/100000)
y <- I(bike$crashes/bike$pop*100000 > 50)
iter <- 1
delta <- 1
temp <- matrix(0, nrow = 500, ncol = 3)
while(delta > 0.000001 & iter < 50){
old <- beta
beta <- old - solve(d2func(beta = beta, X = X, y = y)) %*%
d1func(beta = beta, X = X, y = y)
temp[iter,] <- beta
delta <- sqrt(sum((beta - old)^2))
iter <- iter + 1
}
round(beta, 6) [,1]
[1,] -1.309845
[2,] 0.014234
[3,] 0.036717
Compare to built-in function from R:
m2 <- glm(I(crashes/pop*100000 > 50) ~
traffic_vol + I(pop/100000),
data = bike, family = "binomial")
round(m2$coef, 6) (Intercept) traffic_vol I(pop/1e+05)
-1.309845 0.014234 0.036717