bike <- read.csv("bikecrash_agg.csv")HW 12 Solutions
STA 211 Spring 2023 (Jiang)
The log-likelihood, score vector, and Hessian matrix are:
\[\begin{align*} \log \mathcal{L}&\propto \sum_{i = 1}^n y_i\mathbf{X}_i\boldsymbol\beta - \omega_i e^{\mathbf{X}_i\boldsymbol\beta}\\ \nabla \log \mathcal{L}&= \sum_{i = 1}^n \left(y_i - \omega_ie^{\mathbf{X}_i\boldsymbol\beta}\right)\mathbf{X}_i^T\\ \nabla^2 \log \mathcal{L} &= -\sum_{i = 1}^n \omega_i e^{\mathbf{X}_i\boldsymbol\beta}\mathbf{X}_i\mathbf{X}_i^T \end{align*}\]
Newton-Raphson update steps for Poisson regression with offset:
\[\begin{align*} \boldsymbol\beta^{(t+1)} &= \boldsymbol\beta^{(t)} - \left(-\sum_{i = 1}^n \omega_ie^{\mathbf{X}_i\boldsymbol\beta}\mathbf{X}_i\mathbf{X}_i^T \right)^{-1}\left(\sum_{i = 1}^n \left(y_i - \omega_i e^{\mathbf{X}_i\boldsymbol\beta}\right)\mathbf{X}_i^T \right) \end{align*}\]
d1ofs <- function(beta, X, y, offset){
d1 <- rep(0, length(beta))
for(i in 1:length(y)){
d1 <- d1 + (y[i] - offset[i] *exp(X[i,] %*% beta)) %*% X[i,]
}
return(colSums(d1))
}
d2ofs <- function(beta, X, y, offset){
d2 <- matrix(0, nrow = length(beta), ncol = length(beta))
for(i in 1:length(y)){
d2 <- d2 - offset[i] * t((exp(X[i,] %*% beta)) %*% X[i,]) %*% (X[i,])
}
return(d2)
}
beta <- c(mean(log(bike$crashes)), 0, 0)
X <- cbind(1, bike$traffic_vol, bike$pct_rural)
y <- bike$crashes
offset <- bike$pop
iter <- 1
delta <- 1
temp <- matrix(0, nrow = 500, ncol = 3)
while(delta > 0.000001 & iter < 500){
old <- beta
beta <- old - solve(d2ofs(beta = beta, X = X, y = y, offset = offset)) %*%
d1ofs(beta = beta, X = X, y = y, offset = offset)
temp[iter,] <- beta
delta <- sqrt(sum((beta - old)^2))
iter <- iter + 1
}
round(beta, 6) [,1]
[1,] -6.916803
[2,] -0.000047
[3,] -0.010936
Compare to built-in function from R:
m1 <- glm(crashes ~ traffic_vol + pct_rural, offset = log(pop),
data = bike, family = "poisson")
round(m1$coef, 6)(Intercept) traffic_vol pct_rural
-6.916803 -0.000047 -0.010936