## Analysis of traffic data as discussed in class with DP mixture ## Dataset and formatting require(MASS) data(Traffic) ts <- split(Traffic, Traffic$limit) y.no <- ts[["no"]]$y n.no <- length(y.no) y.yes <- ts[["yes"]]$y n.yes <- length(y.yes) ## The following function draws a random number from ## the multinomial distribution. It takes as input ## a (possibly un-normalized) probability vector ## prob and samples an integer k between 1 and ## K = length(prob) with probability prob[k]. rmult <- function(prob) return(sample(length(prob), size = 1, prob = prob)) gibbs.pgdpmix <- function(y, zeta = NULL, a = 1, b1 = 1, b2 = 2, n.sweep = 1e3){ n <- length(y) if(is.null(zeta)) zeta <- rgamma(n, b1, b2) ## Storage for zeta store.zeta <- matrix(NA, n.sweep, n) ## Begin looping for(sweep in 1:n.sweep){ for(i in 1:n){ q <- dpois(y[i], zeta) q[i] <- a * dnbinom(y[i], size = b1, prob = b2/(1 + b2)) ## this is really q0, but I am placing it at the empty cell q[i] l <- rmult(q) if(l == i) zeta[i] <- rgamma(1, b1 + y[i], b2 + 1) else zeta[i] <- zeta[l] } ## store store.zeta[sweep,] <- zeta } return(list(zeta = store.zeta, last.zeta = zeta)) } ## The following computes the mixture sampling density for a future observable dmixpois <- Vectorize(function(x, zeta, a, b1, b2) return((sum(dpois(x, zeta)) + a * dnbinom(x, size = b1, prob = b2/(1 + b2))) / (a + length(zeta))), "x") ## The following generates a random future observable from the poserior predictive rmixpois <- function(zeta, a, b1, b2){ n <- length(zeta) ix <- sample(n + 1, 1, replace = TRUE, prob = c(rep(1,n), a)) if(ix == n + 1) x <- rnbinom(1, size = b1, prob = b2 / (b2 + 1)) else x <- rpois(1, zeta[ix]) return(x) } ## Posterior inference ## Get MCMC sample of (omega, lambda) from the posterior distributions. out.dp.no <- gibbs.pgdpmix(y.no, 2, a = 1, b1 = 2, b2 = 1/10, n.sweep = 2e3) out.dp.yes <- gibbs.pgdpmix(y.yes, 2, a = 1, b1 = 2, b2 = 1/10, n.sweep = 2e3) ## Shall discard the first 1000 draws and then save every tenth last <- seq(1e3 + 10, 2e3, 10) ## Some visual depiction of posterior draws. brks <- 0:50 + 0.5 g <- seq(0, 50, 1) f.dp.no <- apply(out.dp.no$zeta[last,], 1, dmixpois, x = g, a = 1, b1 = 2, b2 = 1 / 10) f.dp.yes <- apply(out.dp.yes$zeta[last,], 1, dmixpois, x = g, a = 1, b1 = 2, b2 = 1 / 10) plot(g, 0 * g, ylim = range(range(f.no), range(f.yes)), ann = F, ty = "n") for(i in 1:length(last)) lines(g, f.dp.no[,i], col = "gray") for(i in 1:length(last)) lines(g, f.dp.yes[,i], col = "orange") text(35, 0.082, "Speed limit") legend(35, 0.08, c("no", "yes"), lty = c(1, 1), col = c(1, 2), bty = "n") ## overlay the mean of these densities -- which also is the posterior predictive ## distribution of a future observation from the model lines(g, apply(f.dp.no, 1, mean), lwd = 2) lines(g, apply(f.dp.yes, 1, mean), lwd = 2, col = "red") ## draw future samples from the model y.no.fut <- apply(out.dp.no$zeta[last,], 1, rmixpois, a = 1, b1 = 2, b2 = 1 / 10) y.yes.fut <- apply(out.dp.yes$zeta[last,], 1, rmixpois, a = 1, b1 = 2, b2 = 1 / 10) ## posterior probability of the event that imposing speed limit ## reduces accident counts pr.better <- mean(y.no.fut > y.yes.fut) ## word of caution -- we had only 100 draws of the future obs ## this might be too small for a good Monte Carlo approximation