## Analysis of traffic data as discussed in class

## Dataset and formatting
require(MASS)
data(Traffic)
ts <- split(Traffic, Traffic$limit)
y.no <- ts[["no"]]$y
n.no <- length(y.no)
y.yes <- ts[["yes"]]$y
n.yes <- length(y.yes)

## The following function draws a random number from the multinomial
## distribution. It takes as input a (possibly un-normalized)
## probability vector `prob' and samples an integer k between 1 and
## K = length(prob) with probability prob[k].

rmult <- function(prob)
  return(sample(length(prob), size = 1, prob = prob))

## The following function implements the main Gibbs sampler.
## Recall our model is given as follows:
##
## p(y | latent, lambda, omega) = prod_j Pois(y[j] | lambda[latent[j]])
## p(latent | lambda, omega)    = prod_j Mult(latent[j] | K, omega)
## p(lambda, omega)             = prod_k Gamma(lambda[k] | b1[k], b2[k])
##                                  x Dir(omega | a[1], ..., a[K])

gibbs.pgmix <- function(y, K, latent = NULL, a = rep(1, K),
                        b1 = rep(2, K), b2 = rep(2, K), n.sweep = 1e3){

  ## Need library `bayesm' for the function `rdirichlet' that
  ## generates random probability vectors from the Dirichlet
  ## distribution
  require(bayesm)

  ## variables
  n <- length(y)
  omega <- rep(NA, K)
  lambda <- rep(NA, K)

  ## latent must be initialized -- if not supplied by the user then
  ## sample latent from Mult(K, E(omega)) where E(omega) is the prior
  ## expectation of omega given by the normalized version of `a'.
  if(is.null(latent))
    latent <- replicate(n, rmult(a))
  ## The above is the same as
  ##   for(j in 1:n)
  ##     latent[j] <- rmult(a)

  ## Storage for omega and lambda -- will not store latent. Recall
  ## that augmenting latent was simply to facilitate Gibbs sampling --
  ## our interest still lies with the original parameters omega and
  ## lambda.
  store.omega <- matrix(NA, n.sweep, K)
  store.lambda <- matrix(NA, n.sweep, K)

  ## Begin looping
  for(sweep in 1:n.sweep){

    ## Forming clusters according to latent: the k-th cluster
    ## corresponds to observations with latent[j] == k. Counting with
    ## tabulate() (rather than split(), which silently drops empty
    ## groups) keeps all K clusters, recording empty ones as zero
    ## counts and zero sums.
    m <- tabulate(latent, nbins = K)
    ysum <- sapply(1:K, function(k) sum(y[latent == k]))

    ## Updating theta = (omega, lambda)
    omega <- rdirichlet(a + m)
    lambda <- rgamma(K, b1 + ysum, b2 + m)

    ## Updating latent variables: prob.mat[k, j] is proportional to
    ## omega[k] * Pois(y[j] | lambda[k])
    prob.mat <- omega * t(outer(y, lambda, "dpois"))
    latent <- apply(prob.mat, 2, rmult)

    ## store
    store.omega[sweep, ] <- omega
    store.lambda[sweep, ] <- lambda
  }

  return(list(omega = store.omega, lambda = store.lambda,
              last.latent = latent))
}

## The following computes the mixture sampling density
## p(y | omega, lambda) = sum_k omega[k] Pois(y | lambda[k])
## for each row of the matrices `lambda' and `omega' (one row per
## saved posterior draw).
dmixpois <- function(x, lambda, omega)
  return(rowSums(dpois(x, lambda) * omega))

## The following generates one random draw from the mixture sampling
## density for each row of `lambda' and `omega'.
rmixpois <- function(lambda, omega){
  K <- ncol(omega)
  ix <- apply(omega, 1, rmult)
  lam <- rowSums(lambda * diag(K)[ix, ])
  return(rpois(nrow(lambda), lam))
}

## Posterior inference
## Get an MCMC sample of (omega, lambda) from each posterior distribution.
out.no <- gibbs.pgmix(y.no, 2, a = c(2, 1), b1 = 2, b2 = 1/10, n.sweep = 2e3)
out.yes <- gibbs.pgmix(y.yes, 2, a = c(2, 1), b1 = 2, b2 = 1/10, n.sweep = 2e3)

## Discard the first 1000 draws as burn-in, then keep every tenth draw.
last <- seq(1e3 + 10, 2e3, 10)
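
## An optional sanity check before using these draws (a minimal
## sketch, not part of the in-class analysis): trace plots of the
## lambda draws to eyeball mixing and the adequacy of the burn-in.
## Keep in mind that mixture component labels can switch between
## sweeps, so the two traces may occasionally swap levels.
op <- par(mfrow = c(2, 1), mar = c(4, 4, 2, 1))
matplot(out.no$lambda, type = "l", lty = 1,
        xlab = "sweep", ylab = "lambda", main = "no limit")
matplot(out.yes$lambda, type = "l", lty = 1,
        xlab = "sweep", ylab = "lambda", main = "limit")
par(op)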
## Some visual depiction of the posterior draws: plot the mixture
## sampling density p(y | lambda, omega) for each of the saved draws
## of (lambda, omega).
brks <- 0:50 + 0.5   ## histogram bin breaks (not used below)
g <- seq(0, 50, 1)
f.no <- sapply(g, dmixpois, lambda = out.no$lambda[last, ],
               omega = out.no$omega[last, ])
f.yes <- sapply(g, dmixpois, lambda = out.yes$lambda[last, ],
                omega = out.yes$omega[last, ])

plot(g, 0 * g, ylim = range(range(f.no), range(f.yes)),
     ann = FALSE, type = "n")
for(i in 1:length(last))
  lines(g, f.no[i, ], col = "gray")
for(i in 1:length(last))
  lines(g, f.yes[i, ], col = "orange")
text(35, 0.082, "Speed limit")
legend(35, 0.08, c("no", "yes"), lty = c(1, 1), col = c(1, 2), bty = "n")

## Overlay the mean of these densities -- which also is the posterior
## predictive distribution of a future observation from the model.
lines(g, apply(f.no, 2, mean), lwd = 2)
lines(g, apply(f.yes, 2, mean), lwd = 2, col = "red")

## Draw future samples from the model.
y.no.fut <- rmixpois(out.no$lambda[last, ], out.no$omega[last, ])
y.yes.fut <- rmixpois(out.yes$lambda[last, ], out.yes$omega[last, ])

## Posterior probability of the event that imposing a speed limit
## reduces accident counts.
pr.better <- mean(y.no.fut > y.yes.fut)

## A word of caution -- we have only 100 draws of the future
## observations, which might be too small for a good Monte Carlo
## approximation.
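
## To quantify that caution (a minimal sketch, not part of the
## original analysis): a rough Monte Carlo standard error for
## pr.better is sqrt(p * (1 - p) / n), and the approximation can be
## tightened by recycling the saved posterior draws -- sampled with
## replacement -- to generate many more future observations. The
## names se.better and pr.better.more below are new to this sketch.
se.better <- sqrt(pr.better * (1 - pr.better) / length(y.no.fut))
more <- sample(last, 1e4, replace = TRUE)
y.no.more <- rmixpois(out.no$lambda[more, ], out.no$omega[more, ])
y.yes.more <- rmixpois(out.yes$lambda[more, ], out.yes$omega[more, ])
pr.better.more <- mean(y.no.more > y.yes.more)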