## Analysis of traffic data as discussed in class require(MASS) data(Traffic) str(Traffic) require(ggplot2) ## split data ts <- split(Traffic, Traffic$limit) y.no <- ts[["no"]]$y n.no <- length(y.no) y.yes <- ts[["yes"]]$y n.yes <- length(y.yes) ## Poisson fit under prior lam ~ Ga(b1, b2) nsamp <- 1000 #b1 <- 1; b2 <- 1/10 ## diffuse prior with mean 10 #b1 <- 0.001; b2 <- 0.001 ## Gelman type weak prior b1 <- 0.5; b2 <- 0 ## Jeffreys' prior b1.yes <- b1 + sum(y.yes); b2.yes <- b2 + n.yes lam.yes <- rgamma(nsamp, b1.yes, b2.yes) y.star.yes <- rpois(nsamp, lam.yes) b1.no <- b1 + sum(y.no); b2.no <- b2 + n.no lam.no <- rgamma(nsamp, b1.no, b2.no) y.star.no <- rpois(nsamp, lam.no) post.dat <- data.frame(y.star = c(y.star.yes, y.star.no), lam = c(lam.yes, lam.no), limit = rep(c("yes", "no"), each = 1000)) ## plot posterior draws of lam by limit, ## plot posterior predictive draws of y by limit ## and constrast the latter with observed y by limit plot1 <- ggplot(post.dat, aes(factor(limit), lam)) + geom_violin(aes(fill = limit)) plot2 <- ggplot(post.dat, aes(factor(limit), y.star)) + geom_violin(aes(fill = limit)) + ylim(0, 60) plot3 <- ggplot(Traffic, aes(factor(limit), y)) + geom_violin(aes(fill = limit)) + ylim(0, 60) require(gridExtra) grid.arrange(plot1, plot2, plot3, nrow = 1) ## 95% interval for difference in accident counts quantile(y.star.no - y.star.yes, pr = c(0.025, 0.5, 0.975)) ## estimate of P(#accidents with limit = no > #accidenta with limit = yes) mean(y.star.no > y.star.yes) ## second look at the observed data ggplot(Traffic, aes(y, fill = limit, color = limit)) + geom_density(alpha = 0.5) + xlim(0, 60) ## Mann-Whitney-Wilcoxon test w.test <- wilcox.test(y.no, y.yes, conf.int = TRUE) print(w.test) ## estimate of P(#accidents with limit = no > #accidenta with limit = yes) w.test$statistic / (n.no * n.yes)