## Analysis of traffic data as discussed in class
require(MASS)
data(Traffic)
str(Traffic)

require(ggplot2)

## split data
ts <- split(Traffic, Traffic$limit)

y.no <- ts[["no"]]$y
n.no <- length(y.no)

y.yes <- ts[["yes"]]$y
n.yes <- length(y.yes)

## Poisson fit under prior lam ~ Ga(b1, b2)
nsamp <- 1000
#b1 <- 1; b2 <- 1/10 ## diffuse prior with mean 10
#b1 <- 0.001; b2 <- 0.001 ## Gelman type weak prior
b1 <- 0.5; b2 <- 0 ## Jeffreys' prior

b1.yes <- b1 + sum(y.yes); b2.yes <- b2 + n.yes
lam.yes <- rgamma(nsamp, b1.yes, b2.yes)
y.star.yes <- rpois(nsamp, lam.yes)

b1.no <- b1 + sum(y.no); b2.no <- b2 + n.no
lam.no <- rgamma(nsamp, b1.no, b2.no)
y.star.no <- rpois(nsamp, lam.no)

post.dat <- data.frame(y.star = c(y.star.yes, y.star.no),
                       lam = c(lam.yes, lam.no),
                       limit = rep(c("yes", "no"), each = 1000))


## plot posterior draws of lam by limit, 
## plot posterior predictive draws of y by limit 
## and constrast the latter with observed y by limit
plot1 <- ggplot(post.dat, aes(factor(limit), lam)) + geom_violin(aes(fill = limit))
plot2 <- ggplot(post.dat, aes(factor(limit), y.star)) + geom_violin(aes(fill = limit)) + ylim(0, 60)
plot3 <- ggplot(Traffic, aes(factor(limit), y)) + geom_violin(aes(fill = limit)) + ylim(0, 60)

require(gridExtra)
grid.arrange(plot1, plot2, plot3, nrow = 1)

## 95% interval for difference in accident counts
quantile(y.star.no - y.star.yes, pr = c(0.025, 0.5, 0.975))

## estimate of P(#accidents with limit = no > #accidenta with limit = yes)
mean(y.star.no > y.star.yes)


## second look at the observed data
ggplot(Traffic, aes(y, fill = limit, color = limit)) + geom_density(alpha = 0.5) + xlim(0, 60)

## Mann-Whitney-Wilcoxon test
w.test <- wilcox.test(y.no, y.yes, conf.int = TRUE)
print(w.test)

## estimate of P(#accidents with limit = no > #accidenta with limit = yes)
w.test$statistic / (n.no * n.yes)