## Analysis of traffic data as discussed in class with DP mixture

## Dataset and formatting

require(MASS)
data(Traffic)
ts <- split(Traffic, Traffic$limit)

y.no <- ts[["no"]]$y
n.no <- length(y.no)

y.yes <- ts[["yes"]]$y
n.yes <- length(y.yes)

## The following function draws a random number from
## the multinomial distribution. It takes as input
## a (possibly un-normalized) probability vector
## prob and samples an integer k between 1 and
## K = length(prob) with probability prob[k].

rmult <- function(prob)
  return(sample(length(prob), size = 1, prob = prob))


gibbs.pgdpmix <- function(y, zeta = NULL, a = 1, b1 = 1, b2 = 2, n.sweep = 1e3){


  n <- length(y)
  if(is.null(zeta)) zeta <- rgamma(n, b1, b2)


  ## Storage for zeta
  
  store.zeta <- matrix(NA, n.sweep, n)

  ## Begin looping
  
  for(sweep in 1:n.sweep){
  	
  	for(i in 1:n){
  		q <- dpois(y[i], zeta)
  		q[i] <- a * dnbinom(y[i], size = b1, prob = b2/(1 + b2)) 
  		## this is really q0, but I am placing it at the empty cell q[i]
  		
  		l <- rmult(q)
  		if(l == i) zeta[i] <- rgamma(1, b1 + y[i], b2 + 1)
  		else zeta[i] <- zeta[l]
  	
  	}

    ## store
    
    store.zeta[sweep,] <- zeta
    
  }

  return(list(zeta = store.zeta, last.zeta = zeta))
}


## The following computes the mixture sampling density for a future observable

dmixpois <- Vectorize(function(x, zeta, a, b1, b2) return((sum(dpois(x, zeta)) + a * dnbinom(x, size = b1, prob = b2/(1 + b2))) / (a + length(zeta))), "x")

## The following generates a random future observable from the poserior predictive

rmixpois <- function(zeta, a, b1, b2){
  n <- length(zeta)
  ix <- sample(n + 1, 1, replace = TRUE, prob = c(rep(1,n), a))
  if(ix == n + 1) x <- rnbinom(1, size = b1, prob = b2 / (b2 + 1))
  else x <- rpois(1, zeta[ix])
  return(x)
}


## Posterior inference

## Get MCMC sample of (omega, lambda) from the posterior distributions.

out.dp.no <- gibbs.pgdpmix(y.no, 2, a = 1, b1 = 2, b2 = 1/10, n.sweep = 2e3)
out.dp.yes <- gibbs.pgdpmix(y.yes, 2, a = 1, b1 = 2, b2 = 1/10, n.sweep = 2e3)

## Shall discard the first 1000 draws and then save every tenth

last <- seq(1e3 + 10, 2e3, 10)

## Some visual depiction of posterior draws. 

brks <- 0:50 + 0.5
g <- seq(0, 50, 1)

f.dp.no <- apply(out.dp.no$zeta[last,], 1, dmixpois, x = g, a = 1, b1 = 2, b2 = 1 / 10)
f.dp.yes <- apply(out.dp.yes$zeta[last,], 1, dmixpois, x = g, a = 1, b1 = 2, b2 = 1 / 10)

plot(g, 0 * g, ylim = range(range(f.no), range(f.yes)), ann = F, ty = "n")

for(i in 1:length(last))
  lines(g, f.dp.no[,i], col = "gray")
for(i in 1:length(last))
  lines(g, f.dp.yes[,i], col = "orange")
text(35, 0.082, "Speed limit")
legend(35, 0.08, c("no", "yes"), lty = c(1, 1), col = c(1, 2), bty = "n")

## overlay the mean of these densities -- which also is the posterior predictive
## distribution of a future observation from the model

lines(g, apply(f.dp.no, 1, mean), lwd = 2)
lines(g, apply(f.dp.yes, 1, mean), lwd = 2, col = "red")

## draw future samples from the model

y.no.fut <- apply(out.dp.no$zeta[last,], 1, rmixpois, a = 1, b1 = 2, b2 = 1 / 10)
y.yes.fut <- apply(out.dp.yes$zeta[last,], 1, rmixpois, a = 1, b1 = 2, b2 = 1 / 10)

## posterior probability of the event that imposing speed limit
## reduces accident counts

pr.better <- mean(y.no.fut > y.yes.fut)


## word of caution -- we had only 100 draws of the future obs
## this might be too small for a good Monte Carlo approximation