## Analysis of traffic data as discussed in class

## Dataset and formatting
require(MASS)
data(Traffic)
ts <- split(Traffic, Traffic$limit)
y.no <- ts[["no"]]$y
n.no <- length(y.no)
y.yes <- ts[["yes"]]$y
n.yes <- length(y.yes)

## The following function draws a random number from the multinomial
## distribution. It takes as input a (possibly un-normalized)
## probability vector `prob' and samples an integer k between 1 and
## K = length(prob) with probability prob[k].

rmult <- function(prob)
  return(sample(length(prob), size = 1, prob = prob))

## The following function implements the main Gibbs sampler.
## Recall our model is given as follows:
##
## p(y | latent, lambda, omega) = prod_j Pois(y[j] | lambda[latent[j]])
## p(latent | lambda, omega)    = prod_j Mult(latent[j] | K, omega)
## p(lambda, omega)             = prod_k Gamma(lambda[k] | b1[k], b2[k])
##                                  x Dir(omega | a[1], ..., a[K])

gibbs.pgmix <- function(y, K, latent = NULL, a = rep(1, K),
                        b1 = rep(2, K), b2 = rep(2, K), n.sweep = 1e3){

  ## Need library `bayesm' for the function `rdirichlet' that
  ## generates random probability vectors from the Dirichlet
  ## distribution
  require(bayesm)

  ## variables
  n <- length(y)
  omega <- rep(NA, K)
  lambda <- rep(NA, K)

  ## latent must be initialized -- if not supplied by the user then
  ## sample latent from Mult(K, E(omega)) where E(omega) is the prior
  ## expectation of omega given by the normalized version of `a'.
  if(is.null(latent))
    latent <- replicate(n, rmult(a))
  ## The above is the same as
  ##   for(j in 1:n)
  ##     latent[j] <- rmult(a)

  ## Storage for omega and lambda -- will not store latent. Recall
  ## that augmenting latent was simply to facilitate Gibbs sampling --
  ## our interest still lies with the original parameters omega and
  ## lambda.
  store.omega <- matrix(NA, n.sweep, K)
  store.lambda <- matrix(NA, n.sweep, K)

  ## Begin looping
  for(sweep in 1:n.sweep){

    ## Forming clusters according to latent: the k-th cluster
    ## corresponds to observations with latent[j] == k. Counting with
    ## tabulate() (rather than split(), which silently drops empty
    ## groups) keeps all K clusters, recording empty ones as zero
    ## counts and zero sums.
    m <- tabulate(latent, nbins = K)
    ysum <- sapply(1:K, function(k) sum(y[latent == k]))

    ## Updating theta = (omega, lambda)
    omega <- rdirichlet(a + m)
    lambda <- rgamma(K, b1 + ysum, b2 + m)

    ## Updating latent variables: prob.mat[k, j] is proportional to
    ## omega[k] * Pois(y[j] | lambda[k])
    prob.mat <- omega * t(outer(y, lambda, "dpois"))
    latent <- apply(prob.mat, 2, rmult)

    ## store
    store.omega[sweep, ] <- omega
    store.lambda[sweep, ] <- lambda
  }

  return(list(omega = store.omega, lambda = store.lambda,
              last.latent = latent))
}

## The following computes the mixture sampling density
## p(y | omega, lambda) = sum_k omega[k] Pois(y | lambda[k])
## for each row of the matrices `lambda' and `omega' (one row per
## saved posterior draw).
dmixpois <- function(x, lambda, omega)
  return(rowSums(dpois(x, lambda) * omega))

## The following generates one random draw from the mixture sampling
## density for each row of `lambda' and `omega'.
rmixpois <- function(lambda, omega){
  K <- ncol(omega)
  ix <- apply(omega, 1, rmult)
  lam <- rowSums(lambda * diag(K)[ix, ])
  return(rpois(nrow(lambda), lam))
}

## Posterior inference
## Get an MCMC sample of (omega, lambda) from each posterior distribution.
out.no <- gibbs.pgmix(y.no, 2, a = c(2, 1), b1 = 2, b2 = 1/10, n.sweep = 2e3)
out.yes <- gibbs.pgmix(y.yes, 2, a = c(2, 1), b1 = 2, b2 = 1/10, n.sweep = 2e3)

## Discard the first 1000 draws as burn-in, then keep every tenth draw.
last <- seq(1e3 + 10, 2e3, 10)
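
## An optional sanity check before using these draws (a minimal
## sketch, not part of the in-class analysis): trace plots of the
## lambda draws to eyeball mixing and the adequacy of the burn-in.
## Keep in mind that mixture component labels can switch between
## sweeps, so the two traces may occasionally swap levels.
op <- par(mfrow = c(2, 1), mar = c(4, 4, 2, 1))
matplot(out.no$lambda, type = "l", lty = 1,
        xlab = "sweep", ylab = "lambda", main = "no limit")
matplot(out.yes$lambda, type = "l", lty = 1,
        xlab = "sweep", ylab = "lambda", main = "limit")
par(op)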
## Some visual depiction of the posterior draws: plot the mixture
## sampling density p(y | lambda, omega) for each of the saved draws
## of (lambda, omega).
brks <- 0:50 + 0.5   ## histogram bin breaks (not used below)
g <- seq(0, 50, 1)
f.no <- sapply(g, dmixpois, lambda = out.no$lambda[last, ],
               omega = out.no$omega[last, ])
f.yes <- sapply(g, dmixpois, lambda = out.yes$lambda[last, ],
                omega = out.yes$omega[last, ])

plot(g, 0 * g, ylim = range(range(f.no), range(f.yes)),
     ann = FALSE, type = "n")
for(i in 1:length(last))
  lines(g, f.no[i, ], col = "gray")
for(i in 1:length(last))
  lines(g, f.yes[i, ], col = "orange")
text(35, 0.082, "Speed limit")
legend(35, 0.08, c("no", "yes"), lty = c(1, 1), col = c(1, 2), bty = "n")

## Overlay the mean of these densities -- which also is the posterior
## predictive distribution of a future observation from the model.
lines(g, apply(f.no, 2, mean), lwd = 2)
lines(g, apply(f.yes, 2, mean), lwd = 2, col = "red")

## Draw future samples from the model.
y.no.fut <- rmixpois(out.no$lambda[last, ], out.no$omega[last, ])
y.yes.fut <- rmixpois(out.yes$lambda[last, ], out.yes$omega[last, ])

## Posterior probability of the event that imposing a speed limit
## reduces accident counts.
pr.better <- mean(y.no.fut > y.yes.fut)

## A word of caution -- we have only 100 draws of the future
## observations, which might be too small for a good Monte Carlo
## approximation.
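
## To quantify that caution (a minimal sketch, not part of the
## original analysis): a rough Monte Carlo standard error for
## pr.better is sqrt(p * (1 - p) / n), and the approximation can be
## tightened by recycling the saved posterior draws -- sampled with
## replacement -- to generate many more future observations. The
## names se.better and pr.better.more below are new to this sketch.
se.better <- sqrt(pr.better * (1 - pr.better) / length(y.no.fut))
more <- sample(last, 1e4, replace = TRUE)
y.no.more <- rmixpois(out.no$lambda[more, ], out.no$omega[more, ])
y.yes.more <- rmixpois(out.yes$lambda[more, ], out.yes$omega[more, ])
pr.better.more <- mean(y.no.more > y.yes.more)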