bayesreg <- function(x, y, wt=rep(1, length(y)),
                     prior=c("gprior","ZSnull","ZSfull","ZSnullLAP","ZSfullLAP","BIC"),
                     nbest=1, nvmax=8,
                     regmeth=c("exhaustive","backward","forward","seqrep"),
                     occam=FALSE, strict=FALSE) {
## MODIFIED VERSION OF bicreg (SPLUS VERSION) FOR R-----------------------
## UPDATED BY CHRISTINE KOHNEN, DUKE UNIVERSITY & SAMSI-------------------
## LAST UPDATED: MARCH 3, 2003--------------------------------------------
##
## INPUTS-----------------------------------------------------------------
## x          matrix of independent variables
## y          dependent variable
## wt         weights for regression
## prior      prior method - gprior, ZSnull, ZSfull, ZSnullLAP, ZSfullLAP, BIC
## nbest      number of subsets of each size to report
## nvmax      largest subset size to be examined
## regmeth    search method - "exhaustive", "forward", "backward" or "seqrep"
## occam      = TRUE will use Occam's window (default ratio OR = 20) as the
##              maximum ratio for excluding models
##            = FALSE (default) will return all models
## strict     = TRUE will return a more parsimonious set of models
##              (Occam's window): any model with a more likely submodel
##              will be eliminated
##            = FALSE (default) returns all models whose posterior model
##              probability is within a factor 1/OR of the best
##
## OUTPUT------------------------------------------------------------------
## post.order ordering of the posterior probabilities in decreasing order
## postprob   posterior probabilities of the models selected
## label      labels identifying the models selected (no intercept)
## r2         R2 values for the models
## bic        BIC values for the models
## size       number of independent variables in each of the models (no int.)
## which      a logical matrix with one row per model and one column per
##            variable indicating whether that variable is in the model
## which.int  similar to which, but has a column for the intercept
## margs      log marginal values or log Bayes factors (depends on prior)
## probne0    posterior probability that each variable is non-zero (in %)
## postmean   posterior mean of each coefficient (from model mixing)
## postsd     posterior SD of each coefficient (from model mixing)
## ols        matrix (like the matrix which) giving the OLS estimate of
##            each coefficient for each model
## se         matrix (like ols) giving the corresponding SE for each model
##
## (A usage sketch with simulated data appears at the end of this file.)
##
## CODE STARTS HERE--------------------------------------------------------
  x <- data.frame(x)
  if(is.null(dimnames(x)))
    dimnames(x) <- list(NULL, paste("X", 1:ncol(x), sep=""))
  y <- as.numeric(y)
  options(contrasts=c("contr.treatment", "contr.treatment"))

  ## REMOVING THE NA'S------------------------------------------------------
  x2 <- na.omit(data.frame(x))
  used <- match(row.names(data.frame(x)), row.names(x2))
  omitted <- seq(nrow(x))[is.na(used)]
  if(length(omitted) > 0) {
    x <- x2
    y <- y[ - omitted]
    wt <- wt[ - omitted]   # keep the weights aligned with y
    warning(paste("There were", length(omitted), "records deleted due to NA's"))
  }

  ## CREATING THE DESIGN MATRIX----------------------------------------------
  cdf <- cbind.data.frame(y=y, x)
  mm <- model.matrix(formula(cdf), data=cdf)[, -1, drop=FALSE]
  x <- mm

  ## EXECUTE THE LEAPS OR REGSUBSETS ALGORITHM -- DEPENDS ON DIM(X)
  ## Load the leaps package if not already attached
  library(leaps)
  nvar <- ncol(x)
  n <- length(y)
  r2full <- summary(lm(y ~ x))$r.squared

  if(nvar > 30){
    a <- regsubsets(x, y, nbest=nbest, nvmax=nvmax, method=regmeth[1],
                    really.big=TRUE)
    b <- summary(a)
    b$rsq <- pmin(pmax(0, b$rsq), .999)
    label <- rep(0, nrow(b$which))
    for(i in 1:nrow(b$which)){
      label[i] <- paste(dimnames(x)[[2]][which(b$which[i, 2:(nvar+1)] == TRUE)],
                        collapse=",")
    }
    ## Add values corresponding to the null model (intercept only)
    which.int <- rbind(c(TRUE, rep(FALSE, nvar)), b$which)
    r2 <- round(c(0, b$rsq), 3)
    size <- c(1, apply(b$which, 1, sum))        # intercept is included in the sum
    label <- c("NULL", label)
    which <- rbind(rep(FALSE, nvar), b$which[, 2:(nvar+1)])
    bic <- c(0, b$bic)
  }
  else if(nvar < 31 & nvar > 2){
    a <- leaps(x, y, wt=wt, method="r2", names=dimnames(x)[[2]], nbest=nbest)
    a$r2 <- pmin(pmax(0, a$r2), .999)
    label <- rep(0, nrow(a$which))
    for(i in 1:nrow(a$which)){
      label[i] <- paste(dimnames(x)[[2]][which(a$which[i, ] == TRUE)], collapse=",")
    }
    ## Add values corresponding to the null model (intercept only)
    r2 <- round(c(0, a$r2), 3)
    size <- c(1, a$size)                        # intercept is included in a$size
    label <- c("NULL", label)
    which <- rbind(rep(FALSE, nvar), a$which)   # a$which does not include the intercept
    bic <- n * log(1 - r2) + (size - 1) * log(n)
    which.int <- cbind(rep(TRUE, nrow(which)), which)
  }
  else{
    r2 <- bic <- NULL
    nmod <- switch(ncol(x), 2, 4)
    bic <- label <- rep(0, nmod)
    model.fits <- as.list(rep(0, nmod))
    which <- matrix(c(F,T,F,T,F,F,T,T), nmod, nmod/2)
    which.int <- cbind(rep(TRUE, nmod), which)  # intercept column, needed below
    size <- c(1, 2, 2, 3)[1:nmod]
    sep <- if(all(nchar(dimnames(x)[[2]]) == 1)) "" else ","
    for(k in 1:nmod) {
      if (k == 1) {
        label[k] <- "NULL"
        lm1 <- lm(y ~ 1, weights=wt)
      }
      else {
        label[k] <- paste(dimnames(x)[[2]][which[k, ]], collapse=sep)
        x.lm <- cbind.data.frame(y=y, x=x[, which[k, , drop=FALSE]], wt=wt)
        lm1 <- lm(y ~ . - wt, data=x.lm, weights=wt)
      }
      r2[k] <- round(summary(lm1)$r.sq, digits=3)
    }
    bic <- n * log(1 - r2) + (size - 1) * log(n)
  }

  ## CALCULATE THE LOG MARGINALS OR LOG BAYES FACTORS FOR EACH MODEL--------
  margs <- rep(0, nrow(which))
  for(i in 1:nrow(which)){
    margs[i] <- switch(1 + pmatch(prior[1],
                                  c("gprior","ZSnull","ZSfull","ZSnullLAP","ZSfullLAP","BIC"),
                                  nomatch=0),
                       stop(paste("Ambiguous or unrecognized prior name:", prior)),
                       log(margy(n, n, sum(which[i, ]), r2[i])),
                       ZSnull(r2[i], n, sum(which[i, ]), c=n/2),
                       ZSfull(r2full, r2[i], n, nvar, sum(which[i, ])),
                       ZSnullLAP(r2[i], n, sum(which[i, ])),
                       ZSfullLAP(r2full, r2[i], n, nvar, sum(which[i, ])),
                       -.5 * bic[i])
  }

  ## APPLY THE FIRST OCCAM'S WINDOW (OW) RULE: KEEP MODELS WHOSE BIC IS
  ## WITHIN 2*log(OR) OF THE BEST MODEL-------------------------------------
  if(occam){
    OR <- 20
    occam <- bic - min(bic) < 2 * log(OR)
    r2 <- r2[occam]          # only keep the r2 values for which occam=TRUE
    size <- size[occam]      # only keep the size values for which occam=TRUE
    label <- label[occam]    # only keep the labels for which occam=TRUE
    which <- which[occam, , drop=FALSE]
    which.int <- which.int[occam, , drop=FALSE]
    bic <- bic[occam]
    margs <- margs[occam]
    postprob <- exp(-0.5 * bic)/sum(exp(-0.5 * bic))
    postprob[is.na(postprob)] <- 1

    ## ORDER MODELS IN DESCENDING ORDER OF POSTERIOR PROBABILITY------------
    order.bic <- order(bic, size, label)
    r2 <- r2[order.bic]
    size <- size[order.bic]
    label <- label[order.bic]
    which <- which[order.bic, , drop=FALSE]
    which.int <- which.int[order.bic, , drop=FALSE]
    bic <- bic[order.bic]
    margs <- margs[order.bic]
    postprob <- postprob[order.bic]

    ## APPLY THE SECOND OW RULE: ELIMINATE MODELS WITH A BETTER ONE NESTED WITHIN
    if(strict){
      nmod <- length(bic)
      if (nmod > 1) {
        occam <- rep(TRUE, nmod)
        for(k in (2:nmod)) {
          for(j in (1:(k - 1))) {
            which.diff <- which[k, ] - which[j, ]
            if(all(which.diff >= 0)) occam[k] <- FALSE
          }
        }
        r2 <- r2[occam]
        size <- size[occam]
        label <- label[occam]
        nmod <- sum(occam)
        which <- which[occam, , drop=FALSE]
        which.int <- which.int[occam, , drop=FALSE]
        bic <- bic[occam]
        margs <- margs[occam]
        postprob <- postprob[occam]
        postprob <- postprob/sum(postprob)
      }
    }
  }
  else{
    postprob <- exp(margs)/sum(exp(margs))
    postprob[is.na(postprob)] <- 1
  }

  ## ORDER THE POSTERIOR MODEL PROBABILITIES IN DESCENDING ORDER------------
  post.order <- order(postprob, decreasing=TRUE)

  ## CALCULATE PR[BETA_i != 0 | D] FOR EACH COEFFICIENT BETA_i (IN %)-------
  probne0 <- round(100 * t(which.int) %*% as.matrix(postprob), 1)
  nmod <- length(bic)
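
  ## The mixing step below follows the standard BMA identities (a summary of
  ## what the loop computes, not new functionality): with posterior model
  ## probabilities p_k = postprob[k],
  ##   E[b_i | D]   = sum_k p_k * E[b_i | M_k, D]
  ##   Var[b_i | D] = sum_k p_k * ( se_ik^2 + (E[b_i | M_k, D] - E[b_i | D])^2 )
  ## where E[b_i | M_k, D] and se_ik are the OLS estimate and standard error of
  ## coefficient i in model M_k (the rows of EbiMk and sebiMk).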
  ## CALCULATE E[BETA_i|D] AND SD[BETA_i|D] FOR EACH COEFFICIENT BETA_i-----
  model.fits <- as.list(rep(0, nmod))
  for(k in 1:nmod) {
    if(sum(which[k, ]) != 0){
      model.fits[[k]] <- summary(lm(y ~ x[, which[k, ], drop=FALSE], weights=wt))$coef
    }
    else model.fits[[k]] <- summary(lm(y ~ 1, weights=wt))$coef
  }
  Ebi <- rep(0, (nvar+1))
  SDbi <- rep(0, (nvar+1))
  EbiMk <- matrix(0, nmod, nvar+1)
  sebiMk <- matrix(0, nmod, nvar+1)
  for(i in 1:(nvar+1)) {
    if((i == 1) || (sum(which.int[, i]) != 0)) {
      for(k in 1:nmod){
        if((i == 1) || (which.int[k, i] == TRUE)){
          ## FIND POSITION OF BETA_i IN MODEL k--------------------------------
          if(i == 1) pos <- 1
          else pos <- sum(which.int[k, (1:i)])
          EbiMk[k, i] <- model.fits[[k]][pos, 1]
          sebiMk[k, i] <- model.fits[[k]][pos, 2]
        }
      }
      Ebi[i] <- as.numeric(sum(postprob * EbiMk[, i]))
      SDbi[i] <- sqrt(postprob %*% (sebiMk[, i]^2) + postprob %*% ((EbiMk[, i] - Ebi[i])^2))
    }
  }

  ## ATTACH DIMNAMES AND RETURN; post.order GIVES THE MODELS IN DESCENDING
  ## ORDER OF POSTERIOR MODEL PROBABILITY-----------------------------------
  dimnames(which) <- list(NULL, dimnames(x)[[2]])
  dimnames(which.int) <- list(NULL, c("Int", dimnames(x)[[2]]))
  dimnames(EbiMk) <- dimnames(sebiMk) <- list(NULL, c("Int", dimnames(x)[[2]]))

  return(list(post.order = post.order,
              postprob = postprob,
              label = label,
              r2 = r2,
              bic = bic,
              size = size - 1,
              which = which,
              which.int = which.int,
              margs = margs,
              probne0 = c(probne0),
              postmean = Ebi,
              postsd = SDbi,
              ols = EbiMk,
              se = sebiMk))
}

##
## Zellner-Siow prior for the linear model; here we compute the Bayes
## factor to the full model exactly and using a Laplace approximation.
##
## The current model has $k$ covariates of a total of $p$ possible and
## there is a common intercept in all models being compared.
## The sample size is $n$.
##
## Originally we are considering
##
##   Y = 1 alpha + X1 beta1 + X2 beta2
##
## where 1'X1 = 1'X2 = 0, rank(X1) = k, rank(X2) = p - k, and we are
## interested in testing H0: beta2 = 0.
##
## We then reparametrize to
##
##   Y = 1 alpha + X1 eta + V beta2
##
## where eta = beta1 + (X1'X1)^-1 X1'X2 beta2
##       V   = [I - X1 (X1'X1)^-1 X1'] X2
##
##   R^2full    = 1 - y'(I - P* - P_X1 - P_V)y / y'(I - P*)y
##   R^2current = 1 - y'(I - P* - P_X1)y / y'(I - P*)y
##
##   P*   = 1 (1'1)^(-1) 1' = 1 1'/n, where 1 is the nx1 vector of ones
##   P_X1 = X1 (X1'X1)^(-1) X1'
##   P_V  = V (V'V)^(-1) V'
##   I    = nxn identity matrix
##

########################################
# Exact Bayes factor to the full model #
########################################

# Calculates the Bayes factor by numerical integration
ZSfull <- function(r2full, r2curr, n, p, k){
  if(k == p) out <- 0
  else{
    eps <- (1 - r2full)/(1 - r2curr)
    aux <- function(u){
      return(integrandfull(u, n, p, k, eps))
    }
    out <- -log(integrate(aux, 0, 1)$value)
  }
  return(out)
}

# Calculates the integrand in the Bayes factor; change of variables 1/(1+g) = u
integrandfull <- function(u, n, p, k, eps){
  aux <- -(n - (k+1))*log(eps + u*(1 - eps))/2
  aux <- aux + (p - (k+1))*log(u)/2
  aux <- aux - 3*log(1 - u)/2
  aux <- aux - (n*u)/(2*(1 - u))
  aux <- aux + log(n/2)/2 - lgamma(1/2)
  return(exp(aux))
}

###########################
# A Laplace approximation #
###########################

# This returns the Laplace approximation to the Bayes factor to the full model
ZSfullLAP <- function(r2full, r2curr, n, p, k){
  if(k == p) aux <- 0
  else{
    eps <- (1 - r2full)/(1 - r2curr)
    hat <- pmodefull(eps, n, p, k)
    if(length(hat) > 1) stop("more than one root")
    else{
      if(length(hat) == 0) stop("no roots found")
      else if(hat[1] == 0) stop("root at zero")
      else{
        ## Laplace approximation of -log( integral of exp(likfull(g)) dg ),
        ## matching the sign convention of the exact ZSfull() above
        aux <- -(likfull(hat, eps, n, p, k) +
                 (log(2*pi) - log(-infofull(hat, eps, n, p, k)))/2)
      }
    }
  }
  return(aux)
}

# This returns log(likelihood times prior)
likfull <- function(g, eps, n, p, k){
  aux <- (n - (p+1))*log(1 + g) - (n - (k+1))*log(1 + eps*g) - 3*log(g) - n/g
  aux <- aux/2
  aux <- aux + log(n/2)/2 - lgamma(1/2)
  return(aux)
}

# This returns the first derivative of log(likfull())
scorefull <- function(g, eps, n, p, k){
  aux <- (n - (p+1))/(1 + g) - (n - (k+1))*eps/(1 + eps*g) - 3/g + n/g^2
  aux <- aux/2
  return(aux)
}

# And this returns the second derivative of log(likfull())
infofull <- function(g, eps, n, p, k){
  aux <- -(n - (p+1))/(1 + g)^2 + (n - (k+1))*eps^2/(1 + eps*g)^2 + 3/g^2 - 2*n/g^3
  aux <- aux/2
  return(aux)
}

# This gives the posterior mode
pmodefull <- function(eps, n, p, k){
  # aux <- zapsmall(polyroot(c(n, n*(eps+1)-3, n-p+eps*(k-2)-4, -eps*(p-k+3))))
  aux <- rootsfull(c(n, n*(eps+1) - 3, n - p + eps*(k-2) - 4, -eps*(p-k+3)))
  sol <- c()
  for(i in 1:length(aux)){
    if(aux[i] >= 0) sol <- append(sol, aux[i])
  }
  return(sol)
}

# Solves the cubic coeff[4]*x^3 + coeff[3]*x^2 + coeff[2]*x + coeff[1] = 0
# (real roots only, via the trigonometric/Cardano formulas)
rootsfull <- function(coeff){
  c <- coeff[1]/coeff[4]
  b <- coeff[2]/coeff[4]
  a <- coeff[3]/coeff[4]
  Q <- (a^2 - 3*b)/9
  R <- (2*a^3 - 9*a*b + 27*c)/54
  Q3 <- Q^3
  disc <- R^2 - Q3
  if(disc >= 0){
    A <- -sign(R)*(abs(R) + sqrt(disc))^(1/3)
    if(A == 0.) B <- 0. else B <- Q/A
    return(c((A + B) - a/3))
  }
  else{
    theta <- acos(R/sqrt(Q3))
    x <- c(0, 0, 0)
    aux <- -2*sqrt(Q)
    x[1] <- aux*cos(theta/3)
    x[2] <- aux*cos((theta + 2*pi)/3)
    x[3] <- aux*cos((theta - 2*pi)/3)
    aux <- a/3
    for(i in 1:3) x[i] <- x[i] - aux
    return(x)
  }
}

##
## Zellner-Siow prior for the linear model; here we compute the Bayes
## factor to the null model exactly and using a Laplace approximation.
##
## The current model has $k$ covariates and there is a
## common intercept in all models being compared.
## The sample size is $n$.
##
##   R^2 = 1 - y'(I - P* - P)y / y'(I - P*)y   ( = 1 - epsilon )
##
##   P* = 1 (1'1)^(-1) 1' = 1 1'/n, where 1 is the nx1 vector of ones
##   P  = X (X'X)^(-1) X', where X is the nxk design matrix of the
##        current model, excluding the column of ones
##   I  = nxn identity matrix
##

##################################
# Exact Bayes factor to the null #
##################################

# Calculates the Bayes factor by numerical integration
ZSnull <- function(r2, n, k, c=n/2){
  if(k == 0) out <- 0
  else{
    aux <- function(u){
      return(integrand(u, n, k, r2, c=c))
    }
    out <- log(integrate(aux, 0, 1)$value)
  }
  return(out)
}

# Calculates the integrand in the Bayes factor; change of variables 1/(1+g) = u
# (see the numerical sanity checks at the end of this file)
integrand <- function(u, n, k, r2, c){
  aux <- -(n - 1)*log((1 - r2) + u*r2)/2
  aux <- aux + (k - 1)*log(u)/2
  aux <- aux - 3*log(1 - u)/2
  aux <- aux - c*u/(1 - u)
  aux <- aux + log(c)/2 - lgamma(1/2)
  return(exp(aux))
}

###########################
# A Laplace approximation #
###########################

# This returns the Laplace approximation to the Bayes factor to the null model
ZSnullLAP <- function(r2, n, k){
  if(k == 0) aux <- 0
  else{
    hat <- pmode(r2, n, k)
    if(length(hat) > 1) stop("more than one root")
    else{
      if(length(hat) == 0) stop("no roots found")
      else if(hat[1] == 0) stop("root at zero")
      else{
        aux <- lik(hat, r2, n, k) + (log(2*pi) - log(-info(hat, r2, n, k)))/2
      }
    }
  }
  return(aux)
}

# This returns log(likelihood times prior)
lik <- function(g, r2, n, k){
  aux <- (n - 1 - k)*log(1 + g) - (n - 1)*log(1 + (1 - r2)*g) - 3*log(g) - n/g
  aux <- aux/2
  aux <- aux + log(n/2)/2 - lgamma(1/2)
  return(aux)
}

# This returns the first derivative of log(lik())
score <- function(g, r2, n, k){
  aux <- (n - 1 - k)/(1 + g) - (n - 1)*(1 - r2)/(1 + (1 - r2)*g) - 3/g + n/g^2
  aux <- aux/2
  return(aux)
}

# And this returns the second derivative of log(lik())
info <- function(g, r2, n, k){
  aux <- -(n - 1 - k)/(1 + g)^2 + (n - 1)*(1 - r2)^2/(1 + (1 - r2)*g)^2 + 3/g^2 - 2*n/g^3
  aux <- aux/2
  return(aux)
}

# Returns the second derivative of the log-likelihood only
info2 <- function(g, r2, n, k){
  aux <- -(n - 1 - k)/(1 + g)^2 + (n - 1)*(1 - r2)^2/(1 + (1 - r2)*g)^2
  aux <- aux/2
  return(aux)
}

# This gives the posterior mode
pmode <- function(r2, n, k){
  # aux <- zapsmall(polyroot(c(n, n*r2-3, n-4-k-2*(1-r2), -(1-r2)*(k+3))))
  aux <- roots(c(n, n*(2 - r2) - 3, n - 4 - k - 2*(1 - r2), -(1 - r2)*(k + 3)))
  sol <- c()
  for(i in 1:length(aux)){
    if(aux[i] >= 0) sol <- append(sol, aux[i])
  }
  return(sol)
}

# Solves the cubic coeff[4]*x^3 + coeff[3]*x^2 + coeff[2]*x + coeff[1] = 0
# (real roots only, via the trigonometric/Cardano formulas)
roots <- function(coeff){
  c <- coeff[1]/coeff[4]
  b <- coeff[2]/coeff[4]
  a <- coeff[3]/coeff[4]
  Q <- (a^2 - 3*b)/9
  R <- (2*a^3 - 9*a*b + 27*c)/54
  Q3 <- Q^3
  disc <- R^2 - Q3
  if(disc >= 0){
    A <- -sign(R)*(abs(R) + sqrt(disc))^(1/3)
    if(A == 0.) B <- 0. else B <- Q/A
    return(c((A + B) - a/3))
  }
  else{
    theta <- acos(R/sqrt(Q3))
    x <- c(0, 0, 0)
    aux <- -2*sqrt(Q)
    x[1] <- aux*cos(theta/3)
    x[2] <- aux*cos((theta + 2*pi)/3)
    x[3] <- aux*cos((theta - 2*pi)/3)
    aux <- a/3
    for(i in 1:3) x[i] <- x[i] - aux
    return(x)
  }
}

## Calculates the marginal distribution of Y under a g-prior
margy <- function(g, n, k, r2){
  # If the null model, force the marginal to zero
  if(k == 0) out <- 0
  else{
    # computed via exp() of the log expression to avoid overflow of
    # (1+g)^((n-k-1)/2) for large n
    out <- exp(0.5*(n - k - 1)*log(1 + g) - 0.5*(n - 1)*log(1 + g*(1 - r2)))
  }
  return(out)
}
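
## ---------------------------------------------------------------------------
## Usage sketch (illustration only, not part of the original routine). The
## simulated data set, variable names and chosen prior below are assumptions
## made for the demo; wrap in if(FALSE) so sourcing this file does not run it.
## ---------------------------------------------------------------------------
if(FALSE){
  set.seed(1)
  n.obs <- 100
  X <- matrix(rnorm(n.obs * 4), n.obs, 4,
              dimnames = list(NULL, c("x1", "x2", "x3", "x4")))
  y.sim <- 1 + 2*X[, 1] - X[, 3] + rnorm(n.obs)

  fit <- bayesreg(X, y.sim, prior = "ZSnull")

  ## models listed in decreasing posterior probability
  fit$label[fit$post.order]
  fit$postprob[fit$post.order]

  ## model-averaged coefficient summaries (intercept first)
  cbind(postmean = fit$postmean, postsd = fit$postsd)
}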
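
## ---------------------------------------------------------------------------
## Numerical sanity checks (illustration only). The values r2 = 0.5, n = 50,
## k = 3 are arbitrary demo choices. They check that (i) the u-scale integral
## in ZSnull() matches a direct integral of exp(lik(g)) over g (the change of
## variables u = 1/(1+g)), (ii) the Laplace approximation ZSnullLAP() is close
## to the exact value, and (iii) roots() agrees with base R's polyroot() on
## the cubic used by pmode().
## ---------------------------------------------------------------------------
if(FALSE){
  r2 <- 0.5; n <- 50; k <- 3

  ## (i) exact log Bayes factor on the u scale vs. the g scale
  lbf.u <- ZSnull(r2, n, k)
  lbf.g <- log(integrate(function(g) exp(lik(g, r2, n, k)), 0, Inf)$value)
  c(u.scale = lbf.u, g.scale = lbf.g)

  ## (ii) exact vs. Laplace approximation
  c(exact = lbf.u, laplace = ZSnullLAP(r2, n, k))

  ## (iii) real root(s) of the posterior-mode cubic vs. polyroot()
  cf <- c(n, n*(2 - r2) - 3, n - 4 - k - 2*(1 - r2), -(1 - r2)*(k + 3))
  pr <- polyroot(cf)   # all complex roots; coefficients in increasing degree
  c(roots = sort(roots(cf)), polyroot = sort(Re(pr[abs(Im(pr)) < 1e-8])))
}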