bicreg.keep_function(x, y, wt = rep(1, length(y)),keepx,limitprint=1:25){

### Command to run this function on discrimination data (CH12), keeping the 1st 4 main FX:
### bicreg.keep(y=log.bsal,x=data.frame(senior,age,educ,exper,senior2,age2,educ2,exper2,senior.i.age,senior.i.educ,senior.i.exper,age.i.educ,age.i.exper,educ.i.exper),keep=1:4,limitprint=1:25)
### The limitprint argument will print the first 25 models, and can be modified.
### Results for the top models differ slightly from the book b/c of the constraint that the 
### 4 main effects remain in all models.

### Modified by S. McBride to include a keep function.  Corrections to R2 were removed.

### S-PLUS function to apply Bayesian Model Averaging to variable
### 	selection for linear regression models. Bayesian
### 	Model Averaging accounts for the model uncertainty inherent in the
### 	variable selection problem by averaging over the best models in the
### 	model class according to posterior model probability.  For more details see 
### 	references at the end of the documentation.

### Developer: Adrian Raftery (raftery@stat.washington.edu).
### Revised by Chris T. Volinsky
### Modified by Merlise Clyde for STA242 at Duke University to agree with def's in 
### The Statistical Sleuth

###  This software is not formally maintained, but I will be happy to
###  hear from people who have problems with it, although I cannot guarantee to
###  to solve them!
### Thanks to Nguyen Cong Vu for pointing out bugs.
### Permission is hereby given to StatLib to redistribute this software.
### The software can be freely used for non-commercial purposes, and can
###  be freely distributed for non-commercial purposes only. 
### The copyright is retained by the developer.
### Copyright 1994 Adrian E. Raftery
### Copyright 1996 Adrian E. Raftery and Chris T. Volinsky
### Returns the models in the strict (or symmetric) Occam's Window with O_R 
### as specified for linear regression with BIC.
### Inputs:
###  x		matrix of independent variables
###  y		dependent variable
###  wt		weights for regression
### Outputs:
###  postprob	posterior probabilities of the models selected
###  label	   labels identifying the models selected
###  r2		R2 values for the models
###  bic		values of bic for the models
###  size		the number of independent variables in each of the models
###  which	a logical matrix with one row per model and one column per 
###		variable indicating whether that variable is in the model
###  probne0	posterior probability that each variable is non-zero (in %)
###  postmean	posterior mean of each coefficient (from model mixing)
###  postsd	posterior standard deviation of each coefficient 
###		  (from model mixing) 
###  ols		matrix (like the matrix which) giving the OLS estimate of
###		  each coefficient for each model
###  se		matrix (like ols) giving the 
###		  corresponding standard errors for each model
### Reference: 
###  Raftery, Adrian E. (1995). Bayesian model selection in social research
###    (with Discussion). Sociological Methodology 1995 (Peter V. Marsden, ed.), 
###    pp. 111-196, Cambridge, Mass.: Blackwells.
###   This is also Working Paper no. 94-12, Center for Studies in Demography and Ecology, 
###   University of Washington (1994).
### It is available as a Postscript file at 
###   http://www.stat.washington.edu/tech.reports/bic.ps
### It is also available via regular ftp using the following commands:
### ftp ftp.stat.washington.edu (or 128.95.17.34)
### login as anonymous
### enter your email address as your password
### ftp>cd /pub/tech.reports
### ftp>get bic.ps
### ftp>bye

###############################################################################################
### Set original names of x-variables, if there are none to start with.

  x <- data.frame(x)
  if(is.null(dimnames(x))) dimnames(x) <- list(NULL,paste("X",1:ncol(x),sep=""))	
  y <- as.numeric(y)
  options(contrasts=c("contr.treatment","contr.treatment"))

 
### Removing NA's

  x2 <- na.omit(data.frame(x))
  used <- match(row.names(data.frame(x)), row.names(x2))
  omitted <- seq(nrow(x))[is.na(used)]
  if(length(omitted) > 0) {
    wt <- wt [ - omitted]
    x <- x2
    y <- y[ - omitted]
    warning(paste("There were ",length(omitted),"records deleted due to NA's"))
  }

### Creating design matrix

  cdf <-cbind.data.frame(y=y,x)
  mm <- model.matrix(formula(cdf),data=cdf)[,-1,drop=F]
  x <- mm

### If there are more than 30 columns in x, reduce to 30 using backwards
###  elimination.

  if(ncol(x) > 30) {
    back <- stepwise(x, y, wt, method = "backward")
    which.back30 <- back$which[(ncol(x) - 30),  ]
    x <- x[, which.back30]
  }

### Execute leaps and bounds algorithm
  nvar <- length(x[1,  ])

if (nvar >2)
  { print("Running Leaps -- this may take a while")
    a <- leaps(x, y, wt = wt, method = "r2", keep=keepx)
    r2_a$r2
    size_a$size
    label_a$label
    which_a$which
  } 
  else 
    {
      r2_bic_NULL
      nmod <- switch(ncol(x),2,4)
      bic <- label <- rep(0, nmod)
      model.fits <- as.list(rep(0, nmod))
      which <- matrix(c(F, T, F, T, F, F, T, T), nmod, nmod/2)
      size <- c(1 ,2,2,3)[1:nmod]
      sep <- if(all(nchar(dimnames(x)[[2]]) == 1)) "" else ","
      for(k in 1:nmod) {
	if (k==1) {
	  label[k] <- "NULL"
	  lm1 <- lm(y~1,w=wt)
	}
	  else {
	    label[k] <- paste(dimnames(x)[[2]][which[k,  ]], collapse = sep)
	    x.lm <- cbind.data.frame(y = y, x=x[,which[k, ,drop=F]], wt = wt)
	    lm1 <- lm(y~.-wt,data=x.lm,w=wt)
	  }
	r2[k] <- summary(lm1)$r.sq*100
      }
    }
  
### Calculate BIC and apply the first OW rule.
  n <- length(y)
  sigma2 _ (n - 1)*var(y)*(1 - r2/100)/(n - size)
  bic <- n * log(sigma2) + size * log(n)
  postprob <- exp(-0.5 * (bic - min(bic)))/sum(exp(-0.5 * (bic - min(bic))))
  postprob[is.na(postprob)]_1

### Order models in descending order of posterior probability
  order.bic <- order(bic, size, label)
  r2 <- r2[order.bic]
  size <- size[order.bic]
  label <- label[order.bic]
  which <- which[order.bic,  , drop=F]
  bic <- bic[order.bic]
  postprob <- postprob[order.bic]	
  
### Calculate Pr[beta_i != 0 | D] for each coefficient beta_i
  probne0 <- round(100 * t(which) %*% as.matrix(postprob), 1)
  nmod <- length(bic)	

### Calculate E[beta_i|D] and SD[beta_i|D] for each coefficient beta_i
  model.fits <- as.list(rep(0, nmod))
  for(k in (1:nmod)) {
    if(sum(which[k,  ]) != 0) {
      model.fits[[k]] <- ls.print(lsfit(x[, which[k,  ],drop=F], y, wt=wt), 
				  print.it = F)$coef.table
    }
      else model.fits[[k]] <- ls.print(lsfit(rep(1,length(y)),y, wt=wt,int=F), 
				     print.it = F)$coef.table  
  }
  Ebi <- rep(0, (nvar+1))
  SDbi <- rep(0,(nvar+1))
  EbiMk <- matrix(rep(0, nmod * (nvar+1)), nrow = nmod)
  sebiMk <- matrix(rep(0, nmod * (nvar+1)), nrow = nmod)
  for(i in 1:(nvar+1)) {
    if((i==1)||(sum(which[, (i-1)] != 0))) {
      for(k in (1:nmod)) {
	if((i==1)||(which[k, (i-1)] == T)) {

###Find position of beta_i in model k
	  if (i==1) pos <- 1
	    else pos <- 1+sum(which[k, (1:(i-1))])
	  EbiMk[k, i] <- model.fits[[k]][pos , 1]
	  sebiMk[k, i] <- model.fits[[k]][pos , 2]
	}
      }
      Ebi[i] <- as.numeric(sum(postprob * EbiMk[, i]))
      SDbi[i] <- sqrt(postprob%*%(sebiMk[,i]^2) + postprob%*%((EbiMk[, i]-Ebi[i])^2))
    }
  }

### Output
  dimnames(which)_list(NULL,dimnames(x)[[2]])
  dimnames(EbiMk)_dimnames(sebiMk)_list(NULL,c("Int",dimnames(x)[[2]]))

  list(postprob = postprob[limitprint], label = label[limitprint], r2 = r2[limitprint], bic = bic[limitprint], 
       size = (size )[limitprint], probne0 = c(probne0), postmean = Ebi, 
       postsd = SDbi, namesx = dimnames(x)[[2]])}

plot.models <- function(bic.out, varnames=T,  color=T, top=F)
{
  postprob _ bic.out$postprob
  rank <- (log(postprob) - log(min(postprob)))
  ordertop _ sort.list(-rank)
  postprob _ postprob[ordertop]
  which _ bic.out$which[ordertop,]
  if (top == F) top _ length(postprob)
  
  bestmodels _ t(which[1:top,])
 
  col <- dim(bestmodels)[1]
  nm _ dim(bestmodels)[2]
  rank <- (1.0 + (log(postprob) - log(min(postprob)))[1:top])
  if( color) intense _ t(t(bestmodels) * as.vector(rank))
  if (! color) intense _ bestmodels
  image(intense[,nm:1], axes = F, ylab="log(BAYES FACTOR)")
  if (varnames)
    mtext(bic.out$namesx, at = ((1:col) - 0.25), srt = 75, side = 1,
	  outer = F,  line = 2.5, cex = 0.5)
  yat _round(seq(dim(bestmodels)[2],1, length=6))
  ylabs _ as.character(round(rank[yat]))
  mtext(ylabs, at=(max(yat)+1 - yat), side=2, outer=F,line=1.5, cex=1)
  box()
invisible()
}