# From last time…

Application exercise 11:

1. Create the following distributions of size 100.
• We R Prepared: normal
• #Shreklμvers: somewhat right skewed
• Fantastic four (minus 1): extremely left skewed
• Statisfaction: bimodal
• It’s All Ogre Now: wonkiest distribution you can imagine
2. Treat the above distribution as your population. Create sampling distributions of the sample mean for samples of size n = 10, 50, and 100. Make histograms and normal probability plots of these distributions.
3. Describe the shapes of these distributions, and calculate their centers and spreads. Compare these to the shapes, centers, and spreads of the parent population distributions from (1).

## Aside: multiple ggplots side-by-side

Below is some helper code that will be useful later (from http://stackoverflow.com/a/8391998):

# Draw several plot objects on one page, arranged in a grid
# (adapted from http://stackoverflow.com/a/8391998).
#
# Args:
#   ...:      plot objects to draw.
#   plotlist: optional list of further plot objects, appended after `...`.
#   cols:     number of columns in the grid (defaults to 1).
#
# Plots fill the grid row by row. Each plot is drawn by calling
# print(plot, vp = <viewport>), so the objects must have a print method
# that accepts `vp` (or ignores it).
#
# Returns NULL invisibly; called for its drawing side effect. With no plots
# at all the function returns immediately without touching the device.
multiplot <- function(..., plotlist = NULL, cols = 1) {
  if (!requireNamespace("grid", quietly = TRUE)) {
    stop("Package 'grid' is required by multiplot().", call. = FALSE)
  }

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw: avoid iterating over 1:0 and indexing an empty list
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  # Make the panel: rows needed follow from the requested number of columns
  plot_cols <- cols
  plot_rows <- ceiling(num_plots / plot_cols)

  # Set up the page
  grid::grid.newpage()
  grid::pushViewport(
    grid::viewport(layout = grid::grid.layout(plot_rows, plot_cols))
  )

  # Make each plot, in the correct location
  for (i in seq_len(num_plots)) {
    cur_row <- ceiling(i / plot_cols)
    cur_col <- (i - 1) %% plot_cols + 1
    print(
      plots[[i]],
      vp = grid::viewport(layout.pos.row = cur_row, layout.pos.col = cur_col)
    )
  }

  invisible(NULL)
}

## We R Prepared: normal population

# Simulate the "population": 100 draws from a normal distribution with
# mean 50 and standard deviation 5 (seed fixed for reproducibility).
set.seed(24)
norm_pop <- rnorm(100, mean = 50, sd = 5)
norm_df <- data.frame(norm_pop = norm_pop)

# Normal probability plot. The dashed reference line has intercept equal to
# the sample mean and slope equal to the sample sd. Built with ggplot() +
# stat_qq() rather than qplot(stat = "qq"), whose `stat` argument is
# deprecated in current ggplot2.
plot1 <- ggplot(norm_df, aes(sample = norm_pop)) +
  stat_qq() +
  geom_abline(
    intercept = mean(norm_pop),
    slope = sd(norm_pop),
    linetype = "dashed"
  )

# Histogram of the same values
plot2 <- ggplot(norm_df, aes(x = norm_pop)) +
  geom_histogram()

# Show both plots side by side
multiplot(plot1, plot2, cols = 2)

## We R Prepared: sampler

# Simulate the sampling distribution of the sample mean.
#
# Args:
#   n:    size of each sample.
#   pop:  vector treated as the population; defaults to the global
#         `norm_pop`, so existing calls such as sample_dist(10) still work.
#   reps: number of samples to draw (defaults to 10000).
#
# Each sample is drawn with replacement. Returns a numeric vector of length
# `reps` holding the mean of each sample.
sample_dist <- function(n, pop = norm_pop, reps = 10000) {
  pop_size <- length(pop)
  stopifnot(pop_size > 0)

  # Index with sample.int() instead of calling sample(pop, ...): sample()
  # treats a single number x >= 1 as the population 1:x. For longer vectors
  # this consumes the random-number stream exactly like sample() does.
  vapply(
    seq_len(reps),
    function(i) mean(pop[sample.int(pop_size, size = n, replace = TRUE)]),
    numeric(1)
  )
}

# Sampling distributions of the mean for samples of size 10, 50 and 100.
# The three calls are kept in this order because each one advances the
# random-number stream.
sample_10 <- sample_dist(n = 10)
sample_50 <- sample_dist(n = 50)
sample_100 <- sample_dist(n = 100)

## We R Prepared: centers and spreads

## [1] "Population: mean =  49.38 ; sd =  4.74"
## [1] "n = 10: mean =  49.37 ; se =  1.48"
## [1] "n = 50: mean =  49.38 ; se =  0.66"
## [1] "n = 100: mean =  49.39 ; se =  0.47"

## #Shreklμvers: somewhat right skewed

# Right-skewed "population": 100 draws from a Beta(2, 10) distribution,
# multiplied by 100 (seed fixed for reproducibility).
set.seed(1)
rSkew <- rbeta(n = 100, shape1 = 2, shape2 = 10) * 100

# Histogram and normal probability plot, side by side in a 1 x 2 layout
par(mfrow = c(1, 2))
hist(rSkew)
qqnorm(rSkew)
qqline(rSkew)

## #Shreklμvers: sampler

# Simulate, plot and summarise the sampling distribution of the sample mean.
#
# Args:
#   data:       vector treated as the population.
#   sampleSize: number of observations in each sample.
#   numSims:    number of samples to draw (at least 1).
#
# Each sample is drawn from `data` with replacement. The function draws a
# histogram and a normal probability plot of the sample means side by side,
# prints their mean and standard deviation (rounded to 2 decimals), and
# returns list(mean = <mean of the sample means>, sd = <their sd>).
s <- function(data, sampleSize, numSims) {
  pop_size <- length(data)
  stopifnot(pop_size > 0, numSims >= 1)

  # Index with sample.int() instead of calling sample(data, ...): sample()
  # treats a single number x >= 1 as the population 1:x. For longer vectors
  # this consumes the random-number stream exactly like sample() does.
  means <- vapply(
    seq_len(numSims),
    function(i) {
      mean(data[sample.int(pop_size, size = sampleSize, replace = TRUE)])
    },
    numeric(1)
  )

  # 1 x 2 layout for the two plots; the caller's layout is restored on exit
  old_par <- par(mfrow = c(1, 2))
  on.exit(par(old_par), add = TRUE)

  hist(
    means,
    xlab = "Means",
    main = paste("Histogram of Means where n = ", sampleSize)
  )
  qqnorm(means)
  qqline(means)

  mean_of_means <- mean(means)
  sd_of_means <- sd(means)

  cat(paste("Mean: ", round(mean_of_means, 2), "\n",
            "Standard Deviation: ", round(sd_of_means, 2)))

  list(mean = mean_of_means, sd = sd_of_means)
}

## #Shreklμvers: sampling distributions

# 1000 simulated samples of size 10 from rSkew; keeps the returned mean/sd list
s10 <- s(data = rSkew, sampleSize = 10, numSims = 1000)

## Mean:  18.09
##  Standard Deviation:  3.2

## #Shreklμvers: sampling distributions

# 1000 simulated samples of size 50 from rSkew; keeps the returned mean/sd list
s50 <- s(data = rSkew, sampleSize = 50, numSims = 1000)

## Mean:  18.12
##  Standard Deviation:  1.42

## #Shreklμvers: sampling distributions

# 1000 simulated samples of size 100 from rSkew; keeps the returned mean/sd list
s100 <- s(data = rSkew, sampleSize = 100, numSims = 1000)