# Exercise 16
# Enter the data manually: one distance per body listed in `names`, in the
# same order.
# NOTE: assignments use `<-`; the old S-Plus underscore assignment
# (`x_value`) is not an assignment operator in R.
distances <- c(
  3.87, 7.23, 10.00, 15.24, 29.00, 52.03, 95.46,
  192.00, 300.90, 395.00
)
# NOTE: `names` (and `sd` below) share their names with base R functions.
# Calls such as names(x) or sd(x) still find the functions, but the variable
# names are kept only for compatibility with the original script.
names <- c(
  "Mercury", "Venus", "Earth", "Mars", "(Asteroids)",
  "Jupiter", "Saturn", "Uranus", "Neptune", "Pluto"
)

# Scatterplot of distance against position in the list (1, 2, ..., 10)
plot(
  seq_along(distances), distances,
  xlab = "order", ylab = "distances",
  main = "Distance vs order"
)

# Vector of natural logarithms of the distances, plotted the same way
logdis <- log(distances)
plot(
  seq_along(logdis), logdis,
  xlab = "order", ylab = "log distances",
  main = "Log distance vs order"
)

# Summary statistics: mean and standard deviation (square root of the
# sample variance) of the raw distances
av <- mean(distances)
sd <- sqrt(var(distances))
av
sd

# Summary statistics for the log of the distances
avl <- mean(logdis)
sdl <- sqrt(var(logdis))
avl
sdl

#Exercise 17
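# Here are all 35 permutations (regroupings of the seven values into a
# group of four and a group of three), along with the difference in
# means for each one (mean of group1 minus mean of group2).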
   group1        group2    diff  1 for diffs more extreme than observed
 -----------    --------   ----- ---
 68 77 82 53    64 71 85   -3.33  0  
 82 53 64 71    68 77 85   -9.17  0  
 77 82 64 71    53 68 85   4.83   0  
 77 85 64 71    53 68 82   6.58   0  
 77 82 85 71    53 64 68   17.08  1  
 68 77 85 71    53 64 82   8.92   0  
 68 77 85 53    64 71 82   -1.58  0  
 68 53 64 71    77 82 85   -17.33 1  
 68 77 53 64    71 82 85   -13.83 0  
 77 82 53 64    68 71 85   -5.67  0  
 68 77 53 71    64 82 85   -9.75  0  
 68 77 82 64    53 71 85   3.08   0  
 77 53 64 71    68 82 85   -12.08 0  
 68 82 53 71    64 77 85   -6.83  0  
 82 85 53 71    64 68 77   3.08   0  
 68 85 53 64    71 77 82   -9.17  0  
 85 53 64 71    68 77 82   -7.42  0  
 77 85 53 64    68 71 82   -3.92  0  
 68 85 53 71    64 77 82   -5.08  0  
 68 82 64 71    53 77 85   -0.42  0  
 77 85 53 71    64 68 82   0.17   0  
 68 82 85 71    53 64 77   11.83  0  
 82 85 64 71    53 68 77   9.5    0  
 68 77 85 64    53 71 82   4.83   0  
 68 82 85 53    64 71 77   1.33   0  
 77 82 53 71    64 68 85   -1.58  0  
 68 77 82 85    53 64 71   15.33  1  
 68 77 64 71    53 82 85   -3.33  0  
 77 82 85 53    64 68 71   6.58   0  
 82 85 53 64    68 71 77   -1     0  
 68 82 53 64    71 77 85   -10.92 0  
 68 77 82 71    53 64 85   7.17   0  
 77 82 85 64    53 68 71   13     0  
 68 85 64 71    53 77 82   1.33   0  
 68 82 85 64    53 71 77   7.75   0  


# For the curious, here's the Splus code German used
# to find all the permutations.  Unless this is really
# interesting to you, you should skip it.

# Enumerate every way of splitting the seven observations into a group of
# three and a group of four, and compare the two group means for each split.
# There are 7! / (4! 3!) = 35 such splits.
#
# Results left in the workspace:
#   el       - the seven observations
#   pe       - 35 x 3 matrix; each row is one 3-element group, sorted ascending
#   pa       - 35 x 4 matrix; row w holds the four values of `el` not in pe[w, ]
#   aver     - 35 x 1 matrix; mean(pa[w, ]) - mean(pe[w, ]) for each split
#   extremer - 35 x 1 matrix of 0/1 flags; 1 where |aver| is at least as large
#              as the observed |difference|
#
# The splits are enumerated exactly with combn() instead of being discovered
# by repeated random sampling, so the result is the same on every run and the
# search cannot run out of random draws before all 35 splits are found.
# Assignments use `<-`; the S-Plus underscore assignment is not valid in R.

# The data
el <- c(68, 77, 82, 85, 53, 64, 71)

# Each column of `idx` holds the positions (within `el`) of one 3-element
# group; combn() lists every such choice exactly once.
idx <- combn(length(el), 3)
n_splits <- ncol(idx)

# The 3-element side of each split. Order within a group does not matter,
# so each row is stored sorted.
pe <- t(apply(idx, 2, function(k) sort(el[k])))

# The other side of each split: the remaining four values, in `el` order.
# Selecting by position (rather than by value) stays correct even if `el`
# were to contain repeated values.
pa <- t(apply(idx, 2, function(k) el[-k]))

# Difference in sample averages (4-element group minus 3-element group)
aver <- matrix(apply(pa, 1, mean) - apply(pe, 1, mean), ncol = 1)

# Two-sided comparison against the observed grouping, which is the first
# four values of `el` versus the last three. A small tolerance keeps the
# observed split itself (an exact tie) from being lost to floating-point
# rounding in the >= comparison.
observed <- mean(el[1:4]) - mean(el[5:7])
tol <- sqrt(.Machine$double.eps)
extremer <- ifelse(abs(aver) >= abs(observed) - tol, 1, 0)

# Exercise 18
# Pool the two samples, shuffle the pooled values at random, split them back
# into groups of the original sizes, and record the difference in group means.
# Assignments use `<-`; the S-Plus underscore assignment is not valid in R.

# The two samples, and a vector with all the data.
# NOTE: `data` shares its name with the base function data(); the name is
# kept for compatibility with the original script.
dat1 <- c(
  12.0, 12.0, 12.9, 13.6, 16.6, 17.2, 17.5, 18.2, 19.1, 19.3,
  19.8, 20.3, 20.5, 20.6, 21.3, 21.6, 22.1, 22.2, 22.6, 23.1, 24.0,
  24.3, 26.7, 29.7
)
dat2 <- c(
  5.0, 5.4, 6.1, 10.9, 11.8, 12.0, 12.3, 14.8, 15.0, 16.8, 17.2,
  17.2, 17.4, 17.5, 18.5, 18.7, 18.7, 19.2, 19.5, 20.7, 21.2, 22.1, 24.0
)
data <- c(dat1, dat2)

# Group sizes are taken from the data instead of being hard-coded (47, 24).
n_total <- length(data)
n_first <- length(dat1)
first_group <- seq_len(n_first)
second_group <- (n_first + 1):n_total

# Storage for the five repeated regroupings
av <- rep(0, 5)

# One random regrouping: `rnd` is a random ordering of the positions,
# `de` the pooled data in that order, and `or` the resulting difference
# in means (first n_first values minus the rest).
rnd <- sample(seq_len(n_total), size = n_total, replace = FALSE)
de <- data[rnd]
or <- mean(de[first_group]) - mean(de[second_group])

# Repeat the random regrouping five more times, keeping each difference
for (i in seq_along(av)) {
  rnd <- sample(seq_len(n_total), size = n_total, replace = FALSE)
  de <- data[rnd]
  av[i] <- mean(de[first_group]) - mean(de[second_group])
}

# Exercise 26
# Measurements for the two groups, shown as side-by-side boxplots.
# Assignments use `<-`; the S-Plus underscore assignment is not valid in R.
ga <- c(
  1.31, 1.45, 1.12, 1.16, 1.3, 1.5, 1.2, 1.22, 1.42, 1.14, 1.23, 1.59,
  1.11, 1.10, 1.53, 1.52, 1.17, 1.49, 1.62, 1.29
)
gb <- c(
  1.13, 1.71, 1.39, 1.15, 1.33, 1.00, 1.03, 1.68, 1.76, 1.55, 1.34,
  1.47, 1.74, 1.74, 1.19, 1.15, 1.2, 1.59, 1.47
)
boxplot(
  ga, gb,
  names = c("group a", "group b"),
  ylab = "mg/ml",
  main = "zinc concentrations"
)

# The continuation of problem 18:
# Here I assume the data has been read into a data frame called `creativ`
# with columns `score` (the response) and `code` (group label, 1 or 2).
# Assignments use `<-`; the S-Plus underscore assignment is not valid in R.
y <- creativ$score
ok1 <- creativ$code == 1
ok2 <- creativ$code == 2

# Observed statistic: mean of group 2 - mean of group 1
dif0 <- mean(y[ok2]) - mean(y[ok1])

# Now the permutation part. Each pass shuffles the scores WITHOUT
# replacement, so every shuffle is a rearrangement of the same observed
# values across the fixed group labels. Sampling with replacement would be
# a bootstrap resample, not a permutation. Indexing by sample.int() shuffles
# the whole vector whatever its length.
dif <- rep(NA, 1000)
for (i in seq_along(dif)) {
  yshuf <- y[sample.int(length(y))]
  dif[i] <- mean(yshuf[ok2]) - mean(yshuf[ok1])
}

# Now compute the one-sided p-value: the share of shuffles whose difference
# is at least as large as the observed one (ties count as extreme).
extreme <- ifelse(dif >= dif0, 1, 0)
mean(extreme)
# The original run reported about .002 for the one-sided p-value; the exact
# value varies from run to run with the random shuffles.

# For a nice picture: histogram of the permutation differences, with the
# observed difference marked by a triangle on the horizontal axis.
hist(dif, xlab = "permutation values of mean2-mean1")
points(dif0, 0, pch = 17, cex = 3)