# Exercise 16 ----
# Let's enter the data manually.
# (The S-PLUS `_` assignment operator is not valid in R, so `<-` is used.)
distances <- c(
  3.87, 7.23, 10.00, 15.24, 29.00, 52.03, 95.46,
  192.00, 300.90, 395.00
)
# Labels for the ten bodies, in the same order as `distances`.
names <- c(
  "Mercury", "Venus", "Earth", "Mars", "(Asteroids)",
  "Jupiter", "Saturn", "Uranus", "Neptune", "Pluto"
)

# Scatterplot
plot(
  seq_along(distances), distances,
  xlab = "order", ylab = "distances",
  main = "Distance vs order"
)

# Let's create a vector of natural logarithms
logdis <- log(distances)
plot(
  seq_along(logdis), logdis,
  xlab = "order", ylab = "log distances",
  main = "Log distance vs order"
)

# Summary statistics
av <- mean(distances)
sd <- sqrt(var(distances))
# print() so the values are shown even when the file is run via source()
print(av)
print(sd)

# Summary statistics for the log of the distances
avl <- mean(logdis)
sdl <- sqrt(var(logdis))
print(avl)
print(sdl)

# Exercise 17 ----
# Here are each of the 35 permutations along with the differences in means
# (mean of group1 minus mean of group2). The last column is 1 for diffs
# more extreme than observed.
#
#   group1      | group2   | diff   | extreme
#   ----------- | -------- | ------ | -------
#   68 77 82 53 | 64 71 85 |  -3.33 | 0
#   82 53 64 71 | 68 77 85 |  -9.17 | 0
#   77 82 64 71 | 53 68 85 |   4.83 | 0
#   77 85 64 71 | 53 68 82 |   6.58 | 0
#   77 82 85 71 | 53 64 68 |  17.08 | 1
#   68 77 85 71 | 53 64 82 |   8.92 | 0
#   68 77 85 53 | 64 71 82 |  -1.58 | 0
#   68 53 64 71 | 77 82 85 | -17.33 | 1
#   68 77 53 64 | 71 82 85 | -13.83 | 0
#   77 82 53 64 | 68 71 85 |  -5.67 | 0
#   68 77 53 71 | 64 82 85 |  -9.75 | 0
#   68 77 82 64 | 53 71 85 |   3.08 | 0
#   77 53 64 71 | 68 82 85 | -12.08 | 0
#   68 82 53 71 | 64 77 85 |  -6.83 | 0
#   82 85 53 71 | 64 68 77 |   3.08 | 0
#   68 85 53 64 | 71 77 82 |  -9.17 | 0
#   85 53 64 71 | 68 77 82 |  -7.42 | 0
#   77 85 53 64 | 68 71 82 |  -3.92 | 0
#   68 85 53 71 | 64 77 82 |  -5.08 | 0
#   68 82 64 71 | 53 77 85 |  -0.42 | 0
#   77 85 53 71 | 64 68 82 |   0.17 | 0
#   68 82 85 71 | 53 64 77 |  11.83 | 0
#   82 85 64 71 | 53 68 77 |   9.5  | 0
#   68 77 85 64 | 53 71 82 |   4.83 | 0
#   68 82 85 53 | 64 71 77 |   1.33 | 0
#   77 82 53 71 | 64 68 85 |  -1.58 | 0
#   68 77 82 85 | 53 64 71 |  15.33 | 1
#   68 77 64 71 | 53 82 85 |  -3.33 | 0
#   77 82 85 53 | 64 68 71 |   6.58 | 0
#   82 85 53 64 | 68 71 77 |  -1    | 0
#   68 82 53 64 | 71 77 85 | -10.92 | 0
#   68 77 82 71 | 53 64 85 |   7.17 | 0
#   77 82 85 64 | 53 68 71 |  13    | 0
#   68 85 64 71 | 53 77 82 |   1.33 | 0
#   68 82 85 64 | 53 71 77 |   7.75 | 0

# For the curious, here's the Splus code German used
# to find all the permutations. Unless this is really
# interesting to you, you should skip it.
# Let's introduce the data in a vector.
# There are 7!/(4!3!) = 35 ways to split the seven values into a group of
# four and a group of three.
el <- c(68, 77, 82, 85, 53, 64, 71)

# Enumerate every distinct 3-element group, one per row, each row sorted in
# increasing order. The position of a value inside its group is irrelevant;
# only group membership matters.
# This replaces drawing 1000 random samples of size 3 and scanning them for
# 35 distinct ones: combn() is deterministic and cannot run past the end of
# the sample matrix if some grouping happens never to be drawn.
pe <- t(utils::combn(sort(el), 3))
print(pe)

# Now that we have the permutations of one side, define the other side:
# for each row of `pe`, the four values of `el` that are not in it
# (kept in the order they appear in `el`).
pa <- t(apply(pe, 1, function(small_group) el[!(el %in% small_group)]))

# We have the two matrices row-aligned; compute, for each grouping, the mean
# of the 4-value group minus the mean of the 3-value group.
# Kept as a 35 x 1 matrix. mean() is applied row by row so that the observed
# grouping reproduces exactly the same arithmetic as the reference value
# used in the comparison below.
aver <- matrix(0, nrow(pe), 1)
for (w in seq_len(nrow(pe))) {
  aver[w, ] <- mean(pa[w, ]) - mean(pe[w, ])
}

# Let's see the two-sided p-value: flag the groupings whose absolute
# difference is at least as large as the observed one (first four values of
# `el` versus the last three). mean(extremer) is then the p-value.
extremer <- ifelse(abs(aver) >= abs(mean(el[1:4]) - mean(el[5:7])), 1, 0)

# Exercise 18 ----
# Let's create a vector with all the data
dat1 <- c(
  12.0, 12.0, 12.9, 13.6, 16.6, 17.2, 17.5, 18.2, 19.1, 19.3,
  19.8, 20.3, 20.5, 20.6, 21.3, 21.6, 22.1, 22.2, 22.6, 23.1, 24.0,
  24.3, 26.7, 29.7
)
dat2 <- c(
  5.0, 5.4, 6.1, 10.9, 11.8, 12.0, 12.3, 14.8, 15.0, 16.8, 17.2,
  17.2, 17.4, 17.5, 18.5, 18.7, 18.7, 19.2, 19.5, 20.7, 21.2, 22.1, 24.0
)
data <- c(dat1, dat2)

# Group sizes are taken from the data instead of hard-coding 24 and 47.
n_first <- length(dat1)
n_total <- length(data)

av <- rep(0, 5)

# One random re-allocation of all scores into groups of the original sizes.
# NOTE(review): `or` is the difference for a shuffled allocation, not for the
# observed grouping (that would be mean(dat1) - mean(dat2)) - confirm this is
# what the exercise asks for.
rnd <- sample(seq_len(n_total), size = n_total, replace = FALSE)
de <- data[rnd]
or <- mean(de[1:n_first]) - mean(de[(n_first + 1):n_total])

# Five more random re-allocations; store each difference in means.
for (i in seq_along(av)) {
  rnd <- sample(seq_len(n_total), size = n_total, replace = FALSE)
  de <- data[rnd]
  av[i] <- mean(de[1:n_first]) - mean(de[(n_first + 1):n_total])
}

# Exercise 26 ----
ga <- c(
  1.31, 1.45, 1.12, 1.16, 1.3, 1.5, 1.2, 1.22, 1.42, 1.14, 1.23, 1.59,
  1.11, 1.10, 1.53, 1.52, 1.17, 1.49, 1.62, 1.29
)
gb <- c(
  1.13, 1.71, 1.39, 1.15, 1.33, 1.00, 1.03, 1.68, 1.76, 1.55, 1.34,
  1.47, 1.74, 1.74, 1.19, 1.15, 1.2, 1.59, 1.47
)
# Exercise 26 (plot): side-by-side boxplots of the two groups `ga` and `gb`.
boxplot(
  ga, gb,
  names = c("group a", "group b"),
  ylab = "mg/ml",
  main = "zinc concentrations"
)

# The continuation of problem 18 ----
# Here I assume the data has been read into a data frame called `creativ`
# with columns `score` and `code` (code 1 / code 2 mark the two groups).
y <- creativ$score
ok1 <- creativ$code == 1
ok2 <- creativ$code == 2

# I'll use mean of group 2 - mean of group 1
dif0 <- mean(y[ok2]) - mean(y[ok1])

# Now the permutation part: shuffle the scores, keep the group labels fixed,
# and recompute the difference in means.
# sample(y) draws every score exactly once (no replacement). Sampling with
# replacement would be a bootstrap resample, not a permutation of the data.
dif <- rep(NA_real_, 1000)
for (i in seq_along(dif)) {
  yshuf <- sample(y)
  dif[i] <- mean(yshuf[ok2]) - mean(yshuf[ok1])
}

# Now compute the one-sided p-value: the share of shuffles whose difference
# exceeds the observed one.
extreme <- ifelse(dif > dif0, 1, 0)
print(mean(extreme))
# The original author reported .002 for the one sided p-value; the exact
# figure varies from run to run because the shuffles are random.

# For a nice picture: histogram of the permutation differences with the
# observed difference marked by a triangle on the x axis.
hist(dif, xlab = "permutation values of mean2-mean1")
points(dif0, 0, pch = 17, cex = 3)