% Boxplot
% read data where 'NaN' means missing data
haircolor = {'Light Blonde', 'Dard Blonde', 'Light Brunette', 'Dark Brunette'}
pain= [ 62 60 71 55 48; 63 57 52 41 43; 42 50 41 37 NaN; 32 39 51 30 35];
pain = pain'  % transpose of the original matrix
boxplot(pain)
 
set(gca, 'XTicklabel',haircolor )
xlabel('Hair Color');
ylabel('Pain threshold score');

% correlation coefficient
x=unifrnd(-3, 3,1, 50);
y1 = unifrnd(-3,3,1, 50);
y2 = 0.8*x + 0.5*y1;
y3 = -0.8*x + 0.5*y1;
x=x./sqrt(var(x));
y1=y1./sqrt(var(y1));
y2=y2./sqrt(var(y2));
y3=y3./sqrt(var(y3));


subplot(2,2,1)
plot(x, y1, '.');
subplot(2,2,2);
plot(x,y2, '.');
subplot(2,2,3);
plot(x,y3,'.');


% correlation matrix
data = [x' y1' y2' y3'];
size(data)
corrcoeff(data)

% Challenger data
[Oring, temp] = textread('challenger.txt', '%d%d');
subplot(1,2,1);
plot(temp(Oring>0),Oring(Oring>0), '.');
set(gca, 'Ylim',[-1,4]);
set(gca, 'Xlim',[35,90]);
subplot(1,2,2);
plot(temp, Oring, '.');
set(gca, 'Ylim',[-1,4]);
set(gca, 'Xlim',[35,90]);

% linear regression for oldfaith data
[date, duration, time]=textread('oldfaith.dat', '%d%f%d', 'headerlines',1);
plot(duration, time, '.');
X = [ones(length(duration),1) duration]; 
% X needs to be a matrix with a leading  column of ones.
[b,bint ,r,rint,stats] = regress(time, X, 0.05);  
% b :  estimates for the regression coefficients
% r : residuals
% stats(1) : R-square

b
stats(1)
SSE = sum(r.^2);
SST = var(time)*(length(time)-1);
1-SSE/SST

% Now add the regression line to the scatter plot.
x = (15:55)/10;
y = b(1)+ b(2).*x;
hold on;
plot(x , y, 'r-');
hold off;

% draw residual plots
subplot(1,2,1);
plot(duration, r, '.');
subplot(1,2,2);
plot(time-r, r, '.');

% an example

x= unifrnd(-2,2,50,1);
y = -2 + x.^2 + 0.7*normrnd(zeros(50,1), ones(50,1));
plot(x, y, '.');
X = [ones(length(x), 1)  x];
[b, bint, r, rint, stats] = regress(y, X, 0.05);

x1 = [-2.1 2.1];
hold on;
plot(x1, b(1)+b(2).*x1, 'r-');
hold off;

% draw residual plot
subplot(1,2,1);
plot(x, r, '.');

% transform the response variable
newx = x.^2;
X = [ones(length(x), 1)  newx];
[newb, bint, newr, rint, newstats] = regress(y, X, 0.05);
subplot(1,2,2);
plot(x ,newr,'r.');
plot(newx, newr, 'r.');