% Simple linear regression of time on duration for the oldfaith data.
% Read the three columns (date, duration, time), skipping one header line.
[date, duration, time] = textread('oldfaith.dat', '%d%f%d', 'headerlines', 1);

% Scatter plot of the raw data.
plot(duration, time, '.');

% Design matrix: regress does not add an intercept on its own, so the
% first column must be all ones.
X = [ones(length(duration), 1), duration];

% Outputs of regress used below:
%   b     - estimated regression coefficients (intercept, slope)
%   bint  - confidence intervals for the coefficients (alpha = 0.05)
%   r     - residuals
%   stats - stats(1) is R-square
[b, bint, r, rint, stats] = regress(time, X, 0.05);

b

% Overlay the fitted line on the scatter plot; two end points suffice.
xEnds = [1.5 5.5];
yEnds = b(1) + b(2) .* xEnds;
hold on;
plot(xEnds, yEnds, 'r-');
hold off;

% Confidence interval for the slope b(2): first as returned by regress,
% then recomputed by hand as b(2) +/- t(.975, n-2) * s / sqrt(Sxx).
bint
n = numel(time);
sse = sum(r.^2);                              % residual sum of squares
s = sqrt(sse / (n - 2));                      % residual standard error
Sxx = sum((duration - mean(duration)).^2);
halfWidth = tinv(.975, n - 2) * s / sqrt(Sxx);
b(2) + halfWidth % upper CI
b(2) - halfWidth % lower CI

% R-square (coefficient of determination): value from regress, then the
% same quantity computed directly as 1 - SSE/SST.
stats(1)
1 - sse / sum((time - mean(time)).^2)

% Anscombe Data [anscombe.dat]
% Columns (2*i-1, 2*i) of the file are used as the x and y values of
% data set i, for i = 1..4. Each data set gets its own least-squares fit.
data = load('anscombe.dat');
nSets = 4;                              % one panel per data set in a 2x2 grid
res = zeros(size(data, 1), nSets);      % residuals, one column per data set

% Scatter plots with fitted lines. A new figure is opened so the subplots
% do not replace whatever is in the current figure.
figure;
for i = 1:nSets
    subplot(2, 2, i);
    x = data(:, 2*i-1); y = data(:, 2*i);
    X = [ones(length(x), 1) x];         % leading column of ones = intercept
    [b, bint, r, rint, stats] = regress(y, X, 0.05);
    yfit = b(1) + b(2) .* x;
    plot(x, y, '.');
    hold on;
    plot(x, yfit, 'r-');
    hold off;
    res(:, i) = r;                      % keep residuals for the second figure
end

% Residuals against x, in a separate figure: reusing the same subplots
% would overwrite the fitted-line plots drawn above.
figure;
for i = 1:nSets
    subplot(2, 2, i);
    plot(data(:, 2*i-1), res(:, i), '.');
end

% BUCHANAN'S VOTE IN PALM BEACH COUNTY
%
% One of the strongest allegations of voting irregularities in the
% 2000 Presidential election was that a faulty ballot design in Palm
% Beach County, Florida, caused many voters to cast their ballots for the
% Reform Party candidate Patrick Buchanan, when they intended to vote
% for Al Gore. The following is a regression analysis of Buchanan's
% vote over all 67 counties of Florida, using demographic variables
% (population size, race, age distribution, education level and income)
% as covariates. 

% fldat1.txt Florida elections data set
% fldat2.txt Explanations of Florida elections data set

% load data
% Without this load, 'data' would still hold the Anscombe matrix from the
% previous section and the column-18 index below would be out of range.
% NOTE(review): assumes fldat1.txt is a purely numeric table that load can
% read directly -- confirm; header lines or text columns would need
% textread/textscan instead.
data = load('fldat1.txt');

% Response is column 18, covariates are columns 4 to 17.
% NOTE(review): presumably column 18 is Buchanan's vote and columns 4-17
% the demographic variables -- verify against fldat2.txt.
y = data(:, 18);
X = [ones(length(y), 1) data(:, 4:17)];   % leading column of ones = intercept
[b, bint, r, rint, stats] = regress(y, X, 0.05);

% Residuals against fitted values (y - r), in a new figure so the plot
% does not land in the last subplot of the previous section.
figure;
plot(y - r, r, '.');

% Sort the residuals and echo the ordering so extreme cases can be found.
[newr, I] = sort(r);
I

% Label observation 50 on the plot.
% NOTE(review): the index 50 is hard-coded; presumably it is the Palm
% Beach County row -- confirm against the data file.
text((y(50) - r(50)), r(50), 'outlier')