% Simple linear regression of time on duration for the oldfaith data.
% The first line of oldfaith.dat is skipped as a header; the three columns
% are parsed as integer, float, integer.
[date, duration, time] = textread('oldfaith.dat', '%d%f%d', 'headerlines', 1);

% Design matrix: regress() does not add an intercept on its own, so a
% leading column of ones is prepended to the predictor.
X = [ones(size(duration)) duration];

% Raw scatter of the data.
plot(duration, time, '.');

% Least-squares fit with 95% intervals (alpha = 0.05).
%   b     : estimated coefficients (intercept, slope)
%   bint  : confidence intervals for b
%   r     : residuals
%   stats : stats(1) is R-square
[b, bint, r, rint, stats] = regress(time, X, 0.05);
b

% Overlay the fitted line, drawn between two fixed abscissae.
x = [1.5 5.5];
y = b(1) + b(2)*x;
hold on;
plot(x, y, 'r-');
hold off;

% Confidence interval for the slope b(2): first as reported by regress(),
% then recomputed by hand from the residual standard error and Sxx.
bint
n = numel(time);
sse = sum(r.^2);                          % residual sum of squares
s = sqrt(sse/(n-2));                      % residual standard error, n-2 d.o.f.
Sxx = sum((duration - mean(duration)).^2);
halfWidth = tinv(.975, n-2)*s/sqrt(Sxx);  % t-quantile times std. error of slope
b(2) + halfWidth % upper CI
b(2) - halfWidth % lower CI

% R-square (coefficient of determination): value from regress(), followed by
% the same quantity computed as 1 - SSE/SST.
stats(1)
1 - sse/sum((time - mean(time)).^2)
% Anscombe Data [anscombe.dat]
% The file is read as a numeric matrix whose columns are consumed in (x, y)
% pairs: (1,2), (3,4), (5,6), (7,8). For each pair a straight line is fitted
% and its residuals are stored in the matching column of res.
data = load('anscombe.dat');
nsets = 4;                              % number of (x, y) pairs; matches the 2x2 subplot grid
res = zeros(size(data,1), nsets);       % sized from the data instead of a hard-coded row count

% Fit plots go in their own figure so they neither overwrite an earlier
% plot nor get overwritten by the residual plots below.
figure;
for i = 1:nsets
    subplot(2,2,i);
    x = data(:,(2*i-1));
    y = data(:,2*i);
    X = [ones(length(x),1) x];          % leading column of ones for the intercept
    [b,bint,r,rint,stats] = regress(y, X, 0.05);
    yfit = b(1) + b(2).*x;
    plot(x, y, '.');
    hold on;
    plot(x, yfit, 'r-');
    hold off;
    res(:,i) = r;                       % keep residuals for the second figure
end

% Residuals against x, one panel per data set, in a separate figure.
figure;
for i = 1:nsets
    subplot(2,2,i);
    plot(data(:,(2*i-1)), res(:,i), '.');
end
% BUCHANAN'S VOTE IN PALM BEACH COUNTY
%
% One of the strongest allegations of voting irregularities in the
% 2000 Presidential election was that a faulty ballot design in Palm
% Beach County, Florida, caused many voters to cast their ballots for the
% Reform Party candidate Patrick Buchanan, when they intended to vote
% for Al Gore. The following is a regression analysis of Buchanan's
% vote over all 67 counties of Florida, using demographic variables
% (population size, race, age distribution, education level and income)
% as covariates.
% fldat1.txt Florida elections data set
% fldat2.txt Explanations of Florida elections data set
% load data
% Read the Florida data explicitly: without this, `data` would still hold
% the Anscombe matrix loaded earlier in this script and data(:,18) would
% fail with an index-out-of-bounds error.
% NOTE(review): assumes fldat1.txt is a purely numeric, header-free table
% with at least 18 columns; if it has a header line or text columns, switch
% to textread/importdata -- confirm against fldat2.txt.
data = load('fldat1.txt');

% Response is column 18 and covariates are columns 4..17 (presumably
% Buchanan's vote and the demographic variables -- verify in fldat2.txt).
y = data(:,18);
X = [ones(length(y),1) data(:,4:17)];   % leading column of ones for the intercept
[b,bint,r,rint,stats] = regress(y, X, 0.05);

% Residuals against fitted values (fitted = observed - residual), in a new
% figure so the plot does not land inside a leftover subplot panel.
figure;
fitted = y - r;
plot(fitted, r, '.');

% Display the row indices ordered by increasing residual; the last entries
% of I are the rows with the largest positive residuals.
[newr, I] = sort(r);
I

% Label one point as the outlier.
% NOTE(review): row 50 is hard-coded; presumably Palm Beach County -- check
% that it agrees with I(end) for the data file in use.
outlierRow = 50;
text(fitted(outlierRow), r(outlierRow), 'outlier')