Interest lies in explaining price: distinguishing models according to price; depreciation with age and mileage; possibly varying depreciation rates across car models; collinearity between age and mileage; possible vendor differences in asking prices; outliers and influential cases; and predicting prices.
Download the data from here. If you already downloaded the data in lab 9, you do not have to download it again this time.
Data columns:
# Read the Mercedes used-car data and sort the rows by mileage (column 5).
merc <- read.table("mercedes")
merc <- merc[order(merc[, 5]), ]  # sort in order of mileage

y    <- log(merc[, 2])  # log price: multiplicative depreciation becomes linear
mod  <- merc[, 3]       # model code: 0=500, 1=450, ..., 4=200
age  <- merc[, 4]       # age in units of 6 months
mile <- merc[, 5]       # mileage in thousands
vend <- merc[, 6]       # four different vendors
Mod  <- as.factor(mod)  # treat model as a category, not a number

# Plot log(price) against miles: one symbol/colour and one OLS line per model ----
plot(mile, y, xlab = "Miles", ylab = "Log Price", type = "n")
for (j in 0:4) {
  ok <- mod == j
  points(mile[ok], y[ok], pch = as.character(j), col = j + 1)
  abline(lm(y[ok] ~ mile[ok]), col = j + 1)
}

# Do you think the slopes differ with miles?
# Fit log(price) on Mod + mile, then add model-specific slopes (Mod/mile).
# If the slopes are statistically different, the larger model should fit
# better and the subset F-test should be significant.
fit <- lm(y ~ Mod + mile, x = TRUE)
summary(fit)
newfit <- lm(y ~ Mod + mile + Mod/mile)  # try also: y ~ Mod/mile
summary(newfit, correlation = FALSE)

# Subset F-test -- are the "interaction" slopes jointly significant?
# Compute the statistic from the fitted objects rather than hand-copying
# rounded residual standard errors from the printed summaries.
print(fit); print(newfit)
Qb  <- deviance(fit)                # RSS of the smaller model
Qa  <- deviance(newfit)             # RSS of the larger model
dfa <- df.residual(newfit)          # residual df of the larger model
q   <- df.residual(fit) - dfa       # number of extra slope parameters (4)
fobs <- ((Qb - Qa) / q) / (Qa / dfa)
1 - pf(fobs, q, dfa)  # doesn't look statistically significant overall
anova(fit, newfit)    # same subset F-test, done by R as a cross-check

#-----------------------------------------------------------------------
# Check also age: age and mileage are likely collinear ----
plot(mile, age, xlab = "Miles", ylab = "Age", type = "n")
for (j in 0:4) {
  ok <- mod == j
  points(mile[ok], age[ok], pch = as.character(j), col = j + 1)
}

# Compare models including the Age variable rather than (or with) Miles, e.g.:
fit <- lm(y ~ Mod + age)
summary(fit)
# now maybe slopes are really not different ...
# Plot fitted values from fit (y ~ Mod + age): parallel lines, one per model.
pre <- predict(fit, se.fit = TRUE)  # generic predict(); avoid direct predict.lm()
plot(age, y, xlab = "Age", ylab = "Log Price", type = "n")
for (j in 0:4) {
  ok <- mod == j
  points(age[ok], y[ok], pch = as.character(j), col = j + 1)
  lines(age[ok], pre$fit[ok], col = j + 1)
}

# Allow model-specific age slopes and test them with the subset F-test.
summary(fit1 <- lm(y ~ Mod + age + Mod/age), correlation = FALSE)

# Subset F-test for different slopes, computed from the fitted objects
# rather than hand-copied rounded residual standard errors:
Qb  <- deviance(fit)            # RSS of the smaller model
Qa  <- deviance(fit1)           # RSS of the larger model
dfa <- df.residual(fit1)        # residual df of the larger model
q   <- df.residual(fit) - dfa   # extra slope parameters tested (4)
fobs <- ((Qb - Qa) / q) / (Qa / dfa)
1 - pf(fobs, q, dfa)
anova(fit, fit1)  # equivalent built-in model comparison