# predicting human population from stork population storks=read.table("storks.txt",header=T) attach(storks) plot(storks$storks,people,xlab="storks") storks.lm=lm(people~storks$storks) lines(storks$storks,fitted(storks.lm)) summary(storks.lm) Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 36.54715 4.57751 7.984 0.000498 *** storks$storks 0.14910 0.02285 6.525 0.001265 ** --- Signif. codes: 0 `***' 0.001 `**' 0.01 `*' 0.05 `.' 0.1 ` ' 1 Residual standard error: 2.809 on 5 degrees of freedom Multiple R-Squared: 0.8949, Adjusted R-squared: 0.8739 F-statistic: 42.57 on 1 and 5 DF, p-value: 0.001265 #re-create these results by hand X=cbind(rep(1,length(storks$storks)),storks$storks) X t(X)%*%X solve(t(X)%*%X)%*%t(X)%*%people coef(storks.lm) #get Bayesian results, using 1/sigma^2 prior solve(t(X)%*%X) summary(storks.lm)$cov.unscaled summary(storks.lm)$sigma^2 t(resid(storks.lm))%*%resid(storks.lm)/(length(storks$storks)-2) summary(storks.lm)$cov.unscaled*summary(storks.lm)$sigma^2 storks.lm.sigma=sqrt(diag(summary(storks.lm)$cov.unscaled*summary(storks.lm)$sigma^2)) storks.lm.sigma #marginal posterior 95% credible intervals for coefficients coef(storks.lm)-qt(.975,length(storks$storks)-2)*storks.lm.sigma coef(storks.lm)+qt(.975,length(storks$storks)-2)*storks.lm.sigma #using the 1/sigma prior from the book storks.lm.sigma=sqrt(diag(summary(storks.lm)$cov.unscaled*as.numeric(t(resid(storks.lm))%*%resid(storks.lm))/(length(storks$storks)-4))) coef(storks.lm)-qt(.975,length(storks$storks)-2)*storks.lm.sigma coef(storks.lm)+qt(.975,length(storks$storks)-2)*storks.lm.sigma # example with outliers olymp=read.table("olympics.txt",header=T) pairs(olymp) attach(olymp) plot(year,long_jump) long.lm=lm(long_jump~year) lines(year,fitted(long.lm)) summary(long.lm) long.lm.sigma=sqrt(diag(summary(long.lm)$cov.unscaled*summary(long.lm)$sigma^2)) coef(long.lm)-qt(.975,length(year)-2)*long.lm.sigma coef(long.lm)+qt(.975,length(year)-2)*long.lm.sigma plot(year,resid(long.lm)) # remove outliers and compare detach() olymp2=olymp[-c(1,16),] attach(olymp2) long.lm2=lm(long_jump~year) summary(long.lm2) long.lm2.sigma=sqrt(diag(summary(long.lm2)$cov.unscaled*summary(long.lm2)$sigma^2)) coef(long.lm2)-qt(.975,length(year)-2)*long.lm2.sigma coef(long.lm2)+qt(.975,length(year)-2)*long.lm2.sigma plot(olymp$year,olymp$long_jump) lines(olymp$year,fitted(long.lm)) lines(year,fitted(long.lm2),lty=2)