# Simple illustrations of linear models

# Example 1 - Weight of newborn girls
# -----------------------------------
# Consider the weight of seven newborn girls
# selected from the data set given in week 34:
data="http://www.uio.no/studier/emner/matnat/math/STK3100/data/firstborn.txt"
firstborn=read.table(data,header=T)
girls=firstborn[c(32,36,40,44,48,52,56),c(1,3)]

# Plot data:
plot(girls$weeks,girls$weight,col='red',pch=16,xlab="Weeks",ylab="Weight")

# Fit linear model and plot fitted line:
fit1=lm(weight~weeks, data=girls)
summary(fit1)
abline(fit1$coef[1],fit1$coef[2],col='red',lwd=2)

# Find model matrix:
X1=model.matrix(fit1)
X1

# Compute beta-estimates using matrix formula:
b1=solve(t(X1)%*%X1)%*%t(X1)%*%girls$weight
b1

# Fit model with duration centered (i.e. subtract mean duration)
# and plot fitted line (in same plot):
mean(girls$weeks)
girls$weeks.c=girls$weeks-39
fit2=lm(weight~weeks.c, data=girls)
summary(fit2)
lines(girls$weeks.c+39,fit2$coef[1]+fit2$coef[2]*girls$weeks.c,lty=2,lwd=2)

# Inspect model matrix:
X2=model.matrix(fit2)
X2

# Compute estimates using matrix formula:
b2=solve(t(X2)%*%X2)%*%t(X2)%*%girls$weight
b2
b1                # the beta1 (slope) estimates are the same!
b2[1]-b2[2]*39    # beta0 = beta0* - beta1*mean(x), i.e. the original intercept

# Compute hat matrix corresponding to the two model matrices:
H1=X1%*%solve(t(X1)%*%X1)%*%t(X1)
H2=X2%*%solve(t(X2)%*%X2)%*%t(X2)
sum((H1-H2)^2)
# The two hat matrices are equal for the two different parametrizations
# (apart from rounding errors)

#==========================================================

# Example 2 - Vitamin C in bread
# ------------------------------
# Consider the example from week 34 concerning
# vitamin C in bread

# Read data:
group=c(1,1, 2,2, 3,3)
vitc=c(47.62,49.79, 40.45,43.46, 21.25,22.34)
group=factor(group)

# Fit models with the four model matrices given
# on slide 19 of week 34

# a)
fit.a=lm(vitc~group-1)
summary(fit.a)
Xa=model.matrix(fit.a)
Xa

# b)
fit.b=lm(vitc~group)
summary(fit.b)
Xb=model.matrix(fit.b)
Xb

# c)
newgroup=relevel(group,ref="3")
fit.c=lm(vitc~newgroup)
summary(fit.c)
Xc=model.matrix(fit.c)
Xc

# d)
contrasts(group)=contr.sum
fit.d=lm(vitc~group)
summary(fit.d)
Xd=model.matrix(fit.d)
Xd

# Compute hat matrix corresponding to the four model matrices:
Ha=Xa%*%solve(t(Xa)%*%Xa)%*%t(Xa)
Hb=Xb%*%solve(t(Xb)%*%Xb)%*%t(Xb)
Hc=Xc%*%solve(t(Xc)%*%Xc)%*%t(Xc)
Hd=Xd%*%solve(t(Xd)%*%Xd)%*%t(Xd)
Ha
Hb
Hc
Hd
# The four hat matrices are (of course) equal for the different
# parametrizations (apart from rounding errors)

# Fit model with the model matrix given on slide 20 of week 34
# e)
x1=as.numeric(group==1)
x2=as.numeric(group==2)
x3=as.numeric(group==3)
fit.e=lm(vitc~x1+x2+x3)
summary(fit.e)
Xe=model.matrix(fit.e)
Xe

# Compute beta-estimates using matrix formula with
# Moore-Penrose inverse:
library(MASS)
be=ginv(t(Xe)%*%Xe)%*%t(Xe)%*%vitc

# Check that the estimates are solutions of the normal
# equations X^T*y = X^T*X*betahat (apart from rounding errors):
t(Xe)%*%vitc
t(Xe)%*%Xe%*%be

# Other solutions of the normal equations have the form be+D,
# where D is any vector of the form (d,-d,-d,-d).
# Illustration:
D=matrix(c(2,-2,-2,-2),ncol=1)
t(Xe)%*%Xe%*%(be-D)

# Compute hat matrix corresponding to the model matrix Xe:
He=Xe%*%ginv(t(Xe)%*%Xe)%*%t(Xe)
sum((He-Ha)^2)
# This hat matrix is equal to the other four
# (apart from rounding errors),
# and hence (of course) the fitted values are also all the same:
He%*%vitc
Ha%*%vitc
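
#==========================================================

# Optional cross-checks (a minimal sketch, not part of the original
# examples; it assumes the objects fit1, X1, H1, girls, fit.e, Xe, be, He
# and vitc created above are still in the workspace).
# The hand-computed matrix quantities should agree with R's built-in
# extractors, apart from rounding errors.

# Example 1: leverages, fitted values and coefficient estimates
sum((diag(H1)-hatvalues(fit1))^2)                  # leverages = diagonal of hat matrix
sum((H1%*%girls$weight-fitted(fit1))^2)            # fitted values y-hat = H*y
sum((coef(fit1)-solve(t(X1)%*%X1,t(X1)%*%girls$weight))^2)  # beta-hat from normal equations

# Example 2: the beta-estimates in the over-parametrized model are not
# unique, but the fitted values Xe%*%be = He%*%vitc are, and they agree
# with what lm() reports
sum((Xe%*%be-fitted(fit.e))^2)
sum((He%*%vitc-fitted(fit.e))^2)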