# Simple illustrations of linear models

# Example 1 - Weight of newborn girls
# -----------------------------------
# Consider the weight of seven newborn girls
# selected from the data set given in week 34:
data="http://www.uio.no/studier/emner/matnat/math/STK3100/data/firstborn.txt"
firstborn=read.table(data,header=T)
girls=firstborn[c(32,36,40,44,48,52,56),c(1,3)]

# Plot data:
plot(girls$weeks,girls$weight,col='red',pch=16,xlab="Weeks",ylab="Weight")

# Fit linear model and plot fitted line:
fit1=lm(weight~weeks, data=girls)
summary(fit1)
abline(fit1$coef[1],fit1$coef[2],col='red',lwd=2)

# Find model matrix:
X1=model.matrix(fit1)
X1

# Compute beta-estimates using matrix formula:
b1=solve(t(X1)%*%X1)%*%t(X1)%*%girls$weight
b1

# Fit model with duration centered (i.e. subtract mean duration)
# and plot fitted line (in same plot):
mean(girls$weeks)
girls$weeks.c=girls$weeks-39
fit2=lm(weight~weeks.c, data=girls)
summary(fit2)
lines(girls$weeks.c+39,fit2$coef[1]+fit2$coef[2]*girls$weeks.c,lty=2,lwd=2)

# Inspect model matrix:
X2=model.matrix(fit2)
X2

# Compute estimates using matrix formula:
b2=solve(t(X2)%*%X2)%*%t(X2)%*%girls$weight
b2
b1                # the beta1 (slope) estimates are the same!
b2[1]-b2[2]*39    # beta0 = beta0* - beta1*mean(x), i.e. the original intercept

# Compute hat matrix corresponding to the two model matrices:
H1=X1%*%solve(t(X1)%*%X1)%*%t(X1)
H2=X2%*%solve(t(X2)%*%X2)%*%t(X2)
sum((H1-H2)^2)
# The two hat matrices are equal for the two different parametrizations
# (apart from rounding errors)

#==========================================================

# Example 2 - Vitamin C in bread
# ------------------------------
# Consider the example from week 34 concerning
# vitamin C in bread

# Read data:
group=c(1,1, 2,2, 3,3)
vitc=c(47.62,49.79, 40.45,43.46, 21.25,22.34)
group=factor(group)

# Fit models with the four model matrices given
# on slide 19 of week 34

# a)
fit.a=lm(vitc~group-1)
summary(fit.a)
Xa=model.matrix(fit.a)
Xa

# b)
fit.b=lm(vitc~group)
summary(fit.b)
Xb=model.matrix(fit.b)
Xb

# c)
newgroup=relevel(group,ref="3")
fit.c=lm(vitc~newgroup)
summary(fit.c)
Xc=model.matrix(fit.c)
Xc

# d)
contrasts(group)=contr.sum
fit.d=lm(vitc~group)
summary(fit.d)
Xd=model.matrix(fit.d)
Xd

# Compute hat matrix corresponding to the four model matrices:
Ha=Xa%*%solve(t(Xa)%*%Xa)%*%t(Xa)
Hb=Xb%*%solve(t(Xb)%*%Xb)%*%t(Xb)
Hc=Xc%*%solve(t(Xc)%*%Xc)%*%t(Xc)
Hd=Xd%*%solve(t(Xd)%*%Xd)%*%t(Xd)
Ha
Hb
Hc
Hd
# The four hat matrices are (of course) equal for the different
# parametrizations (apart from rounding errors)

# Fit model with the model matrix given on slide 20 of week 34
# e)
x1=as.numeric(group==1)
x2=as.numeric(group==2)
x3=as.numeric(group==3)
fit.e=lm(vitc~x1+x2+x3)
summary(fit.e)
Xe=model.matrix(fit.e)
Xe

# Compute beta-estimates using matrix formula with
# Moore-Penrose inverse:
library(MASS)
be=ginv(t(Xe)%*%Xe)%*%t(Xe)%*%vitc

# Check that the estimates are solutions of the normal
# equations X^T*y = X^T*X*betahat (apart from rounding errors):
t(Xe)%*%vitc
t(Xe)%*%Xe%*%be

# Other solutions of the normal equations have the form be+D,
# where D is any vector of the form (d,-d,-d,-d).
# Illustration:
D=matrix(c(2,-2,-2,-2),ncol=1)
t(Xe)%*%Xe%*%(be-D)

# Compute hat matrix corresponding to the model matrix Xe:
He=Xe%*%ginv(t(Xe)%*%Xe)%*%t(Xe)
sum((He-Ha)^2)
# This hat matrix is equal to the other four
# (apart from rounding errors),
# and hence (of course) the fitted values are also all the same:
He%*%vitc
Ha%*%vitc
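
#==========================================================

# Optional cross-checks (a minimal sketch, not part of the original
# examples; it assumes the objects fit1, X1, H1, girls, fit.e, Xe, be, He
# and vitc created above are still in the workspace).
# The hand-computed matrix quantities should agree with R's built-in
# extractors, apart from rounding errors.

# Example 1: leverages, fitted values and coefficient estimates
sum((diag(H1)-hatvalues(fit1))^2)                  # leverages = diagonal of hat matrix
sum((H1%*%girls$weight-fitted(fit1))^2)            # fitted values y-hat = H*y
sum((coef(fit1)-solve(t(X1)%*%X1,t(X1)%*%girls$weight))^2)  # beta-hat from normal equations

# Example 2: the beta-estimates in the over-parametrized model are not
# unique, but the fitted values Xe%*%be = He%*%vitc are, and they agree
# with what lm() reports
sum((Xe%*%be-fitted(fit.e))^2)
sum((He%*%vitc-fitted(fit.e))^2)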