# Principal component regression (PCR) and partial least squares (PLS) on the
# `auto` data set: explore the predictors with PCA, fit PCR both through
# pls::pcr() and manually through lm() on the leading principal components,
# then fit PLS and compare in-sample mean squared errors with 3 components.
#
# The script deliberately neither clears the workspace nor attach()es the
# data: every variable is referenced through an explicit data frame.
library(pls)

# Read the data (requires network access).
auto <- read.table("http://azzalini.stat.unipd.it/Book-DM/auto.dat",
                   header = TRUE)

# Predictors shared by every model below.
predictors <- c("wheel.base", "length", "width", "height", "curb.weight",
                "engine.size", "compression.ratio", "HP", "peak.rot")
X <- auto[, predictors]

# Response together with the predictors, so that `city.distance ~ .` is
# resolved entirely inside `data` instead of through the search path.
model.data <- auto[, c("city.distance", predictors)]
y <- model.data$city.distance

# PCA on centred and scaled predictors (`scale.` is the full argument name).
pc <- prcomp(X, center = TRUE, scale. = TRUE)
summary(pc)

# Scores on the first two components; rows whose brand is 'mercedes-gas' are
# highlighted. which() discards NA comparisons and drop = FALSE keeps the
# selection a matrix even when a single row matches.
plot(pc$x[, 1:2], cex = 2, pch = 16)
highlighted <- which(auto$brand == "mercedes-gas")
points(pc$x[highlighted, 1:2, drop = FALSE], col = 2, pch = 16, cex = 2)

# The cross-validation requested below is random by default in pls; fix the
# seed so the reported CV errors are reproducible.
set.seed(1)

# Fit a PCR model.
model.pcr <- pcr(city.distance ~ ., data = model.data, validation = "CV",
                 scale = TRUE)
summary(model.pcr)

# The same fit can be obtained manually by regressing the response on the
# first three principal component scores. Using named columns (PC1..PC3) in a
# data frame lets predict() genuinely use `newdata`.
pc.data <- data.frame(city.distance = y, pc$x[, 1:3])
model.pc <- lm(city.distance ~ PC1 + PC2 + PC3, data = pc.data)
summary(model.pc)

# Compare the in-sample MSE of the two fits.
# predict() on a pls model returns an array; drop() reduces it to a vector.
mean((y - drop(predict(model.pcr, newdata = X, ncomp = 3)))^2)
# Project X onto the principal components before predicting with the lm fit.
pc.scores <- as.data.frame(predict(pc, newdata = X))
mean((y - predict(model.pc, newdata = pc.scores))^2)


# PLS regression with the same response, predictors and validation scheme.
model.pls <- plsr(city.distance ~ ., data = model.data, validation = "CV",
                  scale = TRUE)
summary(model.pls)

mean((y - drop(predict(model.pls, newdata = X, ncomp = 3)))^2)