# code based on the files "Code regarding section 3.2 (copyright 2003, 2004, 2012 A.Azzalini and B.Scarpa)" # and "Code regarding section 3.3-3.5" (copyright 2003, 2004, 2012 A.Azzalini and B.Scarpa) # read data dataset <- read.table("http://azzalini.stat.unipd.it/Book-DM/yesterday.dat", header = TRUE) attach(dataset) n <- nrow(dataset) y.range <- range(c(y.yesterday, y.tomorrow)) # define f(x) as polynomials polynomial <- function(p) lm(y.yesterday ~ poly(x,p)) polynomials <- apply(cbind(1:23), 1, polynomial) error.train <- sapply(polynomials, deviance) rss <- function(p) sum((y.tomorrow - predict(polynomials[[p]]))^2) error.test <- apply(cbind(1:23), 1, rss) out <- cbind(1:23, error.train, error.test) colnames(out) <- c('complexity', 'train.error', 'test.error') out rss60<- function(p) sum((y.tomorrow - predict(polynomials[[p]]))^2)+ sum((y.yesterday - predict(polynomials[[p]]))^2) Error <- apply(cbind(1:23), 1, rss) y <- c(y.yesterday, y.tomorrow) xx <- c(x, x) ### bootstrap ### B <- 1000 bootstrap.err.b <- matrix(NA, nrow = B, ncol = 7) # just for the first 6 degrees of the polynomial for (b in 1:B) { set.seed(b) # set seed, useful for data replication index <- sample(n, replace = TRUE) # the selected observations form the training set temp.train.data <- data.frame(y[index], xx[index]) # the rest the test set temp.test.data <- data.frame(y[-index], xx[-index]) colnames(temp.test.data) <- colnames(temp.train.data) <- c('y', 'x') # fit the model on the training set for each polynomial polynoms <- sapply(0:6, function(p) if(p == 0) lm(y ~ 1, data = temp.train.data) else lm(y ~ poly(x, p), data = temp.train.data)) # compute the error computeError <- function(model, newdata) sum((newdata[ , 'y'] - predict(model, newdata = newdata))^2) # save the error in the matrix initialized above bootstrap.err.b[b, ] <- unlist(lapply(polynoms, computeError, newdata = temp.test.data)) print(b) } # compute the average error for the K folds, for each degree of the polynomial bootstrap.err <- apply(bootstrap.err.b, 2, mean) # select the degree with smallest cross-validation error which.min(bootstrap.err) # plot plot(0:6, bootstrap.err[1:7], type = "b", xlab = "Model complexity", pch = 16, ylab = "Error", cex = 2, lwd = 3) # 0.632 bootstrap bootstrap.632.err <- 0.368*error.train[1:7] + 0.632*bootstrap.err plot(0:6, bootstrap.632.err[1:7], type = "b", xlab = "Model complexity", pch = 16, ylab = "Error", cex = 2, lwd = 3) ### INFORMATION CRITERIA ### # figure 3.10 p <- 1:(23) n <- 2*nrow(dataset) AIC <- n * log(Error / n) + 2 * p BIC <- n * log(Error / n)+ log(n) * p plot(c(1, max(p)), c(min(AIC, BIC), max(AIC, BIC)), type = "n", xlab = "Model complexity", ylab = "AIC and BIC criterion") lines(AIC, col = 2, type = "b", pch = 16, cex = 2) lines(BIC, col = 4, type = "b", pch = 18, cex = 2) #legend('bottomright', legend = c("AIC", "AIC.c", "BIC"), lty =1, # col = 2:4, cex = 1) # figure 3.11 x <- dataset[ , 1] y <- as.vector(data.matrix(dataset[, 2:3])) xx <- c(x, x) n <- length(xx) n0 <- n / 2 plot(xx, y, type = "n", cex = 2, xlab = "x") points(xx[1:n0], y[1:n0], pch = 16, col = 2, cex = 2) points(xx[n0+1:n], y[n0+1:n], pch = 17, col = 4, cex = 2) y.hat <- fitted(lm(y ~ poly(xx,4))) lines(xx[1:n0], y.hat[1:n0]) legend(2.3, 0.47, legend = c("Training set", "Test set"), pch = 16:17, col = c(2, 4)) detach(dataset) auto <- read.table("http://azzalini.stat.unipd.it/Book-DM/auto.dat", header = TRUE) summary(auto) attach(auto) # backward elimination full.model <- lm(city.distance ~ fuel + aspiration + bodystyle + drive.wheels + engine.location + wheel.base + length + width + height + curb.weight + engine.size + compression.ratio + HP + peak.rot + n.cylinders) AIC(full.model) summary(full.model) # remove witdh mod.1 <- lm(city.distance ~ fuel + aspiration + bodystyle + drive.wheels + engine.location + wheel.base + length + height + curb.weight + engine.size + compression.ratio + HP + peak.rot + n.cylinders) AIC(mod.1) summary(mod.1) mod.2 <- lm(city.distance ~ fuel + aspiration + drive.wheels + engine.location + wheel.base + length + height + curb.weight + engine.size + compression.ratio + HP + peak.rot + n.cylinders) # remove body.style AIC(mod.2) summary(mod.2) mod.3 <- lm(city.distance ~ fuel + aspiration + engine.location + wheel.base + length + height + curb.weight + engine.size + compression.ratio + HP + peak.rot + n.cylinders) AIC(mod.3) summary(mod.3) # remove drive.wheels mod.4 <- lm(city.distance ~ fuel + aspiration + engine.location + wheel.base + length + height + curb.weight + engine.size + compression.ratio + HP + peak.rot) AIC(mod.4) summary(mod.4) # remove engine.location mod.5 <- lm(city.distance ~ fuel + aspiration + wheel.base + length + height + curb.weight + engine.size + compression.ratio + HP + peak.rot) AIC(mod.5) # using AIC as stopping criterion, we keep mod.4 summary(mod.5) # same with the function stepAIC of the package 'MASS' library(MASS) model.backward <- stepAIC(full.model, direction = "backward") extractAIC(full.model) extractAIC(mod.1) extractAIC(mod.2) extractAIC(mod.3) extractAIC(mod.4) extractAIC(mod.5) # forward selection null.model <- lm(city.distance ~ 1) summary(null.model) add1(null.model, full.model) # HP leads to the best improvement in AIC mod.a <- lm(city.distance ~ HP) summary(mod.a) add1(mod.a, full.model) # length leads to the best improvement in AIC # and so on # with stepAIC function model.forward <- stepAIC(object = null.model, scope = full.model$call, direction = "forward") summary(model.forward) library(pls) X <- auto[ , c('wheel.base', 'length', 'width', 'height', 'curb.weight', 'engine.size', 'compression.ratio', 'HP', 'peak.rot')] pc <- prcomp(X, center = TRUE, scale = TRUE) summary(pc) plot(pc$x[,1:2], cex = 2, pch = 16) points(pc$x[auto$brand == 'mercedes-gas', 1:2], col = 2, pch = 16, cex = 2) model.pcr <- pcr(city.distance ~ ., data = as.data.frame(X), validation = 'CV', scale = TRUE) summary(model.pcr) model.pc <- lm(city.distance ~ pc$x[ , 1] + pc$x[ , 2] + pc$x[ , 3]) summary(model.pc) mean((city.distance - predict(model.pcr, newdata = X, ncomp = 3))^2) mean((city.distance - predict(model.pc, newdata = X))^2)