# k-nearest-neighbour regression of city distance on engine size for the
# `auto` data, followed by a small numerical illustration of the curse of
# dimensionality.
#
# Code based on the file "Code regarding section 4.2"
# (copyright 2003, 2004, 2012 A.Azzalini and B.Scarpa).

library(kknn)

auto <- read.table(
  "http://azzalini.stat.unipd.it/Book-DM/auto.dat",
  header = TRUE
)

# Pull the two columns out explicitly instead of attach()-ing the data frame,
# so nothing is placed on the search path.
y <- auto$city.distance
x <- auto$engine.size

# The same observations are used both as training and as test set, so the
# fitted values are in-sample predictions.
knn_data <- data.frame(x = x, y = y)

# Figure 4.3a from Azzalini & Scarpa (2012): fit with k = 10 neighbours.
aaa <- kknn(y ~ x, train = knn_data, test = knn_data, k = 10)
plot(
  x, y,
  xlab = "Engine size (L)",
  ylab = "City distance (km/L)",
  pch = 16,
  cex = 1.5
)
# Fitted values are reordered by x so the curve is drawn left to right.
lines(sort(x), aaa$fitted.values[order(x)], col = 2, lwd = 2)

# Figure 4.3b from Azzalini & Scarpa (2012): fit with k = 60 neighbours,
# overlaid on the scatterplot drawn above.
bbb <- kknn(y ~ x, train = knn_data, test = knn_data, k = 60)
lines(sort(x), bbb$fitted.values[order(x)], col = 3, lwd = 2)

# Curse of dimensionality ----

# Evaluate (1 - 0.5^(1/n))^(1/p) for `n` observations in `p` dimensions.
# Works elementwise on vectors.
# NOTE(review): this looks like the median distance from the origin to the
# nearest of n points uniform in the unit p-ball -- confirm against the book.
# NOTE: the name shadows stats::dist() in the global environment; it is kept
# so that existing callers of `dist(n, p)` continue to work.
dist <- function(n, p) (1 - 0.5^(1 / n))^(1 / p)

P <- 10
N <- 10

# distance[i, j] = dist(i, j): rows index the number of observations,
# columns index the number of dimensions. outer() evaluates the vectorised
# dist() over the full grid in one call.
distance <- outer(seq_len(N), seq_len(P), dist)

plot(
  distance[, 1],
  xlab = "number of observations",
  ylab = "distance",
  main = "1 dimension"
)
plot(
  distance[1, ],
  xlab = "number of dimensions",
  ylab = "distance",
  main = "1 observation"
)
# Transposed so that dimensions are on the heatmap rows and observations on
# the columns; no clustering/reordering and no rescaling of the values.
heatmap(
  t(distance),
  Rowv = NA,
  Colv = NA,
  scale = "none",
  xlab = "observations",
  ylab = "dimensions"
)