library(rpart)

##### Exercise 10.4 #####

### Point (a) ###

myAdaBoost <- function(X, y, M) {
  # Step 1: Initialization
  N <- nrow(X)
  f <- list()                          # fitted stumps G_m
  w <- rep(1/N, N)                     # observation weights, initially uniform
  b <- matrix(NA, ncol = N, nrow = M)  # in-sample predictions of each stump (-1/1)
  alpha <- rep(NA, M)                  # stump weights
  X <- as.data.frame(X)

  # Step 2: Boosting iterations
  for (m in 1:M) {
    # Fit a weighted classification stump (tree of depth 1)
    f[[m]] <- rpart(as.factor(y) ~ ., data = X, weights = w,
                    control = rpart.control(maxdepth = 1))
    # Transform the factor predictions back to -1/1 numeric values
    b[m, ] <- 2 * (as.numeric(predict(f[[m]], newdata = X, type = 'class')) - 1.5)
    # Weighted misclassification error and stump weight alpha_m
    err_m <- (w %*% (b[m, ] != y)) / sum(w)
    alpha[m] <- log((1 - err_m) / err_m)
    # Update and renormalize the observation weights
    w <- w * exp(alpha[m] * as.numeric(b[m, ] != y))
    # w <- w * exp(alpha[m] * as.numeric(b[m, ] != y) - alpha[m]/2) # if one wants to take the constant into account
    w <- w / sum(w)
    print(m)
  }

  # Step 3: Predictions of the final classifier (weighted majority vote)
  pred_train <- sign(colSums(matrix(rep(alpha, N), ncol = N) * b))

  return(list(prediction_train = pred_train, alpha = alpha, b = b, f = f))
}

### Point (b) ###
M <- 400

# Generate training data
set.seed(1)
N_train <- 2000
p <- 10
X_train <- matrix(rnorm(N_train * p), ncol = p)
y_train <- apply(X_train^2, 1, sum)
y_train[y_train <= qchisq(p = 0.5, df = 10)] <- -1
y_train[y_train > qchisq(p = 0.5, df = 10)] <- 1
table(y_train)

# Generate test data
N_test <- 10000
X_test <- matrix(rnorm(N_test * p), ncol = p)
y_test <- apply(X_test^2, 1, sum)
y_test[y_test <= qchisq(p = 0.5, df = 10)] <- -1
y_test[y_test > qchisq(p = 0.5, df = 10)] <- 1
table(y_test)

# Fit the algorithm
model <- myAdaBoost(X_train, y_train, M = M)

# Training error at each iteration
train_error <- rep(NA, M)
for (i in 1:M) {
  print(i)
  yhat <- sign(colSums(matrix(rep(model$alpha[1:i], N_train), ncol = N_train) * model$b[1:i, ]))
  train_error[i] <- mean(yhat != y_train)
}
plot(1:M, train_error, type = 'l', col = "blue",
     ylab = "error", xlab = "Boosting iteration")

# Test error at each iteration
b_test <- t(sapply(model$f, function(x) 2 * (as.numeric(predict(x, newdata = as.data.frame(X_test), type = 'class')) - 1.5)))
test_error <- rep(NA, M)
for (i in 1:M) {
  print(i)
  yhat <- sign(colSums(matrix(rep(model$alpha[1:i], N_test), ncol = N_test) * b_test[1:i, ]))
  test_error[i] <- mean(yhat != y_test)
}
lines(1:M, test_error, col = "red")
legend(x = "topright", legend = c("Training error", "Test error"),
       col = c("blue", "red"), lty = c(1, 1), cex = 1)

### Point (c) ###
# Rerun point (b) with M = 1000; still no overfitting is observed
M <- 1000

### Point (d) ###
M <- 400

# Generate training data: class 1 is standard Gaussian, class -1 is drawn
# from the same Gaussian conditional on sum(x^2) > 12
set.seed(1)
N_train <- 2000
p <- 10
y_train <- c(rep(1, 1000), rep(-1, 1000))
X_train <- matrix(NaN, nrow = N_train, ncol = p)
X_train[1:1000, ] <- rnorm(n = N_train * p / 2)
for (i in 1001:2000) {
  cont <- TRUE
  while (cont) {
    tmp <- rnorm(n = 10)
    if (sum(tmp^2) > 12) {
      X_train[i, ] <- tmp
      cont <- FALSE
    }
  }
}

# Generate test data (same mechanism)
N_test <- 10000
p <- 10
y_test <- c(rep(1, 5000), rep(-1, 5000))
X_test <- matrix(NaN, nrow = N_test, ncol = p)
X_test[1:5000, ] <- rnorm(n = N_test * p / 2)
for (i in 5001:10000) {
  cont <- TRUE
  while (cont) {
    tmp <- rnorm(n = 10)
    if (sum(tmp^2) > 12) {
      X_test[i, ] <- tmp
      cont <- FALSE
    }
  }
}

# Fit the algorithm
model <- myAdaBoost(X_train, y_train, M = M)

# Training error at each iteration
train_error <- vector("numeric", M)
for (i in 1:M) {
  print(i)
  yhat <- sign(colSums(matrix(rep(model$alpha[1:i], N_train), ncol = N_train) * model$b[1:i, ]))
  train_error[i] <- mean(yhat != y_train)
}
plot(1:M, train_error, type = 'l', col = "blue",
     ylab = "error", xlab = "Boosting iteration")

# Test error at each iteration
b_test <- t(sapply(model$f, function(x) 2 * (as.numeric(predict(x, newdata = as.data.frame(X_test), type = 'class')) - 1.5)))
test_error <- vector("numeric", M)
for (i in 1:M) {
  print(i)
  yhat <- sign(colSums(matrix(rep(model$alpha[1:i], N_test), ncol = N_test) * b_test[1:i, ]))
  test_error[i] <- mean(yhat != y_test)
}
lines(1:M, test_error, col = "red")
legend(x = "topright", legend = c("Training error", "Test error"),
       col = c("blue", "red"), lty = c(1, 1), cex = 1)
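
# Optional numeric summary of the point (d) error curves computed above.
# This only reuses the train_error/test_error vectors already in the
# workspace; no additional model fitting is performed.
cat("Final training error after", M, "iterations:", train_error[M], "\n")
cat("Final test error after", M, "iterations:", test_error[M], "\n")
cat("Minimum test error:", min(test_error),
    "reached at iteration", which.min(test_error), "\n")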