# Ex 3.2 simulation experiment
# Author: Vera Kvisgaard
# Date: 23.02.25 Vera Kvisgaard
# Based on Lars' script from 2023 /studier/emner/matnat/math/STK2100/v23/Exercise%20solutions/esl_ex_3_2.r

# Prep ----
# Set seed for reproducibility
set.seed(3)

# Set the significance level.
alpha = 0.05
level = 1 - alpha

# The number of x values to consider
N = 100

# The standard deviation of the noise
sigma_noise = 1

# Generate data ----
# Draw x from a uniform distribution
x = runif(N, min = -2, max = 2)

# Define the true beta values
beta = matrix(c(1, -1.5, -0.5, 1), nrow = 4, ncol = 1)

# Generate the design matrix
X = matrix(c(x^0, x, x^2, x^3), nrow = N, ncol = 4)
colnames(X) <- c("X0", "X1", "X2", "X3")

# Compute true expected value of y for each value of x
mu = X %*% beta

# Draw the response, y
noise = rnorm(N, mean = 0, sd = sigma_noise)
y = mu + noise

# Plot data
plot(x, y, col = "red", pch = 1)

# Compare with true means
points(x, mu, pch = 16)
legend("bottomright",
       legend = c("f(x)", "Data"),
       col = c(1,2),
       pch = c(16, 1),
       cex = 1.1)

# Fit linear regression model ----
## Using lm() ----
## Use lm() to estimate coefficients
lm_data = data.frame(y = y, X)
lm_model = lm(y~.-1, data = lm_data) #-1 as we do not want an additional intercept
summary(lm_model)

# Can compare the true coefficients with the estimated ones
rbind(true = beta[, 1],
      estimated = lm_model$coefficients) # Reasonably good

## "By hand" ----
## Estimate coefficients with base-R matrix functionality
help(solve)
help("%*%")
XTX    <- t(X) %*% X               # going to reuse this one
XTXinv <- solve(XTX)               # .. and this one
beta_hat = XTXinv %*% t(X) %*% y

# Compare with what lm function got.
rbind(summary(lm_model)$coeff[,"Estimate"], t(beta_hat))

# Estimate sigma
p = nrow(beta) - 1 # Number of explanatory variables (intercept does not count)
sigma_hat = sqrt(sum(lm_model$residuals^2)/(N-p-1))

# Compare with what lm function got.
rbind(summary(lm_model)$sigma, sigma_hat)

# Construct confidence bands ----

# First, define a grid of x0 values
x0 = seq(-2, 2, by = .1)
X0 = matrix(c(x0^0, x0, x0^2, x0^3), nrow = length(x0), ncol = 4)
colnames(X0) <- c("X0", "X1", "X2", "X3")

## Approach 1: Point-wise confidence intervals ----
## Construct confidence bands for f(x) by computing confidence intervals for each
## value of x0 separately (point-wise)

### Use model object and predict.lm() function ----
help(predict.lm)
res_lm <- predict(lm_model, newdata = data.frame(X0), interval = "confidence")
head(res_lm)
plot(x0, res_lm[, "fit"], type = "l")
lines(x0, res_lm[, "lwr"], col = "red", lty = "dashed")
lines(x0, res_lm[, "upr"], col = "red", lty = "dashed")
legend("bottomright",
       legend = c("Fitted curve", "Point-wise CI"),
       col = c("black", "red"),
       lty = c("solid", "dashed"))

### "By-hand" ----
# get percentile in t-distribution
t <- qt(1-alpha/2, N-p-1)

# init matrix for storing results
res <- matrix(NA, nrow = nrow(X0), ncol = 3)
colnames(res) <- c("fit", "lwr", "upr")

# iterate over each x0-value and compute confidence interval
for (i in seq_along(x0)) {

  # compute point-estimate
  res[i, "fit"] <- X0[i, ]%*%beta_hat

  # compute standard error of point-estimate at current x0-value
  s <- sigma_hat*sqrt((X0[i, , drop = FALSE]%*%XTXinv)%*%t(X0[i, , drop = FALSE]))

  # compute confidence interval
  res[i, "lwr"] <- res[i, "fit"]-t*s
  res[i, "upr"] <- res[i, "fit"]+t*s
}

head(res)

# compare with lm() result
max(abs(res_lm-res))


## Approach 2 using equation 3.15 ----------------------------------------------
## Construct confidence bands for f(x) by through the confidence set Cbeta defined
## in 3.15. This *confidence set* for the parameters corresponds to a confidence
## set for f(x) for *all* values of x.
## The confidence set Cbeta involves a quadtratic form in (beta_hat-beta).
## Here, we explore the set by sampling a large set of beta vectors and check
## wheter or not each candidate belongs to the confidence set.

# Define a function that checks if a coefficient vector is in the confidence set
is_in_confidence_set <- function(beta, X, sigma_hat){
  # input:
  # beta:  a p-length vector
  # X: the N x p design matrix
  # sigma_hat: the estimated residual standard error
  # output:
  # TRUE if beta is in the confidence set, FALSE otherwise

  # compute value of quadtratic form at beta
  XTX <- t(X)%*%X
  q <-  t(beta_hat - beta) %*% XTX %*% (beta_hat - beta)

  # check if inequality holds
  q <= sigma_hat^2 * qchisq(level, df = ncol(X))
}

# test
is_in_confidence_set(beta_hat, X, sigma_hat)

# Generate a sample of beta-vectors centred in beta_hat
n_beta_sample = 10**4

Cbeta <- Cf <- list()
for (i in seq_len(n_beta_sample)) {

  # sample a coefficient vector
  beta_sample <- MASS::mvrnorm(1, beta_hat, sigma_hat*XTXinv)

  # check if sampled vector is in set
  tmp  <- is_in_confidence_set(beta_sample, X, sigma_hat)
  cat("Iteration", i, ": beta is in confidence set", tmp, "\n")

  if (tmp) {
    # if the sampled beta vector is in the confidence set..

    j <- length(Cbeta)

    # add beta to confidence set for beta
    Cbeta[[j+1]] <- beta_sample

    # add the corresponding value of f(x) at each value of x0 to confidence set for f
    Cf[[j+1]] <- X0%*%beta_sample
  }
}

# Compute acceptance rate
acceptance_rate <- length(Cbeta)/n_beta_sample
cat("Of the", n_beta_sample, "sampled vectors, the proportion of vectors in Cbeta is", acceptance_rate, ".\n")

# Plot confidence set of f(x)
Cf_mat <- do.call("cbind", Cf)    # store in a matrix
matplot(x0, Cf_mat, type = "l", col = "grey", ylab = "f(x)")

# add fitted curve and point-wise confidence intervals (CI) for comparison
lines(x0, res_lm[, "fit"], type = "l")
lines(x0, res_lm[, "lwr"], col = "red", lty = "dashed")
lines(x0, res_lm[, "upr"], col = "red", lty = "dashed")
legend("bottomright",
       legend = c("Fitted curve", "Point-wise CI", "Confidence set"),
       col = c("black", "red", "grey"),
       lty = c("solid", "dashed", "blank"),
       pch = c(-1, -1, 15))

# People who discuss the exercise or related stuff
# https://waxworksmath.com/Authors/G_M/Hastie/WriteUp/Weatherwax_Epstein_Hastie_Solution_Manual.pdf (p 33, Do not understand how they generate the lines from the CS)
# https://stats.stackexchange.com/questions/18322/confidence-set-for-parameter-vector-in-linear-regression
# https://math.stackexchange.com/questions/3331735/on-confidence-sets-and-linear-regression
# https://pages.stat.wisc.edu/~st849-1/lectures/Ch05.pdf (5.9)
# https://www.stat.cmu.edu/~cshalizi/mreg/15/lectures/18/lecture-18.pdf (section 3) (thorough)
# https://getd.libs.uga.edu/pdfs/ma_james_c_201412_ms.p (pp 44, use wrong df?)