# code based on the file "Code regarding section 5.2 (copyright 2003, 2004, 2012 A.Azzalini and B.Scarpa)"

# function to procude the ROC curve
lift.roc <- function(previsti, g, type = "bin", plot.it = TRUE)
{
  library(sm)
  if(!is.numeric(g)) stop("g not numeric")
  ind <- rev(order(previsti))
  n <- length(g)
  x1 <- (1:n)/n
  x2 <- cumsum(g[ind]) / (mean(g) * (1:n))
  if(type == "crude" & plot.it) 
    plot(x1, x2, type = "l", col = 2,
         xlab = "fraction of predicted subjects", ylab = "lift")
  if(type == "sm") {
    a<- sm.regression(x1, x2, h = 0.1, display = "none")
    if(plot.it)
      plot(a$eval, a$estimate, type = "l", xlim=c(0,1), col = 2,
           xlab = "fraction of predicted subjects", ylab = "lift")
  }
  if(type == "bin") {
    b <-  binning(x1, x2, breaks = (-0.001:10) / 9.999)
    x <- c(0, seq(0.05, 0.95, by = 0.1), 1)
    if (plot.it) plot(x, c(x2[1], b$means, 1), type = "b", xlim = c(0, 1),
                      ylim = c(1, max(x2)), cex = 0.75, col = 2,
                      xlab = "fraction of predicted subjects",
                      ylab = "improvement factor")
    x1 <- x
    x2 <- c(x2[1], b$means,1)
  }
  if(plot.it) pause("press enter")
  u1 <- cumsum(1 - g[ind]) / sum(1 - g)
  u2 <- cumsum(g[ind]) / sum(g)
  if(type == "crude" & plot.it)
    plot(u1, u2, type = "l", xlim = c(0, 1), ylim = c(0, 1), col = 2,
         xlab = "1-specificity", ylab = "sensibility")
  if(type == "sm"){
    # browser()
    eps <- 0.00001
    a <- sm.regression(u1, log((u2 + eps) / (1 - u2 + 2 * eps)), h = 0.1, display = "none")
    q <- exp(a$estimate) / (1 + exp(a$estimate))
    if(plot.it) plot(a$eval, q, type = "l", xlim = c(0, 1), ylim = c(0, 1),
                     xlab = "1-specificity", ylab = "sensibility", col = 2)
  }
  if(type == "bin"){
    b <- binning(u1, u2, breaks = (-0.001:10) / 9.999)
    x <- c(0, seq(0.05, 0.95, by = 0.1),1)
    y<- c(0, b$means,1)
    if(plot.it)
      plot(x, y, type = "b", xlim = c(0,1),
           ylim = c(0, 1),cex = 0.75, xlab = "1-specificity",
           ylab = "sensibility", col = 2)
    u1 <- x
    u2 <- y
  }                      
  if(plot.it) {
    abline(0, 1, lty = 2, col = 3)
  }
  invisible(list(x1, x2, u1, u2))
}

# function to compute the confusion matrix
confusion.matrix <- function(predicted, observed){
  a <-  table(predicted, observed)
  err.tot <- 1 - sum(diag(a)) / sum(a)
  fn <- a[1, 2] / (a[1, 2] + a[2, 2])
  fp <- a[2, 1] / (a[1, 1] + a[2, 1])
  print(a)
  cat("total error: ", format(err.tot),"\n")
  cat("false positives/negatives: ",format(c(fp, fn)),"\n")
  invisible(a)
}


# read data and select the variables used in the book
juice <- read.table("http://azzalini.stat.unipd.it/Book-DM/juice.data", header = TRUE) 
juice[ , "store"] <- factor(juice[ , "store"])
v <- c(1, 3, 6:9, 13, 21)
juice <- juice[ , v]

# select training and test
set.seed(123)
n <- nrow(juice)
n1 <- round(n * 0.75)
n2 <- n - n1
permuta <- sample(1:n, n)
training <- sort(permuta[1:n1])
juice1 <- juice[training, ] # training set
juice2 <- juice[-training, ] # test set
attach(juice1)

# figure 5.1a-f
par(mfrow = c(2, 3))
K <- length(v)
for(k in 2:(K-1))
  boxplot(juice1[ , k] ~ choice, col = 5, ylab = names(juice1)[k])
par(mfrow = c(1, 1))

# figure 5.1g
freq <- table(choice, store)
freq <- 100 * freq / outer(rep(1, 2), apply(freq, 2, sum))
barplot(freq[2, ], width = 1, space = 1.25, col = 5, ylim = c(0, 100),
        ylab = "Percentage of choosing MM", xlab = "Shop")
abline(h = 100 * 318 / nrow(juice1), lty = 3, col = 2)

f1 <- as.formula(paste("as.factor(choice) ~ ", paste(names(juice1)[-1], collapse = " + "),
                       collapse = NULL))
m1 <- glm(f1, data = juice1, family = binomial(link = "logit"))
m2 <- update(m1, . ~ . - week, data = juice1)

# test error
p2 <- predict(m2, newdata = juice2, type = "response")
# table 5.2
confusion.matrix(p2 > 0.5, juice2[ , 1])
addmargins(confusion.matrix(p2 > 0.5, juice2[ , 1]))

# figure 5.2a
g <- as.numeric(juice2[, 1] == "MM")
a <- lift.roc(p2, g, type = "crude", plot.it = FALSE)
p0 <- 0.5
plot(a[[3]], a[[4]], type = "l", xlab = "1-specificity", ylab = "Sensibility")
abline(0, 1, col = 4, lty = 3)
i0 <- order(abs(p2 - p0))[1]
q0 <- a[[3]][sum(p2 > p2[i0])]
abline(v = q0, col = 3, lty = 3)
text(q0, 0, paste("Threshold=",format(p0), sep = "", collapse = NULL))

# figure 5.2b
a <- lift.roc(p2, g, type = "bin", plot.it = FALSE)
plot(a[[3]], a[[4]], type = "b", xlab = "1-specificity", ylab = "Sensibility")
abline(0, 1, col = 4, lty = 3)
i0 <- order(abs(p2 - p0))[1]
q0 <- a[[3]][sum(p2 > p2[i0])]
abline(v = q0, col = 3, lty = 3)
text(q0, 0, paste("Threshold=", format(p0), sep = "", collapse = NULL))

detach(juice1)


#########################################


dataset <- read.table("http://azzalini.stat.unipd.it/Book-DM/classes.dat",
                      header = TRUE, nrows = 200)
x1 <- dataset[,1]
x2 <- dataset[,2]
gr <- (2 - dataset[ , 3]) # the groups are indicated by  0 and 1
K <- 2
n <- nrow(dataset)

# figure 5.8a
plot(dataset[ , 1:2], type = "n", xlab = expression(italic(x)[1]),
     ylab = expression(italic(x)[2]))
for(k in 1:K)
{
  g <- (dataset[ , 3] == k)
  points(dataset[g, 1:2], pch = 15 + k, col = 1 + k, cex = 1.5)
}

m1 <- lm(gr ~ x1 + x2)
beta <- coef(m1)
# we look for the line such that:  b0 + b1*x1 + b2*x2 = 0.5
a0 <- (0.5 - beta[1])/beta[3]
a1<- -beta[2] / beta[3]
abline(a0, a1, lty = 2, lwd = 2)

m2 <- lm(gr ~ x1 + x2 + I(x1^2) + I(x1*x2) + I(x2^2))
summary(m2) # coeff of I(x1^2) not significant
m2 <- lm(gr ~ x1 + x2 + I(x1*x2) + I(x2^2))
beta <- c(coef(m2)[1:3], 0, coef(m2)[4:5])


# figure 5.8b
plot(dataset[ , 1:2], type = "n", xlab = expression(italic(x)[1]),
     ylab = expression(italic(x)[2]))
for(k in 1:K)
{
  g <- (dataset[ , 3] == k)
  points(dataset[g, 1:2], pch = 15 + k, col = 1 + k, cex = 1.5)
}
aq <- beta[6]
x.seq <- seq(min(x1), max(x1), length = 500)
x0 <- y1 <- y2 <- numeric(0)
for(x in x.seq)
{
  bq <- beta[5] * x +  beta[3]
  cq <- beta[1] + beta[2] * x + beta[4] * x^2 - 0.5
  delta2 <- bq^2 - 4 * aq * cq
  if(delta2 >= 0)
  {
    x0 <- c(x0, x)
    y1 <- c(y1, (-bq + sqrt(delta2)) / (2 * aq))
    y2 <- c(y2, (-bq - sqrt(delta2)) / (2 * aq))
  }
}
lines(x0, y1, lwd = 2)
lines(x0, y2, lwd = 2)
lines(rep(x0[1], 2), c(y1[1], y2[1]), lwd = 2)