### logistic regression ###
bank <- read.csv("http://azzalini.stat.unipd.it/Book-DM/brazil.csv", header = TRUE)
summary(bank)
y <- ifelse(bank$satisfaction < 2.5, 'low', 'high')
x <- ifelse(bank$age <= 45, 'young', 'old')
table(x, y)

# transform y in a 0-1 variable
y <- ifelse(bank$satisfaction < 2.5, 0, 1)

# fit the logistic model
mod <- glm(y ~ x, family = 'binomial')
summary(mod)
beta <- mod$coefficients

# recover the probability of high satisfaction
pi.old = exp(beta[1]) / (1 + exp(beta[1]))
pi.young = exp(beta[1] + beta[2]) / (1 + exp(beta[1] + beta[2]))

# compute manually coefficients
pi.old.hat <- sum(y[x == 'old']) / sum(x == 'old')
pi.young.hat <- sum(y[x == 'young']) / sum(x == 'young')
# odds
odd.old <- pi.old.hat / (1 - pi.old.hat)
log(odd.old) # baseline odds in the model
odd.young <- pi.young.hat / (1 - pi.young.hat)
log(odd.young)
# log odds ratio
log(odd.young / odd.old)

# fit the model with age as a continuous variable
mod_tab25up <- glm(y ~ bank$age + I(bank$age^2), family = 'binomial')
summary(mod_tab25up)

mod_tab25down <- glm(y ~ bank$age, family = 'binomial')
summary(mod_tab25down)

# difference in deviance
diff_deviance <- mod_tab25down$deviance - mod_tab25up$deviance
1 - pchisq(q = diff_deviance, df = 4 - 3) # not significative at level 0.05


# code based on files "Code regarding section 4.2" (copyright 2003, 2004, 2012 A.Azzalini and B.Scarpa),
# "Code regarding section 5.4" (copyright 2003, 2004, 2012 A.Azzalini and B.Scarpa),
# and "Code regarding section 5.5" (copyright 2003, 2004, 2012 A.Azzalini and B.Scarpa)

pred.square <- function(model, x1, x2, grid = 50, Z.flag = FALSE)
{
  u <- pretty(x1, n = 10)
  x <- seq(u[1], u[10], length = grid)
  u <- pretty(x2, n = 10)
  y <- seq(u[1], u[10], length = grid)
  nx <- length(x)
  ny <- length(y)
  xoy <- cbind(rep(x, ny), as.vector(matrix(y, nx, ny, byrow = TRUE)))
  X <- matrix(xoy, nx * ny, 2, byrow = FALSE)
  if(Z.flag) pred <- predict(model, newdata = data.frame(z1 = X[ , 1], z2 = X[ , 2]))
  else  pred <- predict(model, newdata = data.frame(x1 = X[ , 1], x2 = X[ , 2]))
  list(pred = pred, x = x, y = y, X = X)
}

# function to procude the ROC curve
lift.roc <- function(previsti, g, type = "bin", plot.it = TRUE)
{
  library(sm)
  if(!is.numeric(g)) stop("g not numeric")
  ind <- rev(order(previsti))
  n <- length(g)
  x1 <- (1:n)/n
  x2 <- cumsum(g[ind]) / (mean(g) * (1:n))
  if(type == "crude" & plot.it) 
    plot(x1, x2, type = "l", col = 2,
         xlab = "fraction of predicted subjects", ylab = "lift")
  if(type == "sm") {
    a<- sm.regression(x1, x2, h = 0.1, display = "none")
    if(plot.it)
      plot(a$eval, a$estimate, type = "l", xlim=c(0,1), col = 2,
           xlab = "fraction of predicted subjects", ylab = "lift")
  }
  if(type == "bin") {
    b <-  binning(x1, x2, breaks = (-0.001:10) / 9.999)
    x <- c(0, seq(0.05, 0.95, by = 0.1), 1)
    if (plot.it) plot(x, c(x2[1], b$means, 1), type = "b", xlim = c(0, 1),
                      ylim = c(1, max(x2)), cex = 0.75, col = 2,
                      xlab = "fraction of predicted subjects",
                      ylab = "improvement factor")
    x1 <- x
    x2 <- c(x2[1], b$means,1)
  }
  if(plot.it) pause("press enter")
  u1 <- cumsum(1 - g[ind]) / sum(1 - g)
  u2 <- cumsum(g[ind]) / sum(g)
  if(type == "crude" & plot.it)
    plot(u1, u2, type = "l", xlim = c(0, 1), ylim = c(0, 1), col = 2,
         xlab = "1-specificity", ylab = "sensibility")
  if(type == "sm"){
    # browser()
    eps <- 0.00001
    a <- sm.regression(u1, log((u2 + eps) / (1 - u2 + 2 * eps)), h = 0.1, display = "none")
    q <- exp(a$estimate) / (1 + exp(a$estimate))
    if(plot.it) plot(a$eval, q, type = "l", xlim = c(0, 1), ylim = c(0, 1),
                     xlab = "1-specificity", ylab = "sensibility", col = 2)
  }
  if(type == "bin"){
    b <- binning(u1, u2, breaks = (-0.001:10) / 9.999)
    x <- c(0, seq(0.05, 0.95, by = 0.1),1)
    y<- c(0, b$means,1)
    if(plot.it)
      plot(x, y, type = "b", xlim = c(0,1),
           ylim = c(0, 1),cex = 0.75, xlab = "1-specificity",
           ylab = "sensibility", col = 2)
    u1 <- x
    u2 <- y
  }                      
  if(plot.it) {
    abline(0, 1, lty = 2, col = 3)
  }
  invisible(list(x1, x2, u1, u2))
}


# function to compute the confusion matrix
confusion.matrix <- function(predicted, observed){
  a <-  table(predicted, observed)
  err.tot <- 1 - sum(diag(a)) / sum(a)
  fn <- a[1, 2] / (a[1, 2] + a[2, 2])
  fp <- a[2, 1] / (a[1, 1] + a[2, 1])
  print(a)
  cat("total error: ", format(err.tot),"\n")
  cat("false positives/negatives: ",format(c(fp, fn)),"\n")
  invisible(a)
}

dataset <- read.table("http://azzalini.stat.unipd.it/Book-DM/classes.dat",
                      header = TRUE, nrows = 200)
x1 <- dataset[,1]
x2 <- dataset[,2]
gr <- (2 - dataset[ , 3]) # the groups are indicated by  0 and 1
K <- 2
n <- nrow(dataset)

# figure 5.8a
plot(dataset[ , 1:2], type = "n", xlab = expression(italic(x)[1]),
     ylab = expression(italic(x)[2]))
for(k in 1:K)
{
  g <- (dataset[ , 3] == k)
  points(dataset[g, 1:2], pch = 15 + k, col = 1 + k, cex = 1.5)
}

m1 <- lm(gr ~ x1 + x2)
beta <- coef(m1)
# we look for the line such that:  b0 + b1*x1 + b2*x2 = 0.5
a0 <- (0.5 - beta[1])/beta[3]
a1<- -beta[2] / beta[3]
abline(a0, a1, lty = 2, lwd = 2)

m2 <- lm(gr ~ x1 + x2 + I(x1^2) + I(x1*x2) + I(x2^2))
summary(m2) # coeff of I(x1^2) not significant
m2 <- lm(gr ~ x1 + x2 + I(x1*x2) + I(x2^2))
beta <- c(coef(m2)[1:3], 0, coef(m2)[4:5])


# figure 5.8b
plot(dataset[ , 1:2], type = "n", xlab = expression(italic(x)[1]),
     ylab = expression(italic(x)[2]))
for(k in 1:K)
{
  g <- (dataset[ , 3] == k)
  points(dataset[g, 1:2], pch = 15 + k, col = 1 + k, cex = 1.5)
}
aq <- beta[6]
x.seq <- seq(min(x1), max(x1), length = 500)
x0 <- y1 <- y2 <- numeric(0)
for(x in x.seq)
{
  bq <- beta[5] * x +  beta[3]
  cq <- beta[1] + beta[2] * x + beta[4] * x^2 - 0.5
  delta2 <- bq^2 - 4 * aq * cq
  if(delta2 >= 0)
  {
    x0 <- c(x0, x)
    y1 <- c(y1, (-bq + sqrt(delta2)) / (2 * aq))
    y2 <- c(y2, (-bq - sqrt(delta2)) / (2 * aq))
  }
}
lines(x0, y1, lwd = 2)
lines(x0, y2, lwd = 2)
lines(rep(x0[1], 2), c(y1[1], y2[1]), lwd = 2)


#################

juice <- read.table("http://azzalini.stat.unipd.it/Book-DM/juice.data", header = TRUE) 
juice[ , "store"] <- factor(juice[ , "store"])
v <- c(1, 3, 6:9, 13, 21)
juice <- juice[ , v]
#--select training and test
set.seed(123)
n <- nrow(juice)
n1 <- round(n * 0.75)
n2 <- n - n1
permuta <- sample(1:n, n)
training <- sort(permuta[1:n1])
juice1 <- juice[training, ]
juice2 <- juice[-training, ]

f0 <- as.formula(paste("as.numeric(as.factor(choice))-1 ~",
                       paste(names(juice1)[-1], collapse = "+"),
                       collapse = NULL))
lm1 <- lm(f0, data = juice1)
lm2 <- update(lm1, "~.-week")
p3 <- predict(lm2, newdata = juice2)

m1 <- glm(f0, data = juice1, family = binomial)
m2 <- update(m1, "~.-week")
p2 <- predict(m2, newdata = juice2, type = "response")

# figure 5.9
plot(log(p2 / (1 - p2)), p3,
     xlab = "Logit(probability) predicted by the logistic model",
     ylab = "Predicted values with the linear model", cex = 1.5)
abline(h = 0.5, col = 2, lty = 2)
abline(v = 0, col = 2, lty = 2)

# use again class.dat
x1 <- dataset[ , 1]
x2 <- dataset[ , 2]
gr <- (2 - dataset[ , 3])
K <- 3
n <- nrow(dataset)
x1 <- z1 <- dataset[ , 1]
x2 <- z2 <- dataset[ , 2]
gr <- dataset[ , 3]
group <- factor(gr)
n.grid <- 800

Y <- model.matrix( ~ group - 1)
m1 <- lm(Y ~ x1 + x2)
p <- pred.square(m1, x1, x2, n.grid)
pred <- array(p$pred, dim = c(n.grid, n.grid, 3))
ind <- apply(pred, c(1, 2), order)[3, , ] 

# figure 5.10a
plot(dataset[ , 1:2], type = "n", xlab = expression(italic(x)[1]),
     ylab = expression(italic(x)[2]))
for(k in 1:K) points(dataset[gr == k, 1:2], pch = 15 + k, col= 1 + k)
contour(p$x, p$y, ind, add = TRUE, drawlabels = FALSE, nlevels = 2, lty = 2)

m2 <- lm(Y ~ x1 + x2+ I(x1^2) + I(x1 * x2)+ I (x2^2))
p <- pred.square(m2, x1, x2, n.grid)
pred <- array(p$pred, dim = c(n.grid, n.grid, 3))
ind  <- apply(pred, c(1, 2), order)[3, , ] 

# figure 5.10b
plot(dataset[ , 1:2], type = "n", xlab = expression(italic(x)[1]),
     ylab = expression(italic(x)[2]))
for(k in 1:3)
  points(dataset[gr == k, 1:2], pch = 15 + k, col = 1 + k)
contour(p$x, p$y, ind, add = TRUE, drawlabels = FALSE, nlevels = 2, lty = 2)


dataset <- read.table("http://azzalini.stat.unipd.it/Book-DM/classes.dat", header = TRUE)
x1 <- dataset[ , 1]
x2 <- dataset[ , 2]
gr <- (2 - dataset[ , 3])
K  <- 3
n <- nrow(dataset)
x1 <- z1 <- dataset[ , 1]
x2 <- z2 <- dataset[ , 2]
gr <- dataset[,3]
group <- factor(gr)
n.grid <- 800

library(MASS)
d1 <- lda(group ~ x1 + x2)
p <- pred.square(d1, x1, x2, n.grid)
pred <- array(p$pred$posterior, c(n.grid, n.grid, 3))
ind <- apply(pred, c(1, 2), order)[3, , ] 

# figure 5.11a
plot(dataset[ , 1:2], type = "n", xlab = expression(italic(x)[1]),
     ylab = expression(italic(x)[2]))
for(k in 1:3) points(dataset[gr == k, 1:2], pch = 15 + k, col = 1 + k)   
contour(p$x, p$y, ind, add = TRUE, drawlabels = FALSE, nlevels = 2, lty = 2)

d2 <- lda(group ~ x1 + x2 + I(x1^2) + I(x1*x2) + I(x2^2))
p <- pred.square(d2, x1, x2, n.grid)
pred <- array(p$pred$posterior, c(n.grid, n.grid, 3))
ind <- apply(pred, c(1, 2), order)[3, , ] 

# figure 5.11b
plot(dataset[, 1:2], type = "n", xlab = expression(italic(x)[1]),
     ylab = expression(italic(x)[2]))
for(k in 1:3) points(dataset[gr == k, 1:2], pch = 15 + k, col = 1 + k)   
contour(p$x, p$y, ind, add = TRUE, drawlabels = FALSE, nlevels = 2, lty = 2)


### QDA ###

d3 <- qda(group ~ x1 + x2)
p <- pred.square(d3, x1, x2, n.grid)
pred <- array(p$pred$posterior, c(n.grid, n.grid, 3))
ind <- apply(pred, c(1, 2), order)[3, , ] 

# figure 5.12a
plot(dataset[, 1:2], type = "n", xlab = expression(italic(x)[1]),
     ylab = expression(italic(x)[2]))
for(k in 1:3) points(dataset[gr == k, 1:2], pch = 15 + k, col = 1  + k)   
contour(p$x, p$y, ind, add = TRUE, drawlabels = FALSE, nlevels = 2)

d4 <- qda(group ~ x1 + x2 + I(x1^2) + I(x1*x2) + I(x2^2))
p <- pred.square(d4, x1, x2, n.grid)
pred <- array(p$pred$posterior, c(n.grid, n.grid, 3))
ind <- apply(pred, c(1, 2), order)[3, , ] 

#  figure 5.12b
plot(dataset[ , 1:2], type = "n", xlab = expression(italic(x)[1]),
     ylab = expression(italic(x)[2]))
for(k in 1:3) points(dataset[gr == k, 1:2], pch = 15 + k, col = 1 + k)   
contour(p$x, p$y, ind, add = TRUE, drawlabels = FALSE, nlevels = 2, lty = 2)


f1 <- as.formula(paste("choice ~ ", paste(names(juice1)[-1], collapse = "+"),
                       collapse = NULL))
d1 <- lda(f1, data = juice1)
d2 <- qda(f1, data = juice1)
p1 <- predict(d1, newdata = juice2)
p2 <- predict(d2, newdata = juice2)

# table 5.7
confusion.matrix(p1$class, juice2[ , "choice"])
confusion.matrix(p2$class, juice2[ , "choice"])

g <- as.numeric(juice2[ , 1] == "MM")
lr1 <- lift.roc(p1$post[ , 2], g, type = "bin", plot.it = FALSE)
lr2 <- lift.roc(p2$post[ , 2], g, type = "bin", plot.it = FALSE)

# figure 5.13b
plot(lr1[[3]], lr1[[4]], type = "b", xlim = c(0,1), pch = 16,
     ylim = c(0, 1), cex = 1.5, xlab = "1-specificity",
     ylab = "Sensibility", col = 2)
lines(lr2[[3]], lr2[[4]], type = "b", cex = 1.5, col = 3, pch = 17)
abline(0, 1, lty = 2, col = 4)
legend('bottomright', legend = c("LDA", "QDA"), col = 2:3, pch = 16:17)