---
title: 'STK2100'
subtitle: |
  | Machine Learning and Statistical Methods for Prediction and Classification
  | Mandatory Assignment 2
author: "Lars H. B. Olsen"
date: "`r format(Sys.time(), '%d %B %Y')`"
output:
  pdf_document:
    fig_caption: true
    extra_dependencies:
      caption: labelfont={bf}
      amsmath: null
urlcolor: blue
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Exercise 1

```{r Ex1, cache = TRUE}
# Read the spam data from the course webpage
fil = "http://www.uio.no/studier/emner/matnat/math/STK2100/data/spam_data.txt"
spam = read.table(fil, header = T)
```

## 1a)

```{r Ex1a, cache = TRUE}
# Fit a standard logistic regression model to the training data. Wrap the call
# in suppressWarnings() to silence "Warning: glm.fit: fitted probabilities
# numerically 0 or 1 occurred".
fit = suppressWarnings(
  glm(y ~ . - train, data = spam, subset = spam$train, family = binomial))

# Predict the response class of the test data
pred = predict(fit, spam[!spam$train, ], type = "response") > 0.5
```

## 1b)

```{r Ex1b_chunk1, cache = TRUE}
# Compute the principal components of the explanatory variables, which we
# scale to unit variance before the transformation.
x.prcomp = prcomp(spam[ , 1:57], scale = TRUE)

# Insert the principal components, the response, and the indicator
# for training data into a data frame.
d = data.frame(x.prcomp$x, y = spam$y, train = spam$train)
```

```{r Ex1b_chunk2, cache = TRUE}
# Fit a logistic model based on the first 2 principal components to the training data
fit.pc = glm(y ~ . - train, data = d[ , c(1:2, 58, 59)],
             family = binomial, subset = d$train)

# Predict the response class of the test data
pred.pc = predict(fit.pc, d[!d$train, ], type = "response") > 0.5
```

## 1c)

## 1d)

```{r Ex1d, cache = TRUE}
par(mfrow = c(1, 3))
plot.ts(x.prcomp$rotation[ , 1],
        ylab = bquote("Weights of the variables in the first principal component"),
        xlab = bquote("Variable X"[j]), ylim = c(-0.25, 0.45))
plot.ts(x.prcomp$rotation[ , 2],
        ylab = bquote("Weights of the variables in the second principal component"),
        xlab = bquote("Variable X"[j]), ylim = c(-0.25, 0.45))
plot.ts(x.prcomp$rotation[ , 3],
        ylab = bquote("Weights of the variables in the third principal component"),
        xlab = bquote("Variable X"[j]), ylim = c(-0.25, 0.45))
```

## 1e)

```{r Ex1e, cache = TRUE}
# Import the necessary libraries
library(splines)
library(foreach)
library(gam)

# Fit a generalized additive model based on the first
# three explanatory variables to the training data.
fit.gam = gam(y ~ s(x1) + s(x2) + s(x3), data = spam,
              subset = spam$train, family = binomial)

# Predict the response class of the test data
pred.gam = predict(fit.gam, spam[!spam$train, ], type = "response") > 0.5

# Look at the summary of the fitted GAM and the significance
# levels of the nonparametric effects.
summary(fit.gam)

# Plot the three estimated non-linear effect functions, both with and without
# pointwise twice-standard-error bands, together with a rug plot of the
# observed values along the base of each panel.
par(mfrow = c(2, 3))
plot(fit.gam, se = T)
plot(fit.gam, se = F)
```
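As a quick check on the three classifiers fitted so far, the following sketch computes their test misclassification rates from the predictions above. It is an addition of mine, not part of the assignment text; the helper name `test.error` is introduced here for convenience.

```{r Ex1e_errors, cache = TRUE}
# Helper (the name test.error is introduced here, not from the assignment) that
# computes the misclassification rate of logical class predictions on the test
# data. At this point spam$y is still numeric 0/1, so we compare on the logical
# scale to be safe.
test.error = function(pred.class) {
  mean(pred.class != (spam$y[!spam$train] == 1))
}

# Test error rates of the full logistic regression (1a), the logistic
# regression on two principal components (1b), and the GAM (1e).
c(logistic = test.error(pred),
  logistic.pc = test.error(pred.pc),
  gam = test.error(pred.gam))
```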
## 1f)

```{r Ex1f, cache = TRUE}
# Fit a generalized additive model based on the first
# three principal components to the training data.
fit.pc.gam = gam(y ~ s(PC1) + s(PC2) + s(PC3), data = d,
                 subset = d$train, family = binomial)

# Look at the summary of the fitted GAM and the significance
# levels of the nonparametric effects.
summary(fit.pc.gam)

# Predict the response class of the test data
pred.pc.gam = predict(fit.pc.gam, d[!d$train, ], type = "response") > 0.5

# Make the same type of plots as above
par(mfrow = c(2, 3))
plot(fit.pc.gam, se = T)
plot(fit.pc.gam, se = F)
```

## 1g)

```{r Ex1g, cache = TRUE}
# Extract the names of the principal components
nam = names(d)[1:57]

# Set the number of principal components to include in the model
k = 20

# Create a formula with the given number of principal components
formula = as.formula(
  paste("y", paste(paste("s(", nam[1:k], ")", sep = ""), collapse = "+"), sep = "~"))

# Visually verify that the formula is correct
print(formula)

# Fit a generalized additive model based on the first
# k principal components to the training data.
fit.pc.gam = gam(formula, data = d, subset = d$train, family = binomial)

# Predict the response class of the test data
pred.pc.gam = predict(fit.pc.gam, d[!d$train, ], type = "response") > 0.5
```

## 1h)

## 1i)

# Exercise 2

In this exercise we continue with the spam data set, but now consider the use of neural networks.

## 2a)

```{r Ex2a, cache = TRUE}
# Import the necessary libraries
library(MASS)
library(nnet)
library(class)

# Convert y to a factor
spam$y = as.factor(spam$y)

# Fit a neural network with 10 hidden nodes and decay parameter 0.3 to the training data
spam.nnet = nnet(y ~ . - train, data = spam[spam$train == TRUE, ], size = 10,
                 decay = 0.3, MaxNWts = 10000, maxit = 500, trace = FALSE)

# Use the fitted neural network to predict the classes of the test data
pred = predict(spam.nnet, spam[spam$train == FALSE, ], type = "class")

# Compare the predicted classes with the true classes of the test data
show(table(spam$y[spam$train == FALSE], pred))
```

## 2b)

## 2c)

```{r Ex2c, cache = TRUE}
# Import the necessary libraries
library(Rcpp)
library(RSNNS)

# Extract the training and test targets as 0/1-encoded class labels
spamTargets = decodeClassLabels(spam$y)
spamTargets.train = spamTargets[spam$train == TRUE, ]
spamTargets.test = spamTargets[spam$train == FALSE, ]

# Fit a deep neural network with 3 hidden layers (6, 10, and 6 neurons)
# and learning rate 0.3 to the training data.
spam.dnet = mlp(as.matrix(spam[spam$train == TRUE, 1:57]), spamTargets.train,
                size = c(6, 10, 6), learnFuncParams = c(0.3), maxit = 300)

# Compare the predicted classes with the true classes of the test data
pred.dnet = predict(spam.dnet, spam[spam$train == FALSE, 1:57])
pred.dnet = apply(pred.dnet, 1, which.is.max) - 1
pred.dnet = pred.dnet == 1

# Compare on the logical scale: comparing the factor y directly with the
# logical pred.dnet would coerce the logical to "TRUE"/"FALSE", which never
# matches the levels "0"/"1" and would give an error rate of 1.
err.dnet = mean((spam$y[spam$train == FALSE] == 1) != pred.dnet)
err.dnet
```

## 2d)

## 2e)
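To put the two networks on a common footing, the following sketch (an addition of mine; the name `err.nnet` is not from the assignment) computes the test misclassification rate of the single-hidden-layer network from 2a) and sets it against `err.dnet` from 2c).

```{r Ex2_errors, cache = TRUE}
# Test misclassification rate of the single-hidden-layer network from 2a).
# `pred` holds its "0"/"1" class predictions as character strings, so a direct
# comparison with the factor y works. The name err.nnet is introduced here.
err.nnet = mean(spam$y[spam$train == FALSE] != pred)

# Side-by-side comparison with the deep network from 2c)
c(nnet = err.nnet, dnet = err.dnet)
```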