# STK 4900
# Exercise 12

# Start from an empty workspace (hidden objects included).
rm(list = ls(all.names = TRUE))

# Load the gun data from the course web page. read.table() is used with its
# default header = FALSE, so the four column names are supplied here.
gun.data <- read.table(
  file = "http://www.uio.no/studier/emner/matnat/math/STK4900/v17/gun.dat",
  col.names = c("method", "phys", "team", "rounds")
)
# Print the data set to inspect it.
gun.data


# a)
# Pairwise correlations between all four columns.
cor(gun.data)
# How are the correlations between the covariates ("method", "phys", "team")?
# Can you explain the reason for this?
# How are the correlations between the covariates and "rounds"?

# Hint: create a scatter plot matrix.
# (The plot goes to the current graphics device; to save it to a file,
# open a device first with pdf() and close it afterwards with dev.off().)

plot(gun.data)

# Number of observations at each level of every covariate.
table(gun.data[["method"]])
table(gun.data[["phys"]])
table(gun.data[["team"]])


# b), c), d)
# Turn the three covariates into factors (categorical covariates).
gun.data[c("method", "phys", "team")] <-
  lapply(gun.data[c("method", "phys", "team")], factor)

# cor() only accepts numeric input, so the call below now fails with an
# error: the "usual" correlation cannot be computed for categorical variables
# (a rank-based measure such as Kendall's tau could be considered instead
# when the categories are ordered).
# The call is wrapped in try() so that the error message is still shown but
# the script keeps running when it is executed with source() or Rscript.
try(cor(gun.data))

# Fit the model with all main effects and all two- and three-way
# interactions between method, phys and team, then show the anova table.
lm.obj <- lm(formula = rounds ~ method * phys * team, data = gun.data)
summary(lm.obj)

# Notice that there aren't any "1"s among the coefficient names: with R's
# default contrasts the first level of each factor is the baseline.


# Analysis-of-variance table for the fitted model.
anova(lm.obj)


# What does the anova table tell you? (See slide 33, Lecture 5)
# Which interactions and main effects are significant?


#### Part b ####

# For the gun data in R-exercise 12 we may be interested in predicting the number of rounds for specified levels of method, physique and team,
# and in finding confidence intervals for the estimated expected values as well as prediction intervals for new observations at those factor levels.


# a) Set up a data frame with the factor combinations at which you would
#    like to make predictions, e.g.
gun.test <- data.frame(
  method = factor(c(1, 2, 1, 2)),
  phys = factor(c(1, 1, 2, 3)),
  team = factor(c(1, 2, 3, 1))
)

# Then find the fitted/predicted values for your favourite model from
# R-exercise 12 (called gfitfav in the exercise text; lm.obj is used here):
predict(lm.obj, newdata = gun.test)


# b) Confidence intervals for the expected values at these levels of the
#    factors:
predict(lm.obj, newdata = gun.test, interval = "confidence")


# c) The corresponding prediction intervals for new observations:
predict(lm.obj, newdata = gun.test, interval = "prediction")


# Compare and discuss the difference between the confidence and prediction intervals.