#Advertising data Advertising <- read.csv("../data/Advertising.csv") #First model with all 3 explanatory variables fit.lm <- lm(Sales~TV+Radio+Newspaper,data=Advertising) summary(fit.lm) #Rejection level for T test qt(0.975,196) #P-verdi 2*(1-pt(0.177,196)) #Rejection level for F test qf(0.95,3,196) #P-verdi 1-pf(570.3,3,196) #Confidensinterval confint(fit.lm) #Confidensinterval for E[Y|x] for new x newdata <- data.frame(TV=100,Radio=20,Newspaper=50) predict(fit.lm,newdata,interval="confidence") #Prediction (with intervaller) for new datapoint newdata <- data.frame(TV=c(100,110),Radio=c(20,25), Newspaper=c(50,45)) predict(fit.lm,newdata,interval="predict") #New model where Newspaper is removed fit2.lm <- lm(Sales~TV+Radio,data=Advertising) summary(fit2.lm) #Manual calculation of P value qf(0.95,2,197) 1-pf(859.6,2,197) anova(fit.lm,fit2.lm) #Manual calculation of F value, for comparison of models RSS <- sum(fit.lm$residuals^2) RSS0 <- sum(fit2.lm$residuals^2) n = nrow(Advertising) p = 3 q = 1 Fobs <- ((RSS0-RSS)/q)/(RSS/(n-p-1)) Fobs 1-pf(Fobs,q,n-p-1) #Direct use of anova command anova(fit.lm,fit2.lm) Advertising$Sum = Advertising$TV+Advertising$Radio+Advertising$Newspaper fit.err = lm(Sales~TV+Radio+Newspaper+Sum,data=Advertising) summary(fit.err) X = model.matrix(~TV+Radio+Newspaper+Sum,data=Advertising) XX = t(X)%*%X show(XX) show(round(eigen(XX)$values,5)) library(Matrix) show(rankMatrix(X)) summary(fit.err) n = nrow(Advertising) Advertising$Sum = Advertising$TV+Advertising$Radio+Advertising$Newspaper+rnorm(n,0,0.001) fit.err = lm(Sales~TV+Radio+Newspaper+Sum,data=Advertising) summary(fit.err) fit3.lm <- lm(Sales~TV,data=Advertising) anova(fit.lm,fit3.lm) #Compare also with model where only TV as x-variable fit1.lm <- lm(Sales~TV,data=Advertising) anova(fit1.lm,fit2.lm,fit.lm) #Comparing F-test og t-test 2*(1-pt(0.177,196)) 1-pf(0.03122,1,196) #Model with interaction fit3.lm <- lm(Sales~TV+Radio+TV:Radio,data=Advertising) summary(fit3.lm) anova(fit2.lm,fit3.lm) #Confidence interval confint(fit3.lm) #Confidence interval for E[Y|x] fornew x newdata <- data.frame(TV=100,Radio=20,Newspaper=50) predict(fit2.lm,newdata,interval="confidence") #Prediction (with intervals) for new data point newdata <- data.frame(TV=100,Radio=20,Newspaper=50) predict(fit2.lm,newdata,interval="predict")