# Hitters data - Ridge regression
#
# Ridge regression walk-through on the ISLR `Hitters` data with glmnet:
#   1. clean the data and build the design matrix,
#   2. fit ridge over a grid of lambda values and inspect coefficients,
#   3. compare test-set MSE for several values of lambda,
#   4. choose lambda by cross-validation and refit on the full data.
# Bare expressions are left at top level on purpose so their values print
# when the script is run interactively or sourced with echo.

library(ISLR)

# ---- Data preparation ----
names(Hitters)
dim(Hitters)
sum(is.na(Hitters$Salary))

# Drop every row containing a missing value, then confirm none remain.
Hitters <- na.omit(Hitters)
dim(Hitters)
sum(is.na(Hitters))

# Copy of the data with PutOuts and Hits standardised (centred, divided by
# their standard deviation), used for a two-predictor least-squares fit.
Hitters2 <- Hitters
Hitters2$PutOuts <-
  (Hitters2$PutOuts - mean(Hitters2$PutOuts)) / sd(Hitters2$PutOuts)
Hitters2$Hits <-
  (Hitters2$Hits - mean(Hitters2$Hits)) / sd(Hitters2$Hits)
fit.lm <- lm(Salary ~ PutOuts + Hits, data = Hitters2)

# Design matrix and response for glmnet. The intercept is removed in the
# formula (`- 1`); glmnet fits its own intercept.
# NOTE(review): without an intercept, model.matrix keeps every level of the
# first factor, so x may have one more column than in the ISLR lab (which
# uses `model.matrix(Salary ~ ., Hitters)[, -1]`) - confirm this is intended.
x <- model.matrix(Salary ~ . - 1, Hitters)
y <- Hitters$Salary

# For understanding the following commands, you can look at the help pages
# or look at section 6.5 in the ISLR book (James et al).
library(glmnet)

# ---- Ridge fit over a grid of lambda values ----
# 100 values of lambda from 10^10 down to 10^-2; alpha = 0 selects ridge.
grid <- 10^seq(10, -2, length.out = 100)
ridge.mod <- glmnet(x, y, alpha = 0, lambda = grid)
plot(ridge.mod)
class(ridge.mod)
help(plot.elnet)
help(plot.glmnet)
plot(ridge.mod, xvar = "lambda")

# One row per coefficient (intercept included), one column per lambda.
dim(coef(ridge.mod))

# Coefficients and their L2 norm (intercept excluded) at the 50th lambda ...
ridge.mod$lambda[50]
coef(ridge.mod)[, 50]
sqrt(sum(coef(ridge.mod)[-1, 50]^2))

# ... and at the 60th (smaller) lambda.
ridge.mod$lambda[60]
coef(ridge.mod)[, 60]
sqrt(sum(coef(ridge.mod)[-1, 60]^2))

# Coefficients at lambda = 50 (a value of s, not an index into the grid).
# `[, 1]` returns the whole coefficient vector; a fixed `1:20` index would
# silently drop trailing coefficients whenever x has more than 19 columns.
predict(ridge.mod, s = 50, type = "coefficients")[, 1]

# ---- Train / test split ----
set.seed(1)
# Size argument left exactly as nrow(x) / 2 so the seeded split is unchanged.
train <- sample(seq_len(nrow(x)), nrow(x) / 2)
test <- (-train)
y.test <- y[test]

# Refit on the training half only, with a tighter convergence threshold.
ridge.mod <- glmnet(x[train, ], y[train], alpha = 0, lambda = grid,
                    thresh = 1e-12)

# Test MSE at lambda = 4.
ridge.pred <- predict(ridge.mod, s = 4, newx = x[test, ])
mean((ridge.pred - y.test)^2)

# Baseline: test MSE when predicting the training-set mean for everyone.
mean((mean(y[train]) - y.test)^2)

# Test MSE at a very large lambda, for comparison with the baseline above.
ridge.pred <- predict(ridge.mod, s = 1e10, newx = x[test, ])
mean((ridge.pred - y.test)^2)

# Test MSE at lambda = 0. exact = TRUE refits at s = 0 instead of
# interpolating along the grid, which is why the training x and y are
# passed again.
ridge.pred <- predict(ridge.mod, s = 0, newx = x[test, ], exact = TRUE,
                      x = x[train, ], y = y[train])
mean((ridge.pred - y.test)^2)

# Least squares on the training rows, to set beside the lambda = 0 ridge fit.
lm(y ~ x, subset = train)
predict(ridge.mod, s = 0, exact = TRUE, x = x[train, ], y = y[train],
        type = "coefficients")[, 1]

# ---- Choose lambda by cross-validation ----
set.seed(1)
cv.out <- cv.glmnet(x[train, ], y[train], alpha = 0)
plot(cv.out)
bestlam <- cv.out$lambda.min
bestlam

# Test MSE at the cross-validated lambda.
ridge.pred <- predict(ridge.mod, s = bestlam, newx = x[test, ])
mean((ridge.pred - y.test)^2)

# ---- Refit on the full data ----
out <- glmnet(x, y, alpha = 0)
# Coefficients
predict(out, type = "coefficients", s = bestlam)[, 1]