# Load BostonHousing data rm(list = ls()) library(mlbench) data("BostonHousing") # Libraries for regularized regression library(glmnet) library(tidyverse) library(broom) # Predictors should be a matrix predictors = BostonHousing %>% select(crim, zn, indus, chas, nox, rm, age, dis, rad, tax, ptratio, b, lstat) %>% data.matrix() response_variable = BostonHousing$medv # Specify a range of lambda and run the regression. # Ridge (alpha = 0), lasso (alpha = 1), elastic net (0 < alpha < 1) lambdas = 10^seq(2, -2, by = -.1) fit = glmnet(predictors, response_variable, alpha = 0.5, lambda = lambdas) plot(fit, xvar = "lambda", label = TRUE) # Determine the best lambda by cross validation set.seed(1) lambda_calc = cv.glmnet(predictors, response_variable, alpha = 1, lambda = lambdas, grouped = FALSE) plot(lambda_calc) optlambda = lambda_calc$lambda.min # Save the minimum lambda coef(fit, s = optlambda) # Comparison of actual and predicted values y_predicted = predict(fit, s = optlambda, newx = predictors) plot(response_variable, y_predicted, xlab = "Observed medv", ylab = "Predicted medv") correlation = cor.test(response_variable, y_predicted, method = c("pearson")) correlation