Load Package And Data

# Set Parallel Processing - Decrease computation time
# Install doMC only when it is missing, then attach it unconditionally.
# library() is used for the attach step because install.packages() does not
# load the package, and library() fails loudly if the install did not succeed.
if (!requireNamespace("doMC", quietly = TRUE)) {
  install.packages("doMC")
}
library(doMC)

# Register a parallel backend with 4 worker cores.
registerDoMC(cores = 4)

Hyperparameter tuning using tuneGrid

  1. Model Tuning Parameter Set

  2. Cross Validation Set

    The resampling method (the `method` argument of trainControl) can be one of:
    • ‘boot’: Bootstrap sampling
    • ‘boot632’: Bootstrap sampling with 63.2% bias correction applied
    • ‘optimism_boot’: The optimism bootstrap estimator
    • ‘boot_all’: All boot methods.
    • ‘cv’: k-Fold cross validation
    • ‘repeatedcv’: Repeated k-Fold cross validation
    • ‘oob’: Out-of-bag estimate (available only for bagging-based models such as random forests)
    • ‘LOOCV’: Leave one out cross validation
    • ‘LGOCV’: Leave group out cross validation
  3. Training And Tuning

  4. Predict

  5. Confusion Matrix

# Step 1: Define the tuneGrid
# Candidate hyperparameters: every combination of nprune (2, 4, ..., 10) and
# degree (1, 2, 3), one row per combination (15 rows in total).
marsGrid <- expand.grid(
  nprune = seq(2, 10, by = 2),
  degree = seq(1, 3, by = 1)
)

# Step 2: Define the training control
# Resampling and evaluation settings, later passed to train() via trControl.
fitControl <- trainControl(
  method = 'cv',                     # k-fold cross validation
  number = 5,                        # number of folds
  savePredictions = 'final',         # saves predictions for optimal tuning parameter
  classProbs = TRUE,                 # should class probabilities be returned
  summaryFunction = twoClassSummary  # results summary function
)  # this closing parenthesis was missing, which made the script fail to parse

# Step 3: Training and Tuning hyper parameters by setting tuneGrid
# Fit the 'earth' model for Purchase against all other columns of trainData,
# evaluating each row of marsGrid under the fitControl resampling scheme and
# using 'ROC' as the selection metric.
model_mars3 <- train(
  Purchase ~ .,
  data = trainData,
  method = 'earth',
  metric = 'ROC',
  trControl = fitControl,
  tuneGrid = marsGrid
)
## Multivariate Adaptive Regression Spline 
## 857 samples
##  18 predictor
##   2 classes: 'CH', 'MM' 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 685, 685, 687, 686, 685 
## Resampling results across tuning parameters:
##   degree  nprune  ROC        Sens       Spec     
##   1        2      0.8745398  0.8700916  0.7006784
##   1        4      0.8924657  0.8662454  0.7394844
##   1        6      0.8912361  0.8719414  0.7334238
##   1        8      0.8886974  0.8661722  0.7334238
##   1       10      0.8879988  0.8623626  0.7423790
##   2        2      0.8745398  0.8700916  0.7006784
##   2        4      0.8953757  0.8739377  0.7454998
##   2        6      0.8917824  0.8681868  0.7515152
##   2        8      0.8904559  0.8624359  0.7574401
##   2       10      0.8932377  0.8547436  0.7784261
##   3        2      0.8582783  0.8777106  0.6618725
##   3        4      0.8914544  0.8662454  0.7544550
##   3        6      0.8910605  0.8586264  0.7665310
##   3        8      0.8838647  0.8452015  0.7456355
##   3       10      0.8827056  0.8471062  0.7426504
## ROC was used to select the optimal model using the largest value.
## The final values used for the model were nprune = 4 and degree = 2.
# Step 4: Predict on testData
# Predictions from the tuned model for the rows of testData4.
# NOTE(review): testData4 is not defined in this section; presumably it is the
# preprocessed test set built earlier in the file -- confirm.
predicted3 <- predict(object = model_mars3, newdata = testData4)

# Step 5: Compute the confusion matrix
# Compare the predictions against the observed Purchase labels in testData,
# treating 'MM' as the positive class and requesting the 'everything' report.
confusionMatrix(
  data = predicted3,
  reference = testData$Purchase,
  positive = 'MM',
  mode = 'everything'
)
## Confusion Matrix and Statistics
##           Reference
## Prediction  CH  MM
##         CH 117  21
##         MM  13  62
##                Accuracy : 0.8404          
##                  95% CI : (0.7841, 0.8869)
##     No Information Rate : 0.6103          
##     P-Value [Acc > NIR] : 2.164e-13       
##                   Kappa : 0.6585          
##  Mcnemar's Test P-Value : 0.2299          
##             Sensitivity : 0.7470          
##             Specificity : 0.9000          
##          Pos Pred Value : 0.8267          
##          Neg Pred Value : 0.8478          
##               Precision : 0.8267          
##                  Recall : 0.7470          
##                      F1 : 0.7848          
##              Prevalence : 0.3897          
##          Detection Rate : 0.2911          
##    Detection Prevalence : 0.3521          
##       Balanced Accuracy : 0.8235          
##        'Positive' Class : MM              
