8 How To Evaluate Performance Of Multiple Machine Learning Algorithms?
Load Packages and Data
# Load a previously saved workspace image.
# NOTE(review): presumably this supplies trainData and models fitted in an
# earlier section (e.g. model_mars3) — confirm against the script that
# writes this file.
load("../../data/craet_7.Rdata")
library(tidyverse)
library(caret)

# Set up parallel processing to decrease computation time.
# requireNamespace() only checks whether doMC is installed; unlike require()
# it does not attach the package as a side effect of the check. library()
# below is then the single place where doMC is attached, and it fails loudly
# if the installation did not succeed.
if (!requireNamespace("doMC", quietly = TRUE)) {
  install.packages("doMC")
}
library(doMC)
registerDoMC(cores = 4)
Caret provides the resamples() function, to which you can pass multiple trained machine learning models in order to evaluate them collectively.
Define the training control
# Resampling configuration passed as trControl to the train() calls.
fitControl <- trainControl(
  method = "cv",                     # k-fold cross-validation
  number = 5,                        # number of folds
  savePredictions = "final",         # save predictions for the optimal tuning parameters
  classProbs = TRUE,                 # return class probabilities (TRUE, not the reassignable T)
  summaryFunction = twoClassSummary  # results summary function for two-class problems
)
Train models
# Seed the RNG once before fitting the models.
# NOTE(review): a parallel backend is registered earlier in this file; with
# parallel workers, set.seed() alone may not make the resampling fully
# reproducible — trainControl() offers a `seeds` argument for that. Confirm
# whether exact reproducibility is required here.
set.seed(100)

# AdaBoost (method = "adaboost")
model_adaboost <- train(
  Purchase ~ .,
  data = trainData,
  method = "adaboost",
  tuneLength = 2,
  trControl = fitControl
)

# Random forest (method = "rf")
model_rf <- train(
  Purchase ~ .,
  data = trainData,
  method = "rf",
  tuneLength = 5,
  trControl = fitControl
)

# xgBoost DART (method = "xgbDART") — intentionally left disabled.
# model_xgbDART <- train(Purchase ~ ., data = trainData, method = "xgbDART", tuneLength = 5, trControl = fitControl, verbose = FALSE)

# Support vector machine with a radial kernel (method = "svmRadial")
model_svmRadial <- train(
  Purchase ~ .,
  data = trainData,
  method = "svmRadial",
  tuneLength = 15,
  trControl = fitControl
)
Run resamples() to compare the models
# Gather the resampling results of the fitted models with resamples().
# NOTE(review): model_mars3 is not created in this section; presumably it is
# restored from the workspace loaded at the top of the file — confirm.
models_compare <- resamples(
  list(
    ADABOOST = model_adaboost,
    RF = model_rf,
    MARS = model_mars3,
    SVM = model_svmRadial
  )
)

# Print summary statistics of each metric across the resamples.
summary(models_compare)
##
## Call:
## summary.resamples(object = models_compare)
##
## Models: ADABOOST, RF, MARS, SVM
## Number of resamples: 5
##
## ROC
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## ADABOOST 0.8126510 0.8462687 0.8682549 0.8657598 0.8868515 0.9147727 0
## RF 0.8635394 0.8647908 0.8748565 0.8841388 0.9046198 0.9128875 0
## MARS 0.8520967 0.8660981 0.9091561 0.8953757 0.9118590 0.9376688 0
## SVM 0.8769723 0.8839375 0.8902597 0.8895479 0.8948048 0.9017652 0
##
## Sens
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## ADABOOST 0.7619048 0.7904762 0.7904762 0.8070330 0.8076923 0.8846154 0
## RF 0.7809524 0.8000000 0.8461538 0.8451832 0.8750000 0.9238095 0
## MARS 0.8190476 0.8476190 0.8857143 0.8739377 0.8942308 0.9230769 0
## SVM 0.8750000 0.8761905 0.8761905 0.8891209 0.9047619 0.9134615 0
##
## Spec
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## ADABOOST 0.7014925 0.7462687 0.7727273 0.7635007 0.7761194 0.8208955 0
## RF 0.6119403 0.6363636 0.7462687 0.7332429 0.8208955 0.8507463 0
## MARS 0.6567164 0.7164179 0.7313433 0.7454998 0.7424242 0.8805970 0
## SVM 0.6969697 0.7164179 0.7313433 0.7364089 0.7611940 0.7761194 0
# Box-and-whisker plots comparing the models' resampled metrics.
# Both axes use independent ("free") scales.
scales <- list(
  x = list(relation = "free"),
  y = list(relation = "free")
)
bwplot(models_compare, scales = scales)

# Write the current workspace to disk.
save.image("../../data/craet_8.Rdata")