3. ## Confusion Matrix and Statistics
##
## nb.tstclass
## 0 1
## 0 28 12
## 1 3 48
##
## Accuracy : 0.8352
## 95% CI : (0.7427, 0.9047)
## No Information Rate : 0.6593
## P-Value [Acc > NIR] : 0.0001482
##
## Kappa : 0.6571
##
## Mcnemar's Test P-Value : 0.0388671
##
## Sensitivity : 0.9032
## Specificity : 0.8000
## Pos Pred Value : 0.7000
## Neg Pred Value : 0.9412
## Prevalence : 0.3407
## Detection Rate : 0.3077
## Detection Prevalence : 0.4396
## Balanced Accuracy : 0.8516
##
## 'Positive' Class : 0
##
# Start the wall-clock timer for the whole bagging experiment.
start_tm <- proc.time()
df <- trcatheart

# Fit one naive Bayes model on a bootstrap resample of `df`.
#
# df: training data frame containing a `target` column.
# Returns the fitted model produced by naiveBayes().
runModel <- function(df) {
  # Draw nrow(df) row indices with replacement (a bootstrap sample).
  # seq_len() stays correct for a zero-row frame, and TRUE is used instead of
  # the reassignable shortcut T.
  boot_rows <- sample(seq_len(nrow(df)), nrow(df), replace = TRUE)
  naiveBayes(target ~ ., data = df[boot_rows, ])
}

# lapply() passes the iteration index as `x`; it is ignored on purpose because
# each call simply fits a fresh bootstrap model on the global `df`.
lapplyrunmodel <- function(x) runModel(df)

# Fit 100 bootstrap models and report how long the fitting took.
system.time(models <- lapply(1:100, lapplyrunmodel))
## user system elapsed
## 0.32 0.02 0.33
# Report the estimated memory used by the list of fitted bootstrap models.
object.size(models)
## 1110448 bytes
# Stop the timer started in `start_tm` and report the wall-clock time.
end_tm <- proc.time()
# A proc.time() difference has five components (user, system, elapsed and two
# child-process entries that may be NA). Pasting the whole vector would print
# five strings, so only the elapsed component is reported.
print(paste(
  "time taken to run 100 bootstrapps",
  (end_tm - start_tm)[["elapsed"]],
  sep = ":"
))
Bootstrap Aggregation-LooCV-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/m11-bagging-loocv/baggi...
3 of 11 11/23/2020, 5:39 PM
4. ## [1] "time taken to run 100 bootstrapps:0.46"
## [2] "time taken to run 100 bootstrapps:0.02"
## [3] "time taken to run 100 bootstrapps:0.47"
## [4] "time taken to run 100 bootstrapps:NA"
## [5] "time taken to run 100 bootstrapps:NA"
# Score every bootstrap model on the held-out test set.
# Column 9 of tstcatheart is used as the response and is removed from the
# predictors; type = "raw" returns one column of class probabilities per class.
bagging_preds <- lapply(
  models,
  FUN = function(M, D = tstcatheart[, -c(9)]) predict(M, D, type = "raw")
)

# Build one caret confusion matrix per bootstrap model.
#
# P: matrix of class probabilities (one row per test observation).
# A: actual classes of the test observations.
bagging_cfm <- lapply(
  bagging_preds,
  FUN = function(P, A = tstcatheart[[9]]) {
    # which.max() returns the column position (1 or 2); subtracting 1 maps it
    # to the 0/1 class coding. Rounding the probabilities first is unnecessary
    # and would create artificial ties with more than two classes.
    pred_class <- unlist(apply(P, 1, which.max)) - 1

    # Give both factors the same levels so the table is always square, even
    # when a model predicts a single class for every test row.
    class_levels <- sort(unique(c(as.character(A), as.character(pred_class))))

    # caret::confusionMatrix() reads a table with predictions in ROWS and the
    # reference (truth) in COLUMNS. Passing table(actual, predicted) swaps
    # sensitivity/specificity, PPV/NPV, prevalence and the no-information rate.
    pred_tbl <- table(
      Prediction = factor(pred_class, levels = class_levels),
      Reference = factor(A, levels = class_levels)
    )
    caret::confusionMatrix(pred_tbl)
  }
)

# One row per bootstrap model, one column per overall / by-class statistic.
bagging.perf <- as.data.frame(do.call(
  "rbind",
  lapply(bagging_cfm, FUN = function(cfm) c(cfm$overall, cfm$byClass))
))

# Keep only models whose accuracy is significantly above the no-information
# rate; which() drops rows whose p-value is NA instead of yielding NA rows.
bagging.keep.rows <- which(bagging.perf$AccuracyPValue < 0.01)
# Drop the two p-value columns by name rather than by position.
bagging.keep.cols <- setdiff(
  names(bagging.perf),
  c("AccuracyPValue", "McnemarPValue")
)
bagging.perf.kept <- bagging.perf[bagging.keep.rows, bagging.keep.cols]

bagging.perf.mean <- apply(bagging.perf.kept, 2, mean)
# NOTE: despite its name this holds the standard deviation, not the variance.
bagging.perf.var <- apply(bagging.perf.kept, 2, sd)
bagging.perf.var
## Accuracy Kappa AccuracyLower
## 0.01618750 0.03355331 0.01846838
## AccuracyUpper AccuracyNull Sensitivity
## 0.01273569 0.01795716 0.03073122
## Specificity Pos Pred Value Neg Pred Value
## 0.01470108 0.02693220 0.02200582
## Precision Recall F1
## 0.02693220 0.03073122 0.02087685
## Prevalence Detection Rate Detection Prevalence
## 0.01795716 0.01183833 0.00000000
## Balanced Accuracy
## 0.01875328
# Print the per-metric means over the retained bootstrap models.
bagging.perf.mean
Bootstrap Aggregation-LooCV-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/m11-bagging-loocv/baggi...
4 of 11 11/23/2020, 5:39 PM
5. ## Accuracy Kappa AccuracyLower
## 0.8323565 0.6521225 0.7396540
## AccuracyUpper AccuracyNull Sensitivity
## 0.9023711 0.6496947 0.8891540
## Specificity Pos Pred Value Neg Pred Value
## 0.8025070 0.7077778 0.9300654
## Precision Recall F1
## 0.7077778 0.8891540 0.7876655
## Prevalence Detection Rate Detection Prevalence
## 0.3503053 0.3111111 0.4395604
## Balanced Accuracy
## 0.8458305
# Time elapsed since start_tm was recorded; the outer parentheses make the
# assigned value print as well.
(bagging_tm<-proc.time()-start_tm)
## user system elapsed
## 2.35 0.02 2.36
# Leave-one-out cross-validation of the naive Bayes classifier on trcatheart:
# every observation is predicted by a model trained on all the others.
N <- nrow(trcatheart)
cv_df <- do.call("rbind", lapply(
  seq_len(N), # seq_len() yields no folds for an empty frame (1:0 would not)
  FUN = function(idx, data = trcatheart) {
    # For each observation: train with ALL other observations ...
    m <- naiveBayes(target ~ ., data = data[-idx, ])
    # ... and predict that one observation (column 9 is the response).
    p <- predict(m, data[idx, -c(9)], type = "raw")
    # NB returns the probabilities of the classes; as per the Bayesian
    # classifier we take the class with the higher probability.
    # which.max returns 1 or 2, so subtract 1 to make the class 0 or 1.
    pc <- unlist(apply(p, 1, which.max)) - 1
    # Store the fold index, model, predicted class and actual class.
    list(fold = idx, m = m, predicted = pc, actual = data[idx, c(9)])
  }
))
# rbind() of lists gives a list-matrix, so every column of the resulting
# data frame (fold, m, predicted, actual) is a list-column.
cv_df <- as.data.frame(cv_df)
head(cv_df)
Bootstrap Aggregation-LooCV-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/m11-bagging-loocv/baggi...
5 of 11 11/23/2020, 5:39 PM