7  Classification Metrics

We use RMSE (root mean squared error) to evaluate the performance of a prediction model with a continuous outcome. However, RMSE is not commonly used for categorical outcomes. Instead, we use several other metrics that are derived from the confusion matrix, which was introduced in the Naive Bayes handout. Therefore, the first step in assessing the predictive power of a statistical model with a categorical outcome is forming a confusion matrix.

7.1 Confusion Matrix and Misclassification

“Ethnicity, Insurgency, and Civil War” by Fearon and Laitin (2003), published in the American Political Science Review, is one of the most cited studies in the conflict literature. We will use these data in this session.

rawData=read.csv("https://raw.githubusercontent.com/babakrezaee/MethodsCourses/master/DataSets/FearonLaitin2003.csv")

head(rawData, n=2)
  ccode country cname cmark year wars war warl onset ethonset durest aim
1     2     USA   USA     1 1945    0   0    0     0        0     NA  NA
2     2     USA   USA     0 1946    0   0    0     0        0     NA  NA
  casename ended ethwar waryrs    pop     lpop polity2 gdpen gdptype gdpenl
1             NA     NA        140969 11.85630      10 7.626       3  7.626
2             NA     NA        141936 11.86313      10 7.654       3  7.626
  lgdpenl1  lpopl1                        region western eeurop lamerica
1 8.939319 11.8563 western democracies and japan       1      0        0
2 8.939319 11.8563 western democracies and japan       1      0        0
  ssafrica asia nafrme colbrit colfra mtnest  lmtnest elevdiff Oil ncontig
1        0    0      0       1      0   23.9 3.214868     6280   0       1
2        0    0      0       1      0   23.9 3.214868     6280   0       1
    ethfrac       ef plural second numlang relfrac plurrel minrelpc muslim
1 0.3569501 0.490957  0.691  0.125       3   0.596      56       28    1.9
2 0.3569501 0.490957  0.691  0.125       3   0.596      56       28    1.9
  nwstate polity2l instab anocl deml empethfrac empwarl emponset empgdpenl
1       0       10      0     0    1  0.4936721       0        0   6.79718
2       0       10      0     0    1  0.3569501       0        0   6.79718
  emplpopl emplmtnest empncontig empolity2l sdwars sdonset colwars colonset
1 11.97919   3.211572          1    7.68712      0       0      NA       NA
2 11.97919   3.214868          1   10.00000      0       0      NA       NA
  cowwars cowonset cowwarl sdwarl colwarl
1       0        0       0      0      NA
2       0        0       0      0      NA
# Keep the outcome and the covariates used below, dropping rows with missing values
Data=na.omit(cbind(rawData$onset,rawData$warl,rawData$gdpenl,rawData$lpopl1,
               rawData$lmtnest,rawData$ncontig,rawData$Oil,rawData$nwstate,
               rawData$instab,rawData$polity2l,rawData$ethfrac,rawData$relfrac))

# Min-max scaling: rescales a vector to the [0,1] range
scf = function(x) {return((x-min(x))/(max(x)-min(x)))}

x=Data[,-1]


x = apply(x,2,scf)  # rescale each covariate to [0,1]

y=Data[,1]

Data=cbind(y,x)

Data=data.frame(Data)

colnames(Data)<- c("onset","warL","gdpenL","populationL","mountainous",
                   "contiguous", "oil", "newState", "instability","polityL",
                   "ELF", "RelELF")

is.factor(Data$onset)
[1] FALSE
table(Data$onset)

   0    1    4 
6221  105    1 
# It seems there is a mistake in data entry. We need to fix it:

Data$onset[Data$onset>1] <- 1

table(Data$onset)

   0    1 
6221  106 
Data$onset=factor(Data$onset)




fm_binomial=glm(onset~warL+gdpenL+populationL+mountainous+contiguous+oil+newState+instability+polityL+ELF+RelELF, data = Data, family = "binomial")

fm_binomial$fitted.values[1:10]
          1           2           3           4           5           6 
0.009458287 0.009458287 0.009385206 0.008282231 0.007654327 0.008318317 
          7           8           9          10 
0.006527791 0.005844748 0.005942122 0.005525181 
library(dplyr)
Yhat_logit = if_else(fm_binomial$fitted.values>.5, 1, 0)

table(Yhat_logit)
Yhat_logit
   0 
6327 

In the above example, we first estimated the probability of civil war onset and then used a threshold, i.e. \(.5\), to categorize the predicted outcomes into 0, i.e. no civil war, and 1, i.e. civil war. Indeed, in classification problems, we often estimate the probability of each category and then pick a threshold to obtain the predicted categorical outcome.
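Since no single threshold is obviously right, it helps to see how the predictions change as the threshold moves. A minimal sketch (this loop is our illustration, with an arbitrary grid of thresholds, not part of the original replication code):

for (tau in c(.5, .25, .1, .05)) {
  # count how many onsets the logit model predicts at each threshold
  Yhat = if_else(fm_binomial$fitted.values > tau, 1, 0)
  cat("threshold =", tau, ": predicted onsets =", sum(Yhat), "\n")
}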

Yhat_logit = if_else(fm_binomial$fitted.values>.1, 1, 0)

table(Yhat_logit)
Yhat_logit
   0    1 
6234   93 
Y=Data$onset

ctab=table(Y,Yhat_logit)

ctab
   Yhat_logit
Y      0    1
  0 6144   77
  1   90   16
# Misclassification rate: share of all observations that are classified incorrectly
missClass=(sum(ctab)-sum(diag(ctab)))/sum(ctab)

# perCW: share of predicted civil war onsets that are actual onsets
perCW=ctab[2,2]/sum(ctab[,2])

cat("Misclassification rate and civil war classification rate using the logit model are",round(missClass,2),"and",round(perCW,2),"respectively\n")
Misclassification rate and civil war classification rate using the logit model are 0.03 and 0.17 respectively 
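A low misclassification rate should be read with care here, because onsets are rare. As a baseline (our own comparison, not computed in the original handout), a model that always predicts peace misclassifies only the 106 onset cases:

# Misclassification rate of a naive "always peace" classifier: 106/6327, about 0.017
mean(Y != 0)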
logitProbFit = data.frame(CivilWar=Data$onset,1-fm_binomial$fitted.values,fm_binomial$fitted.values)

names(logitProbFit)[2:3] = c("No", "Yes")

par(mfrow=c(1,2))
par(mai=c(1,1,.5,.5))
plot(No~CivilWar,logitProbFit,col=c(grey(.5),2:3),cex.lab=2.4,cex.axis=2.4)
plot(Yes~CivilWar,logitProbFit,col=c(grey(.5),2:3),cex.lab=2.4,cex.axis=2.4)

library(kknn)
fm_knn = kknn(onset~warL+gdpenL+populationL+mountainous+contiguous+oil+newState+instability+polityL+ELF+RelELF,Data,Data,k=10,kernel = "rectangular")

table(fm_knn$fitted.values)

   0    1 
6325    2 
fm_knn$fitted.values[545:615]
 [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Levels: 0 1
rawData$onset[545:615]
 [1] 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0
ctab=table(Data$onset,fm_knn$fitted.values)

ctab
   
       0    1
  0 6220    1
  1  105    1
Yhat_kNN <- if_else( fm_knn$prob[,2]>.1, 1, 0)

ctab=table(Y,Yhat_kNN)
 
ctab
   Yhat_kNN
Y      0    1
  0 6146   75
  1   84   22
missClass=(sum(ctab)-sum(diag(ctab)))/sum(ctab)

perCW=ctab[2,2]/sum(ctab[,2])

cat("Misclassification rate and civil war classification rate using the kNN model are",round(missClass,2),"and",round(perCW,2),"respectively\n")
Misclassification rate and civil war classification rate using the kNN model are 0.03 and 0.23 respectively 
kNNProbFit = data.frame(CivilWar=Data$onset,fm_knn$prob)

names(kNNProbFit)[2:3] = c("No", "Yes")

par(mfrow=c(1,2))
par(mai=c(1,1,.5,.5))
plot(No~CivilWar,kNNProbFit,col=c(grey(.5),2:3),cex.lab=2.4,cex.axis=2.4)
plot(Yes~CivilWar,kNNProbFit,col=c(grey(.5),2:3),cex.lab=2.4,cex.axis=2.4)

These plots present an important issue with quantitative models of civil war onset: similar to Fearon and Laitin (2003), these models are good at predicting peace, but not civil war/conflict onset!

7.2 Lift, ROC, and AUC

The graph below shows the terms that are commonly used to label the elements of a confusion matrix. Most classification measures are defined using these terms.

Confusion matrix

7.2.1 Lift Curve

The lift curve is one of the commonly used measures of model classification performance. It shows how successful the model, i.e. \(\hat{p}=P(Y=1|x)\), is in capturing the correct outcome, \(Y\).

To plot this curve, we first sort the observations by \(\hat{p}\) in decreasing order; we expect the observations with the largest \(\hat{p}\) to be the ones most likely to be correct realizations of the outcome, i.e. true positives. Then, we plot the percentage of observations taken against the cumulative percentage of successes captured.
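Before using the ready-made plotting function below, it may help to see the idea in two lines. The following sketch (ours; lift_manual is a hypothetical name) sorts observations by \(\hat{p}\) and tracks the cumulative share of successes captured:

# Hand-rolled lift: sort by predicted probability, most confident first,
# then compute the cumulative share of actual onsets captured
lift_manual = function(y, phat) {
  ord = order(phat, decreasing = TRUE)
  cumsum(y[ord] == 1)/sum(y == 1)
}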

library(devtools)
source_url('https://raw.githubusercontent.com/babakrezaee/DataWrangling/master/Functions/lfitPlot_fun.R')



olift = liftf(Data$onset,fm_binomial$fitted.values,dopl=FALSE)
olift2 = liftf(Data$onset,fm_knn$prob[,2] ,dopl=FALSE)

ii = (1:length(olift))/length(olift)

plot(ii,olift,type='n',lwd=2,xlab='% tried',ylab='% of successes',cex.lab=2)
lines(ii,olift,col='red')
lines(ii,olift2,col='blue')
abline(0,1,lty=2)

legend('bottomright',legend=c('logit','kNN'),col=c('red','blue'),lwd=3)

7.2.2 Sensitivity, Specificity, and Precision

\[\begin{align} Recall/Sensitivity/True~Positive~Rate=\frac{TP}{TP+FN} \end{align}\]

\[\begin{align} Specificity/Selectivity/True~Negative~Rate=\frac{TN}{TN+FP} \end{align}\]

\[\begin{align} Precision/Positive~Predictive~Value=\frac{TP}{TP+FP} \end{align}\]
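These rates can also be computed directly from the confusion matrix. A minimal sketch using the kNN table \({\tt ctab}\) from above, treating civil war onset (1) as the positive class:

# Cells of ctab: rows are actual Y, columns are predicted Yhat_kNN
TP = ctab[2,2]; FN = ctab[2,1]; TN = ctab[1,1]; FP = ctab[1,2]
TP/(TP+FN)  # sensitivity/recall for onsets: 22/106
TN/(TN+FP)  # specificity: 6146/6221
TP/(TP+FP)  # precision: 22/97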

library(caret)
confusionMatrix(data = as.factor(Yhat_kNN), reference = as.factor(Y))
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 6146   84
         1   75   22
                                          
               Accuracy : 0.9749          
                 95% CI : (0.9707, 0.9786)
    No Information Rate : 0.9832          
    P-Value [Acc > NIR] : 1.0000          
                                          
                  Kappa : 0.204           
                                          
 Mcnemar's Test P-Value : 0.5258          
                                          
            Sensitivity : 0.9879          
            Specificity : 0.2075          
         Pos Pred Value : 0.9865          
         Neg Pred Value : 0.2268          
             Prevalence : 0.9832          
         Detection Rate : 0.9714          
   Detection Prevalence : 0.9847          
      Balanced Accuracy : 0.5977          
                                          
       'Positive' Class : 0               
                                          
confusionMatrix(data = as.factor(Yhat_logit), reference = as.factor(Y))
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 6144   90
         1   77   16
                                          
               Accuracy : 0.9736          
                 95% CI : (0.9694, 0.9774)
    No Information Rate : 0.9832          
    P-Value [Acc > NIR] : 1.0000          
                                          
                  Kappa : 0.1475          
                                          
 Mcnemar's Test P-Value : 0.3531          
                                          
            Sensitivity : 0.9876          
            Specificity : 0.1509          
         Pos Pred Value : 0.9856          
         Neg Pred Value : 0.1720          
             Prevalence : 0.9832          
         Detection Rate : 0.9711          
   Detection Prevalence : 0.9853          
      Balanced Accuracy : 0.5693          
                                          
       'Positive' Class : 0               
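Note that in the output above, caret treats the first factor level, 0 (no onset), as the positive class, so the reported sensitivity of 0.99 is the rate at which peace is correctly predicted. To evaluate the rare class instead, \({\tt confusionMatrix()}\) accepts a positive argument, for example:

confusionMatrix(data = as.factor(Yhat_kNN), reference = as.factor(Y), positive = "1")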
                                          

\[\begin{align} F_1=2\frac{Recall \times Precision}{Recall+Precision} \end{align}\]

confusionMatrix(data = as.factor(Yhat_kNN), reference = as.factor(Y), mode = "prec_recall")
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 6146   84
         1   75   22
                                          
               Accuracy : 0.9749          
                 95% CI : (0.9707, 0.9786)
    No Information Rate : 0.9832          
    P-Value [Acc > NIR] : 1.0000          
                                          
                  Kappa : 0.204           
                                          
 Mcnemar's Test P-Value : 0.5258          
                                          
              Precision : 0.9865          
                 Recall : 0.9879          
                     F1 : 0.9872          
             Prevalence : 0.9832          
         Detection Rate : 0.9714          
   Detection Prevalence : 0.9847          
      Balanced Accuracy : 0.5977          
                                          
       'Positive' Class : 0               
                                          
confusionMatrix(data = as.factor(Yhat_logit), reference = as.factor(Y), mode = "prec_recall")
Confusion Matrix and Statistics

          Reference
Prediction    0    1
         0 6144   90
         1   77   16
                                          
               Accuracy : 0.9736          
                 95% CI : (0.9694, 0.9774)
    No Information Rate : 0.9832          
    P-Value [Acc > NIR] : 1.0000          
                                          
                  Kappa : 0.1475          
                                          
 Mcnemar's Test P-Value : 0.3531          
                                          
              Precision : 0.9856          
                 Recall : 0.9876          
                     F1 : 0.9866          
             Prevalence : 0.9832          
         Detection Rate : 0.9711          
   Detection Prevalence : 0.9853          
      Balanced Accuracy : 0.5693          
                                          
       'Positive' Class : 0               
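The ROC curve generalizes this threshold-by-threshold view: it plots the true positive rate against the false positive rate for every possible threshold, and the area under the curve (AUC) summarizes classification performance in a single number, with \(.5\) corresponding to random guessing and \(1\) to perfect classification. We use the \({\tt pROC}\) package to plot the ROC curves and compute the AUC of both models.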
                                          
library(pROC)

par(mfrow=c(1,2))
par(mai=c(1,1,.5,.5))



rocR = roc(response=Y,predictor=fm_binomial$fitted.values)
AUC = auc(rocR)
plot(rocR, col='maroon')
title(main=paste("logit model AUC= ",round(AUC,2)))

rocR = roc(response=Y,predictor=fm_knn$prob[,2])
AUC = auc(rocR)
plot(rocR,  col='blue')
title(main=paste("kNN model AUC= ",round(AUC,2)))

7.3 How to Decide on the Threshold?

To answer this question, we can use the \({\tt ROCR}\) package, which can plot the ROC curve with the corresponding threshold values printed along it.

library(ROCR)

ROCR_logit=prediction(fm_binomial$fitted.values, Y)

ROCRperf_logit = performance(ROCR_logit, "tpr", "fpr")

ROCR_kNN=prediction(fm_knn$prob[,2], Y)

ROCRperf_kNN = performance(ROCR_kNN, "tpr", "fpr")



plot(ROCRperf_logit, colorize=TRUE, print.cutoffs.at=seq(0,1,by=0.1), text.adj=c(-0.2,1.7))

plot(ROCRperf_kNN, colorize=TRUE, print.cutoffs.at=seq(0,1,by=0.1), text.adj=c(-0.2,1.7))
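The colorized cutoffs along these curves already suggest where the interesting thresholds lie. One common rule of thumb (our suggestion, not a rule from this handout) is to pick the cutoff that maximizes Youden's \(J=TPR-FPR\); ROCR makes this easy because the performance object stores the cutoffs in its alpha.values slot:

# Cutoff maximizing Youden's J = TPR - FPR for the logit model
tpr = ROCRperf_logit@y.values[[1]]
fpr = ROCRperf_logit@x.values[[1]]
cutoffs = ROCRperf_logit@alpha.values[[1]]
cutoffs[which.max(tpr - fpr)]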