Homework Unit 8: Advanced Performance Metrics

Author

TA

Published

March 5, 2026

Introduction

To begin, download the following from the course web book (Unit 8):

  • hw_unit_8_performance.qmd (notebook for this assignment)

  • breast_cancer.csv (data for this assignment)

The data for this week’s assignment contain information about breast cancer diagnoses: characteristics of different breast cancer tumors, with each tumor classified as benign or malignant. Your goal is to choose between two candidate statistical algorithms (a standard GLM vs. a tuned KNN model) to identify and evaluate the best-performing model for diagnosis.

You can imagine that the consequences of missing a cancerous tumor are not the same as the consequences of misdiagnosing a benign one. In this assignment, we will explore how the choice of performance metric and the balance of diagnoses affect our evaluation of the best-performing model in these data.

NOTE: Fitting models in this assignment will generate some warnings having to do with glm.fit. This is to be expected, and we are going to review these warnings and some related issues in our next lab.

Let’s get started!


Setup

Set up your notebook in this section. You will want to set your conflicts policy, paths, and initiate parallel processing here!

# Necessary Setup
library(tidyverse) 
library(tidymodels)

options(conflicts.policy = "depends.ok")

# John's Functions/Scripts
source("https://github.com/jjcurtin/lab_support/blob/main/fun_ml.R?raw=true")
source("https://github.com/jjcurtin/lab_support/blob/main/fun_plots.R?raw=true")
source("https://github.com/jjcurtin/lab_support/blob/main/fun_eda.R?raw=true")

# Additional Packages
conflictRules("Matrix", mask.ok = c("expand", "pack", "unpack"))
library(future)
library(xfun, include.only = "cache_rds")

# Additional Configurations
theme_set(theme_bw())
options(tibble.width = Inf, dplyr.print_max=Inf)

# Paths
path_data <- "application_assignments/unit_08"

# Parallel Processing
plan(multisession, workers = (parallel::detectCores(logical = FALSE) -1))

Read in your data

Read in the breast_cancer.csv data file and save as an object called data_all, perform any checks needed on the data (i.e., light cleaning EDA) and set the outcome diagnosis as a factor with malignant as the first level.

data_all <- read_csv(here::here(path_data, "breast_cancer.csv"), col_types = cols()) |> 
  glimpse()
Rows: 423
Columns: 31
$ diagnosis               <chr> "malignant", "benign", "benign", "malignant", …
$ perimeter_se            <dbl> 1.9740, 1.6670, 1.4890, 2.9890, 2.6840, 1.4450…
$ fractal_dimension_mean  <dbl> 0.05986, 0.06320, 0.05828, 0.06768, 0.05934, 0…
$ concave_points_worst    <dbl> 0.12520, 0.11050, 0.03002, 0.20270, 0.06544, 0…
$ symmetry_mean           <dbl> 0.1594, 0.1886, 0.1845, 0.2157, 0.1834, 0.1514…
$ texture_se              <dbl> 0.3621, 0.7339, 1.6470, 0.9489, 0.8429, 1.0660…
$ concave_points_se       <dbl> 0.008260, 0.013040, 0.003419, 0.012710, 0.0091…
$ concavity_mean          <dbl> 0.075500, 0.070970, 0.004967, 0.169000, 0.0263…
$ fractal_dimension_se    <dbl> 0.002881, 0.001982, 0.002534, 0.003884, 0.0014…
$ radius_worst            <dbl> 17.770, 12.640, 12.360, 18.810, 12.970, 14.200…
$ concave_points_mean     <dbl> 0.040790, 0.044970, 0.006434, 0.089230, 0.0206…
$ radius_mean             <dbl> 15.120, 11.610, 11.220, 14.870, 11.500, 12.620…
$ smoothness_se           <dbl> 0.005472, 0.005884, 0.004359, 0.006985, 0.0063…
$ smoothness_worst        <dbl> 0.14910, 0.14150, 0.09994, 0.18780, 0.11830, 0…
$ symmetry_se             <dbl> 0.01523, 0.01848, 0.01916, 0.01602, 0.02292, 0…
$ radius_se               <dbl> 0.2711, 0.2456, 0.2239, 0.4266, 0.3927, 0.2449…
$ concavity_worst         <dbl> 0.33270, 0.23020, 0.02318, 0.47040, 0.08105, 0…
$ concavity_se            <dbl> 0.020390, 0.026310, 0.003223, 0.030110, 0.0124…
$ compactness_se          <dbl> 0.019190, 0.020050, 0.006813, 0.025630, 0.0106…
$ smoothness_mean         <dbl> 0.08876, 0.10880, 0.07780, 0.11620, 0.09345, 0…
$ area_se                 <dbl> 26.440, 15.890, 15.460, 41.180, 26.990, 18.510…
$ area_worst              <dbl> 989.5, 475.7, 470.9, 1095.0, 508.9, 624.0, 544…
$ perimeter_mean          <dbl> 98.78, 75.46, 70.79, 98.64, 73.28, 81.35, 79.0…
$ compactness_mean        <dbl> 0.09588, 0.11680, 0.03574, 0.16490, 0.05991, 0…
$ area_mean               <dbl> 716.6, 408.2, 386.8, 682.5, 407.4, 496.4, 466.…
$ fractal_dimension_worst <dbl> 0.09740, 0.07427, 0.07307, 0.10650, 0.06487, 0…
$ texture_mean            <dbl> 16.68, 16.02, 33.81, 16.67, 18.45, 23.97, 18.5…
$ perimeter_worst         <dbl> 117.70, 81.93, 78.44, 127.10, 83.12, 90.67, 85…
$ symmetry_worst          <dbl> 0.3415, 0.2787, 0.2911, 0.3585, 0.2740, 0.2826…
$ texture_worst           <dbl> 20.24, 19.67, 41.78, 27.37, 22.46, 31.31, 27.4…
$ compactness_worst       <dbl> 0.33310, 0.21700, 0.06885, 0.44800, 0.10490, 0…
data_all |> 
  skim_some()
Data summary
Name data_all
Number of rows 423
Number of columns 31
_______________________
Column type frequency:
character 1
numeric 30
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
diagnosis 0 1 6 9 0 2 0

Variable type: numeric

skim_variable n_missing complete_rate p0 p100
perimeter_se 0 1 0.76 21.98
fractal_dimension_mean 0 1 0.05 0.10
concave_points_worst 0 1 0.00 0.29
symmetry_mean 0 1 0.11 0.29
texture_se 0 1 0.36 4.88
concave_points_se 0 1 0.00 0.05
concavity_mean 0 1 0.00 0.43
fractal_dimension_se 0 1 0.00 0.03
radius_worst 0 1 7.93 33.13
concave_points_mean 0 1 0.00 0.19
radius_mean 0 1 6.98 28.11
smoothness_se 0 1 0.00 0.02
smoothness_worst 0 1 0.07 0.21
symmetry_se 0 1 0.01 0.08
radius_se 0 1 0.11 2.87
concavity_worst 0 1 0.00 1.25
concavity_se 0 1 0.00 0.40
compactness_se 0 1 0.00 0.11
smoothness_mean 0 1 0.05 0.16
area_se 0 1 6.80 525.60
area_worst 0 1 185.20 3432.00
perimeter_mean 0 1 43.79 188.50
compactness_mean 0 1 0.02 0.35
area_mean 0 1 143.50 2499.00
fractal_dimension_worst 0 1 0.06 0.17
texture_mean 0 1 9.71 39.28
perimeter_worst 0 1 50.41 229.30
symmetry_worst 0 1 0.16 0.66
texture_worst 0 1 12.02 49.54
compactness_worst 0 1 0.03 0.87
data_all <- data_all |> 
  mutate(diagnosis = factor(diagnosis, levels = c("malignant", "benign"))) |> 
  glimpse()
Rows: 423
Columns: 31
$ diagnosis               <fct> malignant, benign, benign, malignant, benign, …
$ perimeter_se            <dbl> 1.9740, 1.6670, 1.4890, 2.9890, 2.6840, 1.4450…
$ fractal_dimension_mean  <dbl> 0.05986, 0.06320, 0.05828, 0.06768, 0.05934, 0…
$ concave_points_worst    <dbl> 0.12520, 0.11050, 0.03002, 0.20270, 0.06544, 0…
$ symmetry_mean           <dbl> 0.1594, 0.1886, 0.1845, 0.2157, 0.1834, 0.1514…
$ texture_se              <dbl> 0.3621, 0.7339, 1.6470, 0.9489, 0.8429, 1.0660…
$ concave_points_se       <dbl> 0.008260, 0.013040, 0.003419, 0.012710, 0.0091…
$ concavity_mean          <dbl> 0.075500, 0.070970, 0.004967, 0.169000, 0.0263…
$ fractal_dimension_se    <dbl> 0.002881, 0.001982, 0.002534, 0.003884, 0.0014…
$ radius_worst            <dbl> 17.770, 12.640, 12.360, 18.810, 12.970, 14.200…
$ concave_points_mean     <dbl> 0.040790, 0.044970, 0.006434, 0.089230, 0.0206…
$ radius_mean             <dbl> 15.120, 11.610, 11.220, 14.870, 11.500, 12.620…
$ smoothness_se           <dbl> 0.005472, 0.005884, 0.004359, 0.006985, 0.0063…
$ smoothness_worst        <dbl> 0.14910, 0.14150, 0.09994, 0.18780, 0.11830, 0…
$ symmetry_se             <dbl> 0.01523, 0.01848, 0.01916, 0.01602, 0.02292, 0…
$ radius_se               <dbl> 0.2711, 0.2456, 0.2239, 0.4266, 0.3927, 0.2449…
$ concavity_worst         <dbl> 0.33270, 0.23020, 0.02318, 0.47040, 0.08105, 0…
$ concavity_se            <dbl> 0.020390, 0.026310, 0.003223, 0.030110, 0.0124…
$ compactness_se          <dbl> 0.019190, 0.020050, 0.006813, 0.025630, 0.0106…
$ smoothness_mean         <dbl> 0.08876, 0.10880, 0.07780, 0.11620, 0.09345, 0…
$ area_se                 <dbl> 26.440, 15.890, 15.460, 41.180, 26.990, 18.510…
$ area_worst              <dbl> 989.5, 475.7, 470.9, 1095.0, 508.9, 624.0, 544…
$ perimeter_mean          <dbl> 98.78, 75.46, 70.79, 98.64, 73.28, 81.35, 79.0…
$ compactness_mean        <dbl> 0.09588, 0.11680, 0.03574, 0.16490, 0.05991, 0…
$ area_mean               <dbl> 716.6, 408.2, 386.8, 682.5, 407.4, 496.4, 466.…
$ fractal_dimension_worst <dbl> 0.09740, 0.07427, 0.07307, 0.10650, 0.06487, 0…
$ texture_mean            <dbl> 16.68, 16.02, 33.81, 16.67, 18.45, 23.97, 18.5…
$ perimeter_worst         <dbl> 117.70, 81.93, 78.44, 127.10, 83.12, 90.67, 85…
$ symmetry_worst          <dbl> 0.3415, 0.2787, 0.2911, 0.3585, 0.2740, 0.2826…
$ texture_worst           <dbl> 20.24, 19.67, 41.78, 27.37, 22.46, 31.31, 27.4…
$ compactness_worst       <dbl> 0.33310, 0.21700, 0.06885, 0.44800, 0.10490, 0…
data_all |> 
  skim_some() |> 
  skimr::yank("factor")

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
diagnosis 0 1 FALSE 2 ben: 357, mal: 66

Print a table to see the balance of positive and negative diagnosis cases.

data_all |> 
  janitor::tabyl(diagnosis)
 diagnosis   n   percent
 malignant  66 0.1560284
    benign 357 0.8439716

What do you notice about the distribution of your outcome variable? Do you have any concerns? Write at least 2 sentences.

Our outcome variable is imbalanced (357 benign vs. 66 malignant). Imbalance makes accuracy misleading, because a model can look accurate simply by favoring the majority (benign) class, and it can also depress sensitivity (the rate of true positives) and destabilize the positive predictive value of our model.
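
To make this concrete, here is a small base-R sketch (using only the class counts from the table above) showing that a degenerate classifier that always predicts benign still looks quite accurate:

```r
# Class counts from the tabyl() output above
n_malignant <- 66
n_benign    <- 357

# A degenerate "always predict benign" classifier
baseline_accuracy <- n_benign / (n_malignant + n_benign)
round(baseline_accuracy, 3)  # 0.844, despite detecting zero cancers

# Its sensitivity (true positive rate for malignant) is 0
baseline_sensitivity <- 0 / n_malignant
```

This is why we will inspect metrics beyond raw accuracy (e.g., ROC AUC and the confusion matrix) later in this assignment.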

Split data into train and test

Hold out 1/3 of the data as a test set for evaluation using the initial_split() function. Use the provided seed. Stratify on diagnosis.

set.seed(02182002)

splits_test <- data_all |>  
  initial_split(prop = 2/3, strata = "diagnosis")

data_train <- splits_test |> 
  analysis()

data_test <- splits_test |> 
  assessment()

Light Modeling EDA

Look at correlations between predictors in data_train.

data_train |> 
  select(where(is.numeric)) |> 
  cor() |> 
  corrplot::corrplot(type = "upper")

Visualize the variance of your predictors in data_train.

data_train |> 
  select(where(is.numeric)) |> 
  names() |> 
  map(\(name) plot_box_violin(df = data_train, x = name)) |> 
  cowplot::plot_grid(plotlist = _, ncol = 5)

Answer the following questions with at least 2 sentences!

Why should you be looking at variance of your predictors?

Looking at the variability of our predictors allows us to examine each variable’s distribution and flag potential outlier values. Strong skew matters here because we are considering fitting a linear (GLM) model, where a few extreme predictor values can exert high leverage on the fitted coefficients. Outlier values can be problematic for all types of models if they have high influence and distort the shape of the relationship between our predictors and the outcome.

If you had concerns about the variance of your predictors, what would you do?

Transformations (e.g., log, Yeo-Johnson) can be applied to normalize the distributional shape of predictors. Options for handling outliers include excluding values that are clearly errors or invalid, retaining outliers that we have no evidence are incorrect, or bringing outlier values in to the fence (winsorizing).
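
For illustration only (we do not use it below), such a transformation can be added as a recipe step. This is a minimal sketch with a hypothetical name (`rec_demo`) and a tiny stand-in data frame, assuming the standard recipes steps:

```r
library(recipes)

# Tiny stand-in data frame (illustrative values, not the real data)
demo <- data.frame(diagnosis = factor(c("malignant", "benign")),
                   area_mean = c(716.6, 408.2))

# step_YeoJohnson() reduces skew and, unlike a plain log,
# handles zero and negative values
rec_demo <- recipe(diagnosis ~ ., data = demo) |>
  step_YeoJohnson(all_numeric_predictors())
```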

Do you have concerns about the variance of your predictors in these data?

Almost all variables are strongly positively skewed, and a handful (e.g., concave_points_se, perimeter_se, area_se; essentially all of the _se variables) have a few points with unusually high values that appear separated from the rest of the distribution and may be outliers.

GLM vs KNN

In this part of the assignment, you will compare the performance of a standard GLM vs. a KNN model (tuned on neighbors) for predicting breast cancer diagnoses from all available predictors. You will choose between these models using bootstrap resampling, and then evaluate the final performance of your selected model in the held-out test set created earlier in this script. You will now select and evaluate models using ROC AUC instead of accuracy.
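
As a reminder of what ROC AUC measures: it equals the probability that a randomly chosen positive case receives a higher predicted score than a randomly chosen negative case (ties counted as one half). A small base-R sketch with made-up scores (not from our models):

```r
# Concordance formulation of AUC (toy scores for illustration)
auc_concordance <- function(scores_pos, scores_neg) {
  wins <- outer(scores_pos, scores_neg, ">")   # positive scored higher
  ties <- outer(scores_pos, scores_neg, "==")  # tied scores count 0.5
  mean(wins + 0.5 * ties)
}

auc_concordance(scores_pos = c(0.9, 0.8, 0.6),
                scores_neg = c(0.7, 0.4, 0.3))  # 8 of 9 pairs concordant: ~0.889
```

Because AUC depends only on the ranking of scores, it is threshold-free and less distorted by class imbalance than accuracy.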

Bootstrap splits

Split data_train into 100 bootstrap samples stratified on diagnosis. Use the provided seed.

set.seed(02182002)

splits_boot <- data_train |> 
  bootstraps(times = 100, strata = "diagnosis") 

splits_boot |> print(n=10)
# Bootstrap sampling using stratification 
# A tibble: 100 × 2
   splits            id          
   <list>            <chr>       
 1 <split [282/101]> Bootstrap001
 2 <split [282/105]> Bootstrap002
 3 <split [282/94]>  Bootstrap003
 4 <split [282/104]> Bootstrap004
 5 <split [282/112]> Bootstrap005
 6 <split [282/105]> Bootstrap006
 7 <split [282/94]>  Bootstrap007
 8 <split [282/105]> Bootstrap008
 9 <split [282/95]>  Bootstrap009
10 <split [282/107]> Bootstrap010
# ℹ 90 more rows
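
The assessment-set sizes printed above (roughly 94–119 of 282) are the out-of-bag cases: each bootstrap draws 282 rows with replacement, which leaves out about 1/e ≈ 36.8% of rows on average. A quick base-R simulation (seed and rep count are arbitrary choices here):

```r
# Expected out-of-bag fraction for a bootstrap of n rows: (1 - 1/n)^n -> 1/e
set.seed(123)
n <- 282
oob_counts <- replicate(1000, n - length(unique(sample.int(n, replace = TRUE))))
mean(oob_counts) / n  # close to exp(-1) ~= 0.368, i.e., ~104 rows held out
```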

Build recipes

Write 2 recipes (one for GLM, one for KNN) to predict breast cancer diagnosis from all predictors in data_train. Include the minimal necessary steps for each algorithm, including what you learned from your light EDA above.

rec_glm <- recipe(diagnosis ~ ., data = data_train)

rec_knn <- recipe(diagnosis ~ ., data = data_train) |> 
  step_range(all_predictors())
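
The `step_range()` in the KNN recipe matters because KNN is distance-based: without rescaling, Euclidean distance is dominated by large-magnitude features. A small sketch using two values on the scale of `area_worst` and `smoothness_se` (numbers illustrative, taken loosely from the glimpse above):

```r
# Two hypothetical tumors described by one large- and one tiny-scale feature
a <- c(area_worst = 989.5, smoothness_se = 0.0055)
b <- c(area_worst = 475.7, smoothness_se = 0.0059)

dist_raw <- sqrt(sum((a - b)^2))
dist_raw  # ~513.8: effectively only area_worst contributes

# After range-scaling each feature to [0, 1], both can contribute to distance
```

The GLM, by contrast, estimates a coefficient per predictor, so its fit does not require predictors on a common scale.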

Fit GLM

Fit a logistic regression classifier using the recipe you created and your bootstrap splits. Use ROC AUC (roc_auc) as your performance metric.

fit_lr_boot <-
  logistic_reg() |> 
  set_engine("glm") |> 
  fit_resamples(preprocessor = rec_glm, 
                resamples = splits_boot, 
                metrics = metric_set(roc_auc))
→ A | warning: glm.fit: algorithm did not converge
→ B | warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
→ A | warning: glm.fit: algorithm did not converge
→ B | warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
→ A | warning: glm.fit: algorithm did not converge
→ B | warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
→ A | warning: glm.fit: algorithm did not converge
→ B | warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
→ A | warning: glm.fit: algorithm did not converge
→ B | warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
→ A | warning: glm.fit: algorithm did not converge
→ B | warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
→ A | warning: glm.fit: algorithm did not converge
→ B | warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
→ A | warning: glm.fit: algorithm did not converge
→ B | warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
→ A | warning: glm.fit: algorithm did not converge
→ B | warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Fitting our bootstrap splits in this way appears to work just fine. However, on closer inspection we see that there were some warnings. First let's look for the warnings by printing the fit object directly:

fit_lr_boot
# Resampling results
# Bootstrap sampling using stratification 
# A tibble: 100 × 4
    splits            id           .metrics         .notes          
    <list>            <chr>        <list>           <list>          
  1 <split [282/101]> Bootstrap001 <tibble [1 × 4]> <tibble [2 × 4]>
  2 <split [282/105]> Bootstrap002 <tibble [1 × 4]> <tibble [2 × 4]>
  3 <split [282/94]>  Bootstrap003 <tibble [1 × 4]> <tibble [2 × 4]>
  4 <split [282/104]> Bootstrap004 <tibble [1 × 4]> <tibble [2 × 4]>
  5 <split [282/112]> Bootstrap005 <tibble [1 × 4]> <tibble [2 × 4]>
  6 <split [282/105]> Bootstrap006 <tibble [1 × 4]> <tibble [2 × 4]>
  7 <split [282/94]>  Bootstrap007 <tibble [1 × 4]> <tibble [2 × 4]>
  8 <split [282/105]> Bootstrap008 <tibble [1 × 4]> <tibble [2 × 4]>
  9 <split [282/95]>  Bootstrap009 <tibble [1 × 4]> <tibble [2 × 4]>
 10 <split [282/107]> Bootstrap010 <tibble [1 × 4]> <tibble [2 × 4]>
 11 <split [282/99]>  Bootstrap011 <tibble [1 × 4]> <tibble [2 × 4]>
 12 <split [282/93]>  Bootstrap012 <tibble [1 × 4]> <tibble [2 × 4]>
 13 <split [282/98]>  Bootstrap013 <tibble [1 × 4]> <tibble [2 × 4]>
 14 <split [282/94]>  Bootstrap014 <tibble [1 × 4]> <tibble [2 × 4]>
 15 <split [282/97]>  Bootstrap015 <tibble [1 × 4]> <tibble [2 × 4]>
 16 <split [282/104]> Bootstrap016 <tibble [1 × 4]> <tibble [2 × 4]>
 17 <split [282/101]> Bootstrap017 <tibble [1 × 4]> <tibble [2 × 4]>
 18 <split [282/109]> Bootstrap018 <tibble [1 × 4]> <tibble [2 × 4]>
 19 <split [282/100]> Bootstrap019 <tibble [1 × 4]> <tibble [2 × 4]>
 20 <split [282/103]> Bootstrap020 <tibble [1 × 4]> <tibble [2 × 4]>
 21 <split [282/95]>  Bootstrap021 <tibble [1 × 4]> <tibble [2 × 4]>
 22 <split [282/105]> Bootstrap022 <tibble [1 × 4]> <tibble [2 × 4]>
 23 <split [282/108]> Bootstrap023 <tibble [1 × 4]> <tibble [2 × 4]>
 24 <split [282/119]> Bootstrap024 <tibble [1 × 4]> <tibble [2 × 4]>
 25 <split [282/110]> Bootstrap025 <tibble [1 × 4]> <tibble [2 × 4]>
 26 <split [282/118]> Bootstrap026 <tibble [1 × 4]> <tibble [2 × 4]>
 27 <split [282/104]> Bootstrap027 <tibble [1 × 4]> <tibble [2 × 4]>
 28 <split [282/106]> Bootstrap028 <tibble [1 × 4]> <tibble [2 × 4]>
 29 <split [282/106]> Bootstrap029 <tibble [1 × 4]> <tibble [2 × 4]>
 30 <split [282/95]>  Bootstrap030 <tibble [1 × 4]> <tibble [2 × 4]>
 31 <split [282/105]> Bootstrap031 <tibble [1 × 4]> <tibble [2 × 4]>
 32 <split [282/100]> Bootstrap032 <tibble [1 × 4]> <tibble [2 × 4]>
 33 <split [282/99]>  Bootstrap033 <tibble [1 × 4]> <tibble [2 × 4]>
 34 <split [282/105]> Bootstrap034 <tibble [1 × 4]> <tibble [2 × 4]>
 35 <split [282/102]> Bootstrap035 <tibble [1 × 4]> <tibble [2 × 4]>
 36 <split [282/111]> Bootstrap036 <tibble [1 × 4]> <tibble [2 × 4]>
 37 <split [282/96]>  Bootstrap037 <tibble [1 × 4]> <tibble [2 × 4]>
 38 <split [282/102]> Bootstrap038 <tibble [1 × 4]> <tibble [2 × 4]>
 39 <split [282/111]> Bootstrap039 <tibble [1 × 4]> <tibble [2 × 4]>
 40 <split [282/112]> Bootstrap040 <tibble [1 × 4]> <tibble [2 × 4]>
 41 <split [282/110]> Bootstrap041 <tibble [1 × 4]> <tibble [2 × 4]>
 42 <split [282/101]> Bootstrap042 <tibble [1 × 4]> <tibble [2 × 4]>
 43 <split [282/101]> Bootstrap043 <tibble [1 × 4]> <tibble [2 × 4]>
 44 <split [282/102]> Bootstrap044 <tibble [1 × 4]> <tibble [2 × 4]>
 45 <split [282/111]> Bootstrap045 <tibble [1 × 4]> <tibble [2 × 4]>
 46 <split [282/111]> Bootstrap046 <tibble [1 × 4]> <tibble [2 × 4]>
 47 <split [282/105]> Bootstrap047 <tibble [1 × 4]> <tibble [2 × 4]>
 48 <split [282/97]>  Bootstrap048 <tibble [1 × 4]> <tibble [2 × 4]>
 49 <split [282/103]> Bootstrap049 <tibble [1 × 4]> <tibble [2 × 4]>
 50 <split [282/100]> Bootstrap050 <tibble [1 × 4]> <tibble [2 × 4]>
 51 <split [282/103]> Bootstrap051 <tibble [1 × 4]> <tibble [2 × 4]>
 52 <split [282/103]> Bootstrap052 <tibble [1 × 4]> <tibble [2 × 4]>
 53 <split [282/109]> Bootstrap053 <tibble [1 × 4]> <tibble [2 × 4]>
 54 <split [282/106]> Bootstrap054 <tibble [1 × 4]> <tibble [2 × 4]>
 55 <split [282/107]> Bootstrap055 <tibble [1 × 4]> <tibble [2 × 4]>
 56 <split [282/96]>  Bootstrap056 <tibble [1 × 4]> <tibble [2 × 4]>
 57 <split [282/105]> Bootstrap057 <tibble [1 × 4]> <tibble [2 × 4]>
 58 <split [282/105]> Bootstrap058 <tibble [1 × 4]> <tibble [2 × 4]>
 59 <split [282/101]> Bootstrap059 <tibble [1 × 4]> <tibble [2 × 4]>
 60 <split [282/100]> Bootstrap060 <tibble [1 × 4]> <tibble [2 × 4]>
 61 <split [282/96]>  Bootstrap061 <tibble [1 × 4]> <tibble [2 × 4]>
 62 <split [282/102]> Bootstrap062 <tibble [1 × 4]> <tibble [2 × 4]>
 63 <split [282/112]> Bootstrap063 <tibble [1 × 4]> <tibble [2 × 4]>
 64 <split [282/115]> Bootstrap064 <tibble [1 × 4]> <tibble [2 × 4]>
 65 <split [282/105]> Bootstrap065 <tibble [1 × 4]> <tibble [2 × 4]>
 66 <split [282/113]> Bootstrap066 <tibble [1 × 4]> <tibble [2 × 4]>
 67 <split [282/107]> Bootstrap067 <tibble [1 × 4]> <tibble [2 × 4]>
 68 <split [282/105]> Bootstrap068 <tibble [1 × 4]> <tibble [2 × 4]>
 69 <split [282/99]>  Bootstrap069 <tibble [1 × 4]> <tibble [2 × 4]>
 70 <split [282/102]> Bootstrap070 <tibble [1 × 4]> <tibble [2 × 4]>
 71 <split [282/95]>  Bootstrap071 <tibble [1 × 4]> <tibble [2 × 4]>
 72 <split [282/97]>  Bootstrap072 <tibble [1 × 4]> <tibble [2 × 4]>
 73 <split [282/108]> Bootstrap073 <tibble [1 × 4]> <tibble [2 × 4]>
 74 <split [282/115]> Bootstrap074 <tibble [1 × 4]> <tibble [2 × 4]>
 75 <split [282/105]> Bootstrap075 <tibble [1 × 4]> <tibble [2 × 4]>
 76 <split [282/99]>  Bootstrap076 <tibble [1 × 4]> <tibble [2 × 4]>
 77 <split [282/106]> Bootstrap077 <tibble [1 × 4]> <tibble [2 × 4]>
 78 <split [282/109]> Bootstrap078 <tibble [1 × 4]> <tibble [2 × 4]>
 79 <split [282/109]> Bootstrap079 <tibble [1 × 4]> <tibble [2 × 4]>
 80 <split [282/103]> Bootstrap080 <tibble [1 × 4]> <tibble [2 × 4]>
 81 <split [282/101]> Bootstrap081 <tibble [1 × 4]> <tibble [2 × 4]>
 82 <split [282/105]> Bootstrap082 <tibble [1 × 4]> <tibble [2 × 4]>
 83 <split [282/100]> Bootstrap083 <tibble [1 × 4]> <tibble [2 × 4]>
 84 <split [282/98]>  Bootstrap084 <tibble [1 × 4]> <tibble [2 × 4]>
 85 <split [282/100]> Bootstrap085 <tibble [1 × 4]> <tibble [2 × 4]>
 86 <split [282/102]> Bootstrap086 <tibble [1 × 4]> <tibble [2 × 4]>
 87 <split [282/97]>  Bootstrap087 <tibble [1 × 4]> <tibble [2 × 4]>
 88 <split [282/110]> Bootstrap088 <tibble [1 × 4]> <tibble [2 × 4]>
 89 <split [282/106]> Bootstrap089 <tibble [1 × 4]> <tibble [2 × 4]>
 90 <split [282/96]>  Bootstrap090 <tibble [1 × 4]> <tibble [2 × 4]>
 91 <split [282/111]> Bootstrap091 <tibble [1 × 4]> <tibble [2 × 4]>
 92 <split [282/104]> Bootstrap092 <tibble [1 × 4]> <tibble [2 × 4]>
 93 <split [282/102]> Bootstrap093 <tibble [1 × 4]> <tibble [2 × 4]>
 94 <split [282/97]>  Bootstrap094 <tibble [1 × 4]> <tibble [2 × 4]>
 95 <split [282/97]>  Bootstrap095 <tibble [1 × 4]> <tibble [2 × 4]>
 96 <split [282/96]>  Bootstrap096 <tibble [1 × 4]> <tibble [2 × 4]>
 97 <split [282/101]> Bootstrap097 <tibble [1 × 4]> <tibble [2 × 4]>
 98 <split [282/108]> Bootstrap098 <tibble [1 × 4]> <tibble [2 × 4]>
 99 <split [282/102]> Bootstrap099 <tibble [1 × 4]> <tibble [2 × 4]>
100 <split [282/111]> Bootstrap100 <tibble [1 × 4]> <tibble [2 × 4]>

There were issues with some computations:

  - Warning(s) x100: glm.fit: algorithm did not converge
  - Warning(s) x100: glm.fit: fitted probabilities numerically 0 or 1 occurred

Run `show_notes(.Last.tune.result)` for more information.
# This tells us that we should use: show_notes(.Last.tune.result). So let’s run that:

show_notes(.Last.tune.result)
unique notes:
───────────────────────────────────
glm.fit: algorithm did not converge
───────────────────────────────────
glm.fit: fitted probabilities numerically 0 or 1 occurred

In this case, it just recapitulates the same messages we saw when printing the object. The bigger point here is: always inspect error and warning messages.

We are getting a warning message about our fitted probabilities. This occurs because the outcome is perfectly separable (remember that issue from our earlier discussions of logistic regression). The model can still be used for prediction, but we would not be able to trust the statistical tests of the parameter estimates. Those tests are not our focus here.

We are going to fit this model on the whole training sample to show why we get this warning message. When we aren't resampling, the warning appears directly when we fit.
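
A minimal base-R demonstration of the same phenomenon with made-up data: a single predictor that perfectly separates the two classes triggers the identical warning.

```r
# Toy, perfectly separated data (hypothetical values)
x <- c(1, 2, 3, 10, 11, 12)
y <- factor(c("b", "b", "b", "m", "m", "m"))

fit_sep <- glm(y ~ x, family = binomial)
# Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

coef(summary(fit_sep))  # huge estimates and standard errors; p-values near 1
```

This mirrors the summary below: the maximum-likelihood coefficients diverge toward infinity, so the estimates and standard errors blow up even though the predicted classes are perfect.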

# make feature matrix
rec_prep <- rec_glm |> 
  prep(data_train)

feat_train_glm <- rec_prep |> 
  bake(data_train)


# fit on full training sample
fit_best_glm <- logistic_reg() |> 
  set_engine("glm", control = list(maxit = 100)) |> 
  fit(diagnosis ~ ., data = feat_train_glm)
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
fit_best_glm$fit |> summary()

Call:
stats::glm(formula = diagnosis ~ ., family = stats::binomial, 
    data = data, control = ~list(maxit = 100))

Coefficients:
                          Estimate Std. Error z value Pr(>|z|)
(Intercept)              1.094e+02  9.229e+06       0        1
perimeter_se            -2.293e+01  7.566e+05       0        1
fractal_dimension_mean   1.167e+03  1.042e+08       0        1
concave_points_worst    -4.310e+02  1.090e+07       0        1
symmetry_mean            8.222e+01  1.742e+07       0        1
texture_se              -7.060e-01  9.537e+05       0        1
concave_points_se        1.336e+03  1.111e+08       0        1
concavity_mean          -5.896e+02  2.130e+07       0        1
fractal_dimension_se     7.799e+03  4.973e+08       0        1
radius_worst             3.012e+01  1.755e+06       0        1
concave_points_mean     -4.427e+02  3.349e+07       0        1
radius_mean              1.293e+01  5.222e+06       0        1
smoothness_se            5.648e+03  2.155e+08       0        1
smoothness_worst        -6.220e+02  1.873e+07       0        1
symmetry_se              3.178e+02  7.981e+07       0        1
radius_se               -2.286e+02  1.283e+07       0        1
concavity_worst         -6.620e+01  4.531e+06       0        1
concavity_se             3.150e+02  4.663e+07       0        1
compactness_se          -1.106e+03  5.112e+07       0        1
smoothness_mean         -2.835e+02  3.690e+07       0        1
area_se                  2.950e+00  1.117e+05       0        1
area_worst              -5.191e-01  1.212e+04       0        1
perimeter_mean          -6.444e+00  7.119e+05       0        1
compactness_mean         7.326e+02  2.785e+07       0        1
area_mean                2.901e-01  1.353e+04       0        1
fractal_dimension_worst -1.641e+03  6.941e+07       0        1
texture_mean            -3.705e+00  1.069e+05       0        1
perimeter_worst          2.604e+00  1.195e+05       0        1
symmetry_worst          -8.095e+01  1.207e+07       0        1
texture_worst            6.892e-01  1.142e+05       0        1
compactness_worst        2.856e+02  7.002e+06       0        1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 2.4423e+02  on 281  degrees of freedom
Residual deviance: 5.8048e-10  on 251  degrees of freedom
AIC: 62

Number of Fisher Scoring iterations: 28

Let’s take a closer look at the predictions our model is making.

# probabilities in train
predict(fit_best_glm, feat_train_glm, type = "prob") |> 
  print(n = 50)
# A tibble: 282 × 2
   .pred_malignant .pred_benign
             <dbl>        <dbl>
 1        2.22e-16        1    
 2        9.23e-12        1.000
 3        2.22e-16        1    
 4        2.22e-16        1    
 5        2.22e-16        1    
 6        2.22e-16        1    
 7        4.03e-12        1.000
 8        2.22e-16        1    
 9        2.22e-16        1    
10        2.22e-16        1    
11        2.22e-16        1    
12        2.22e-16        1    
13        2.74e-11        1.000
14        2.22e-16        1    
15        2.22e-16        1    
16        2.22e-16        1    
17        2.22e-16        1    
18        2.22e-16        1    
19        2.22e-16        1    
20        2.22e-16        1    
21        2.22e-16        1    
22        2.22e-16        1    
23        2.22e-16        1    
24        2.22e-16        1    
25        2.22e-16        1    
26        2.22e-16        1    
27        2.22e-16        1    
28        2.22e-16        1    
29        2.22e-16        1    
30        2.22e-16        1    
31        2.22e-16        1    
32        2.22e-16        1    
33        2.22e-16        1    
34        2.22e-16        1    
35        2.22e-16        1    
36        2.22e-16        1    
37        2.22e-16        1    
38        2.22e-16        1    
39        2.22e-16        1    
40        2.22e-16        1    
41        1.16e-11        1.000
42        2.22e-16        1    
43        1.07e-11        1.000
44        2.22e-16        1    
45        2.22e-16        1    
46        2.22e-16        1    
47        2.22e-16        1    
48        2.22e-16        1    
49        1.16e-11        1.000
50        2.22e-16        1    
# ℹ 232 more rows
# confusion matrix for train
cm_trn <- tibble(truth = feat_train_glm$diagnosis,
                 estimate = predict(fit_best_glm, feat_train_glm)$.pred_class) |> 
  conf_mat(truth, estimate)

cm_trn
           Truth
Prediction  malignant benign
  malignant        44      0
  benign            0    238
cm_trn |> 
  summary()
# A tibble: 13 × 3
   .metric              .estimator .estimate
   <chr>                <chr>          <dbl>
 1 accuracy             binary         1    
 2 kap                  binary         1    
 3 sens                 binary         1    
 4 spec                 binary         1    
 5 ppv                  binary         1    
 6 npv                  binary         1    
 7 mcc                  binary         1    
 8 j_index              binary         1    
 9 bal_accuracy         binary         1    
10 detection_prevalence binary         0.156
11 precision            binary         1    
12 recall               binary         1    
13 f_meas               binary         1    

Here we can see the cause of our warning message: the features perfectly predict the classes of the outcome in train (“perfect separation”), as seen in the confusion matrix above. This likely occurs because these features predict the outcome very well even in held-out data (see below), and some overfitting in training pushes us from very good to perfect prediction. Since we have confirmed that the warning message makes sense here, it is OK to proceed.

Print the average ROC AUC of your logistic regression model.

collect_metrics(fit_lr_boot, summarize = TRUE)
# A tibble: 1 × 6
  .metric .estimator  mean     n std_err .config        
  <chr>   <chr>      <dbl> <int>   <dbl> <chr>          
1 roc_auc binary     0.959   100 0.00302 pre0_mod0_post0

You might be wondering why our performance is not perfect across bootstrapped samples, given that we saw the warning for each of the 100 bootstraps above, which means our model always predicted perfectly within its bootstrap training data. For demonstration purposes, we will assess this model in test here as well.

# test feature matrix 
feat_test_glm <- rec_prep |> 
  bake(data_test)


# confusion matrix for test
cm_test <- tibble(truth = feat_test_glm$diagnosis,
                  estimate = predict(fit_best_glm, feat_test_glm)$.pred_class) |> 
  conf_mat(truth, estimate)

cm_test
           Truth
Prediction  malignant benign
  malignant        20      1
  benign            2    118

Performance is still pretty good here, but given the large number of predictors relative to the small number of training cases in each bootstrap, our models can fit their bootstrap training data perfectly every time without achieving the same performance in their corresponding out-of-bag assessment sets.

You should always check for these sorts of warnings by printing the model fit object.

Fit KNN

Set up a hyperparameter grid to consider a range of values for neighbors in your KNN models. NOTE: be sure that you are not considering values higher than the N of your training sample! In this dataset, the training N = 282.

hyper_grid <- expand.grid(neighbors = seq(1, 280, by = 10))
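
A quick base-R check that this grid respects the note above: the largest candidate value stays below the training N of 282.

```r
# The candidate values run 1, 11, 21, ..., 271
neighbors_grid <- seq(1, 280, by = 10)
length(neighbors_grid)  # 28 candidate values
max(neighbors_grid)     # 271, which is < 282
```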

Fit a KNN model using the recipe you created and your bootstrap splits. Use ROC AUC (roc_auc) as your performance metric.

fit_knn_boot <-
  nearest_neighbor(neighbors = tune()) |> 
  set_engine("kknn") |> 
  set_mode("classification") |>
  tune_grid(preprocessor = rec_knn, 
            resamples = splits_boot, 
            grid = hyper_grid, 
            metrics = metric_set(roc_auc))

Generate a plot to help you determine if you considered a wide enough range of values for neighbors.

plot_hyperparameters(fit_knn_boot, hp1 = "neighbors", metric = "roc_auc")

Print the best value for the neighbors hyperparameter across resamples based on model ROC AUC.

select_best(fit_knn_boot, metric = "roc_auc")
# A tibble: 1 × 2
  neighbors .config         
      <dbl> <chr>           
1       161 pre0_mod17_post0

Print the average ROC AUC of your best KNN model.

collect_metrics(fit_knn_boot, summarize = TRUE) |> 
  filter(neighbors == select_best(fit_knn_boot, metric = "roc_auc")$neighbors)
# A tibble: 1 × 7
  neighbors .metric .estimator  mean     n  std_err .config         
      <dbl> <chr>   <chr>      <dbl> <int>    <dbl> <chr>           
1       161 roc_auc binary     0.995   100 0.000492 pre0_mod17_post0

Select and fit best model

Now you will select your best model configuration among the various KNN and GLM models based on overall ROC AUC and train it on your full training sample.

Create training (feat_train) and test (feat_test) feature matrices using your best recipe (GLM or KNN)

rec_prep <- rec_knn |> 
  prep(data_train)

feat_train <- rec_prep |> 
  bake(data_train)

feat_test <- rec_prep |> 
  bake(data_test)

Fit your best performing model on the full training sample (feat_train).

fit_best_knn <- nearest_neighbor(neighbors = select_best(fit_knn_boot, metric = "roc_auc")$neighbors) |> 
  set_engine("kknn") |> 
  set_mode("classification") |> 
  fit(diagnosis ~ ., data = feat_train)

Evaluate the best model

Make a figure to plot the ROC of your best model in the test set.

roc_plot <- 
  tibble(truth = feat_test$diagnosis,
         prob = predict(fit_best_knn, feat_test, type = "prob")$.pred_malignant) |> 
  roc_curve(prob, truth = truth)

roc_plot |>
  ggplot(aes(x = 1 - specificity, y = sensitivity, color = .threshold)) +
  geom_path() +
  geom_abline(lty = 3) +
  coord_equal() +
  labs(x = "1 - Specificity (FPR)",
       y = "Sensitivity (TPR)")
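What roc_curve() is doing under the hood can be sketched by hand: every candidate threshold on the predicted probability yields one (FPR, TPR) point on the curve. A minimal sketch (in Python, using made-up probabilities rather than this assignment's predictions):

```python
# Each threshold on the predicted probability yields one point on the
# ROC curve: FPR = 1 - specificity on x, TPR = sensitivity on y.
truth = [1, 1, 1, 0, 0, 0, 0]                  # 1 = malignant (positive)
prob = [0.9, 0.8, 0.3, 0.7, 0.2, 0.1, 0.05]    # made-up predicted probabilities

def roc_point(threshold):
    tp = sum(1 for t, p in zip(truth, prob) if t == 1 and p >= threshold)
    fp = sum(1 for t, p in zip(truth, prob) if t == 0 and p >= threshold)
    pos = sum(truth)
    neg = len(truth) - pos
    return fp / neg, tp / pos                  # (FPR, TPR)

for thr in [0.75, 0.5, 0.25]:
    print(thr, roc_point(thr))
```

Lowering the threshold can only move the point up (more true positives) and/or right (more false positives), which is why the curve rises from (0, 0) to (1, 1).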

Generate a confusion matrix depicting your model’s performance in test.

cm <- tibble(truth = feat_test$diagnosis,
             estimate = predict(fit_best_knn, feat_test)$.pred_class) |> 
  conf_mat(truth, estimate)

cm
           Truth
Prediction  malignant benign
  malignant         9      0
  benign           13    119
cm |> 
  summary()
# A tibble: 13 × 3
   .metric              .estimator .estimate
   <chr>                <chr>          <dbl>
 1 accuracy             binary        0.908 
 2 kap                  binary        0.539 
 3 sens                 binary        0.409 
 4 spec                 binary        1     
 5 ppv                  binary        1     
 6 npv                  binary        0.902 
 7 mcc                  binary        0.607 
 8 j_index              binary        0.409 
 9 bal_accuracy         binary        0.705 
10 detection_prevalence binary        0.0638
11 precision            binary        1     
12 recall               binary        0.409 
13 f_meas               binary        0.581 

Make a plot of your confusion matrix.

autoplot(cm)

Report the ROC AUC, accuracy, sensitivity, specificity, PPV, and NPV of your best model in the held out test set.

# ROC AUC
auc <- tibble(truth = feat_test$diagnosis,
       prob = predict(fit_best_knn, feat_test, type = "prob")$.pred_malignant) |> 
  roc_auc(prob, truth = truth) |> 
  pull(.estimate)

# Accuracy, sensitivity, specificity, PPV, NPV
# Using pivot_wider() to make a tracking tibble
results <- cm |> 
  summary() |> 
  filter(.metric %in% c("accuracy", "sens", "spec", "ppv", "npv")) |> 
  select(-.estimator) |> 
  pivot_wider(names_from = .metric, values_from = .estimate) |> 
  mutate(model = str_c("knn_", fit_best_knn$fit$best.parameters$k),
         roc_auc = auc) |> 
  select(model, roc_auc, everything())

results
# A tibble: 1 × 7
  model   roc_auc accuracy  sens  spec   ppv   npv
  <chr>     <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl>
1 knn_161   0.983    0.908 0.409     1     1 0.902
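These summary metrics all follow directly from the four cells of the confusion matrix above (TP = 9, FN = 13, FP = 0, TN = 119). As a quick arithmetic check (sketched in Python purely for illustration):

```python
# Cells of the test-set confusion matrix above
tp, fn, fp, tn = 9, 13, 0, 119

sens = tp / (tp + fn)                    # sensitivity (recall): 9/22
spec = tn / (tn + fp)                    # specificity: 119/119
ppv = tp / (tp + fp)                     # positive predictive value: 9/9
npv = tn / (tn + fn)                     # negative predictive value: 119/132
acc = (tp + tn) / (tp + fp + tn + fn)    # accuracy: 128/141

print(round(sens, 3), spec, ppv, round(npv, 3), round(acc, 3))
```

These reproduce the 0.409 / 1 / 1 / 0.902 / 0.908 values in the results table, and make clear why accuracy looks good while sensitivity is poor: benign cases dominate the test set.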

Part 2: Addressing class imbalance

Since only 15% of our cases are malignant, let's see if we can achieve higher sensitivity by up-sampling our data with SMOTE. We will again select between a standard GLM and a tuned KNN using bootstrapped CV and evaluate our best model in test.

Build recipes

Update your previous recipes to up-sample the minority class (malignant) in diagnosis using step_smote(). Remember to make 2 recipes (one for GLM, one for KNN).

rec_glm_up <- recipe(diagnosis ~ ., data = data_train) |> 
  themis::step_smote(diagnosis, over_ratio = 1, neighbors = 5)

rec_knn_up <- recipe(diagnosis ~ ., data = data_train) |> 
  themis::step_smote(diagnosis, over_ratio = 1, neighbors = 5) |> 
  step_range(all_predictors())
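For intuition, step_smote() does not simply duplicate minority cases: each synthetic case is placed at a random point on the line segment between an existing minority case and one of its k nearest minority-class neighbors. A toy sketch of that interpolation (in Python; illustration only, not the themis implementation):

```python
import random

random.seed(8)  # reproducible illustration

# One minority-class (malignant) case and one of its k nearest
# minority-class neighbors, in a toy 2-feature space
case = [1.0, 2.0]
neighbor = [3.0, 4.0]

# SMOTE places the synthetic case at a random point on the segment
# between them: case + u * (neighbor - case), with u ~ Uniform(0, 1)
u = random.random()
synthetic = [c + u * (n - c) for c, n in zip(case, neighbor)]
print(synthetic)
```

Because synthetic cases land between real minority cases rather than on top of them, the classifier sees a denser minority region instead of exact copies, which tends to help more than plain duplication.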

Fit GLM

Fit an up-sampled logistic regression classifier using the new GLM recipe you created and your bootstrap splits. Use ROC AUC as your performance metric.

fit_lr_boot_up <-
  logistic_reg() |> 
  set_engine("glm",  control = list(maxit = 100)) |> 
  fit_resamples(preprocessor = rec_glm_up, 
                resamples = splits_boot, 
                metrics = metric_set(roc_auc))

Print the average ROC AUC of your logistic regression model

collect_metrics(fit_lr_boot_up, summarize = TRUE)
# A tibble: 1 × 6
  .metric .estimator  mean     n std_err .config        
  <chr>   <chr>      <dbl> <int>   <dbl> <chr>          
1 roc_auc binary     0.942   100 0.00351 pre0_mod0_post0

Fit KNN

Set up a hyperparameter grid to consider a range of values for neighbors in your KNN models.

hyper_grid <- expand.grid(neighbors = seq(1, 280, by = 10))

Fit an up-sampled KNN using the new KNN recipe you created and your bootstrap splits. Use ROC AUC as your performance metric.

fit_knn_boot_up <-
  nearest_neighbor(neighbors = tune()) |> 
  set_engine("kknn") |> 
  set_mode("classification") |>
  tune_grid(preprocessor = rec_knn_up, 
            resamples = splits_boot, 
            grid = hyper_grid, 
            metrics = metric_set(roc_auc))

Generate a plot to help you determine if you considered a wide enough range of values for neighbors.

plot_hyperparameters(fit_knn_boot_up, hp1 = "neighbors", metric = "roc_auc")

Print the best value for the neighbors hyperparameter across resamples based on model ROC AUC.

select_best(fit_knn_boot_up, metric = "roc_auc")
# A tibble: 1 × 2
  neighbors .config         
      <dbl> <chr>           
1       151 pre0_mod16_post0

Print the average ROC AUC of your best KNN model.

collect_metrics(fit_knn_boot_up, summarize = TRUE) |> 
  filter(neighbors == select_best(fit_knn_boot_up, metric = "roc_auc")$neighbors)
# A tibble: 1 × 7
  neighbors .metric .estimator  mean     n  std_err .config         
      <dbl> <chr>   <chr>      <dbl> <int>    <dbl> <chr>           
1       151 roc_auc binary     0.994   100 0.000599 pre0_mod16_post0

Select and fit the best model

Create the up-sampled training feature matrix using your best recipe (GLM or KNN). Remember, do not up-sample your test data!

rec_prep <- rec_knn_up |> 
  prep(data_train)

feat_train_up <- rec_prep |> 
  bake(new_data = NULL)

Fit your best performing up-sampled model on the full training sample.

fit_best_knn_up <- nearest_neighbor(neighbors = select_best(fit_knn_boot_up, metric = "roc_auc")$neighbors) |> 
  set_engine("kknn") |> 
  set_mode("classification") |> 
  fit(diagnosis ~ ., data = feat_train_up)

Evaluate the best model

Make a figure to plot the ROC of your best up-sampled model in the test set.

roc_plot_up <- 
  tibble(truth = feat_test$diagnosis,
         prob = predict(fit_best_knn_up, feat_test, type = "prob")$.pred_malignant) |> 
  roc_curve(prob, truth = truth)

roc_plot_up |>
  ggplot(aes(x = 1 - specificity, y = sensitivity, color = .threshold)) +
  geom_path() +
  geom_abline(lty = 3) +
  coord_equal() +
  labs(x = "1 - Specificity (FPR)",
       y = "Sensitivity (TPR)")

Generate a confusion matrix depicting your up-sampled model’s performance in test.

cm_up <- tibble(truth = feat_test$diagnosis,
             estimate = predict(fit_best_knn_up, feat_test)$.pred_class) |> 
  conf_mat(truth, estimate)

cm_up
           Truth
Prediction  malignant benign
  malignant        19      5
  benign            3    114

Make a plot of your confusion matrix.

autoplot(cm_up)

Report the ROC AUC, accuracy, sensitivity, specificity, PPV, and NPV of your best up-sampled model in the held out test set.

# ROC AUC
auc <- tibble(truth = feat_test$diagnosis,
       prob = predict(fit_best_knn_up, feat_test, type = "prob")$.pred_malignant) |> 
  roc_auc(prob, truth = truth) |> 
  pull(.estimate)

# Accuracy, sensitivity, specificity, PPV, NPV
results_up <- cm_up |> 
  summary() |> 
  filter(.metric %in% c("accuracy", "sens", "spec", "ppv", "npv")) |> 
  select(-.estimator) |> 
  pivot_wider(names_from = .metric, values_from = .estimate) |> 
  mutate(model = str_c("knn_smote_", fit_best_knn_up$fit$best.parameters$k),
         roc_auc = auc) |> 
  select(model, roc_auc, everything())

results <- results |> 
  bind_rows(results_up)

results
# A tibble: 2 × 7
  model         roc_auc accuracy  sens  spec   ppv   npv
  <chr>           <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl>
1 knn_161         0.983    0.908 0.409 1     1     0.902
2 knn_smote_151   0.987    0.943 0.864 0.958 0.792 0.974
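A test-set ROC AUC near 0.99 has a concrete interpretation: it is (up to ties) the probability that a randomly chosen malignant case receives a higher predicted probability than a randomly chosen benign case. A minimal sketch of that rank-based view, with made-up predictions (in Python, for illustration):

```python
# ROC AUC as a rank statistic: the fraction of (malignant, benign) pairs
# in which the malignant case gets the higher predicted probability
# (ties count as half). Probabilities below are made up.
pos_probs = [0.9, 0.8, 0.3]            # predictions for malignant cases
neg_probs = [0.7, 0.2, 0.1, 0.05]      # predictions for benign cases

pairs = [(p, n) for p in pos_probs for n in neg_probs]
auc = sum(1.0 if p > n else 0.5 if p == n else 0.0
          for p, n in pairs) / len(pairs)
print(round(auc, 3))  # 11 of 12 pairs are correctly ordered
```

This is also why ROC AUC is threshold-free: it depends only on how the two classes are ranked, not on where we later cut the probabilities.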

Part 3: New Classification Threshold

Now you want to check whether there may be an additional benefit to your model's performance if you adjust the classification threshold from its default of 50% to 40%.

1) Adjust classification threshold to 40%

Make a tibble containing the following variables:

  • truth: The true values of diagnosis in your test set
  • prob: The predicted probabilities made by your best up-sampled model above in the test set
  • estimate_40: Binary predictions of diagnosis (benign vs malignant) created by applying a threshold of 40% to your best model’s predicted probabilities.
preds <- tibble(truth = feat_test$diagnosis,
                prob = predict(fit_best_knn_up, feat_test, type = "prob")$.pred_malignant) |> 
  mutate(estimate_40 = if_else(prob > .40, "malignant", "benign"),
         estimate_40 = factor(estimate_40, levels = c("malignant", "benign")))

preds |> print(n=10)
# A tibble: 141 × 3
   truth        prob estimate_40
   <fct>       <dbl> <fct>      
 1 malignant 0.447   malignant  
 2 benign    0.0215  benign     
 3 benign    0.00473 benign     
 4 benign    0       benign     
 5 malignant 0.998   malignant  
 6 benign    0.0252  benign     
 7 benign    0.149   benign     
 8 benign    0.00497 benign     
 9 benign    0.224   benign     
10 benign    0.00995 benign     
# ℹ 131 more rows

2) Evaluate model at new threshold

Generate a confusion matrix depicting your up-sampled model’s performance in test at your new threshold.

cm_40 <- preds |> 
  conf_mat(truth = truth, estimate = estimate_40)

cm_40
           Truth
Prediction  malignant benign
  malignant        21      7
  benign            1    112

Make a plot of your confusion matrix.

autoplot(cm_40)

Report the ROC AUC, accuracy, sensitivity, specificity, PPV, and NPV of your best up-sampled model in the held-out test set.

# ROC AUC
auc <- tibble(truth = feat_test$diagnosis,
                prob = predict(fit_best_knn_up, feat_test, type = "prob")$.pred_malignant) |> 
  roc_auc(prob, truth = truth) |> 
  pull(.estimate)

# Accuracy, sensitivity, specificity, PPV, NPV
results_40 <- cm_40 |> 
  summary() |> 
  filter(.metric %in% c("accuracy", "sens", "spec", "ppv", "npv")) |> 
  select(-.estimator) |> 
  pivot_wider(names_from = .metric, values_from = .estimate) |> 
  mutate(model = str_c("knn_smote_", fit_best_knn_up$fit$best.parameters$k, "_thresh_40"),
         roc_auc = auc) |> 
  select(model, roc_auc, everything())

results <- results |> 
  bind_rows(results_40)

results
# A tibble: 3 × 7
  model                   roc_auc accuracy  sens  spec   ppv   npv
  <chr>                     <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl>
1 knn_161                   0.983    0.908 0.409 1     1     0.902
2 knn_smote_151             0.987    0.943 0.864 0.958 0.792 0.974
3 knn_smote_151_thresh_40   0.987    0.943 0.955 0.941 0.75  0.991

Adjusting the threshold to 40% improved sensitivity (0.864 to 0.955) at the cost of some specificity (0.958 to 0.941) and PPV (0.792 to 0.75): we capture two additional malignant cases but also label two more benign cases as malignant. The histograms below show why. The black line marks the default threshold of .5, which already separates the classes fairly well; the red line at .4 picks up the few malignant cases with intermediate predicted probabilities while also sweeping in some benign cases as false positives.

ggplot(data = preds, aes(x = prob)) + 
  geom_histogram(bins = 15) +
  facet_wrap(vars(truth), nrow = 2) +
  xlab("Pr(Disease)") +
  geom_vline(xintercept = .5) +
  geom_vline(xintercept = .4, color = "red")

Let's see what would happen if we lowered the threshold to .3.

ggplot(data = preds, aes(x = prob)) + 
  geom_histogram(bins = 15) +
  facet_wrap(vars(truth), nrow = 2) +
  xlab("Pr(Disease)") +
  geom_vline(xintercept = .3, color = "red")

preds <- tibble(truth = feat_test$diagnosis,
                prob = predict(fit_best_knn_up, feat_test, type = "prob")$.pred_malignant) |> 
  mutate(estimate_30 = if_else(prob > .30, "malignant", "benign"),
         estimate_30 = factor(estimate_30, levels = c("malignant", "benign")))

cm_30 <- preds |> 
  conf_mat(truth = truth, estimate = estimate_30)


results_30 <- cm_30 |> 
  summary() |> 
  filter(.metric %in% c("accuracy", "sens", "spec", "ppv", "npv")) |> 
  select(-.estimator) |> 
  pivot_wider(names_from = .metric, values_from = .estimate) |> 
  mutate(model = str_c("knn_smote_", fit_best_knn_up$fit$best.parameters$k, "_thresh_30"),
         roc_auc = auc) |> 
  select(model, roc_auc, everything())

results <- results |> 
  bind_rows(results_30)


results
# A tibble: 4 × 7
  model                   roc_auc accuracy  sens  spec   ppv   npv
  <chr>                     <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl>
1 knn_161                   0.983    0.908 0.409 1     1     0.902
2 knn_smote_151             0.987    0.943 0.864 0.958 0.792 0.974
3 knn_smote_151_thresh_40   0.987    0.943 0.955 0.941 0.75  0.991
4 knn_smote_151_thresh_30   0.987    0.922 0.955 0.916 0.677 0.991

Lowering the threshold to .30 did not improve sensitivity any further: it remains at 0.955, because the one remaining missed malignant case has a predicted probability below .30. Meanwhile we pick up additional false positives, lowering specificity and PPV. Still, favoring sensitivity may be appropriate for this problem, since falsely labeling benign cases as malignant is a less costly error than missing malignant cases!

✭✭✭ You are a machine learning superstar ✭✭✭