Homework Unit 8: Advanced Performance Metrics
Introduction
This file serves as the answer key for the Unit_08 homework. Unit 8 Advanced Performance Metrics in the course web book contains all materials required for this assignment.
In this assignment, we demonstrate how the selected performance metric and the outcome balance affect our selection and evaluation of the best performing model.
Setup
Handle conflicts
options(conflicts.policy = "depends.ok")
devtools::source_url("https://github.com/jjcurtin/lab_support/blob/main/fun_ml.R?raw=true")
tidymodels_conflictRules()
Load required packages
library(tidyverse)
library(tidymodels)
Source function scripts (John’s or your own)
devtools::source_url("https://github.com/jjcurtin/lab_support/blob/main/fun_plots.R?raw=true")
devtools::source_url("https://github.com/jjcurtin/lab_support/blob/main/fun_eda.R?raw=true")
Specify other global settings
Since we are going to use cache_rds(), we are also going to include rerun_setting <- FALSE in this chunk.
theme_set(theme_classic())
options(tibble.width = Inf, dplyr.print_max = Inf)
rerun_setting <- FALSE
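For reference, a cache_rds() call typically looks something like the sketch below. This assumes xfun::cache_rds(); the dir and file names are illustrative, and rerun = rerun_setting is what lets the flag above control whether cached results are recomputed.
# Sketch: cache an expensive computation; set rerun_setting <- TRUE to force recomputation
fits <- xfun::cache_rds(
  expr = {
    # ...expensive code (e.g., a tune_grid() call) goes here...
  },
  rerun = rerun_setting,
  dir = "cache/",
  file = "fits_example")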
Paths
path_data <- "homework/unit_08"
Set up parallel processing
Note: you can type cl into your console to see how many cores your computer has.
cl <- parallel::makePSOCKcluster(parallel::detectCores(logical = FALSE))
doParallel::registerDoParallel(cl)
Read in data
Read in the breast_cancer.csv data file and save it as an object called data_all, perform any checks (i.e., light cleaning EDA) needed on the data, and set the outcome diagnosis as a factor with malignant as the second level.
data_all <- read_csv(here::here(path_data, "breast_cancer.csv"), col_types = cols()) |>
  glimpse()
Rows: 423
Columns: 31
$ diagnosis <chr> "malignant", "benign", "benign", "malignant", …
$ perimeter_se <dbl> 1.9740, 1.6670, 1.4890, 2.9890, 2.6840, 1.4450…
$ fractal_dimension_mean <dbl> 0.05986, 0.06320, 0.05828, 0.06768, 0.05934, 0…
$ concave_points_worst <dbl> 0.12520, 0.11050, 0.03002, 0.20270, 0.06544, 0…
$ symmetry_mean <dbl> 0.1594, 0.1886, 0.1845, 0.2157, 0.1834, 0.1514…
$ texture_se <dbl> 0.3621, 0.7339, 1.6470, 0.9489, 0.8429, 1.0660…
$ concave_points_se <dbl> 0.008260, 0.013040, 0.003419, 0.012710, 0.0091…
$ concavity_mean <dbl> 0.075500, 0.070970, 0.004967, 0.169000, 0.0263…
$ fractal_dimension_se <dbl> 0.002881, 0.001982, 0.002534, 0.003884, 0.0014…
$ radius_worst <dbl> 17.770, 12.640, 12.360, 18.810, 12.970, 14.200…
$ concave_points_mean <dbl> 0.040790, 0.044970, 0.006434, 0.089230, 0.0206…
$ radius_mean <dbl> 15.120, 11.610, 11.220, 14.870, 11.500, 12.620…
$ smoothness_se <dbl> 0.005472, 0.005884, 0.004359, 0.006985, 0.0063…
$ smoothness_worst <dbl> 0.14910, 0.14150, 0.09994, 0.18780, 0.11830, 0…
$ symmetry_se <dbl> 0.01523, 0.01848, 0.01916, 0.01602, 0.02292, 0…
$ radius_se <dbl> 0.2711, 0.2456, 0.2239, 0.4266, 0.3927, 0.2449…
$ concavity_worst <dbl> 0.33270, 0.23020, 0.02318, 0.47040, 0.08105, 0…
$ concavity_se <dbl> 0.020390, 0.026310, 0.003223, 0.030110, 0.0124…
$ compactness_se <dbl> 0.019190, 0.020050, 0.006813, 0.025630, 0.0106…
$ smoothness_mean <dbl> 0.08876, 0.10880, 0.07780, 0.11620, 0.09345, 0…
$ area_se <dbl> 26.440, 15.890, 15.460, 41.180, 26.990, 18.510…
$ area_worst <dbl> 989.5, 475.7, 470.9, 1095.0, 508.9, 624.0, 544…
$ perimeter_mean <dbl> 98.78, 75.46, 70.79, 98.64, 73.28, 81.35, 79.0…
$ compactness_mean <dbl> 0.09588, 0.11680, 0.03574, 0.16490, 0.05991, 0…
$ area_mean <dbl> 716.6, 408.2, 386.8, 682.5, 407.4, 496.4, 466.…
$ fractal_dimension_worst <dbl> 0.09740, 0.07427, 0.07307, 0.10650, 0.06487, 0…
$ texture_mean <dbl> 16.68, 16.02, 33.81, 16.67, 18.45, 23.97, 18.5…
$ perimeter_worst <dbl> 117.70, 81.93, 78.44, 127.10, 83.12, 90.67, 85…
$ symmetry_worst <dbl> 0.3415, 0.2787, 0.2911, 0.3585, 0.2740, 0.2826…
$ texture_worst <dbl> 20.24, 19.67, 41.78, 27.37, 22.46, 31.31, 27.4…
$ compactness_worst <dbl> 0.33310, 0.21700, 0.06885, 0.44800, 0.10490, 0…
data_all |>
  skim_some()
Name | data_all |
Number of rows | 423 |
Number of columns | 31 |
_______________________ | |
Column type frequency: | |
character | 1 |
numeric | 30 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
diagnosis | 0 | 1 | 6 | 9 | 0 | 2 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | p0 | p100 |
---|---|---|---|---|
perimeter_se | 0 | 1 | 0.76 | 21.98 |
fractal_dimension_mean | 0 | 1 | 0.05 | 0.10 |
concave_points_worst | 0 | 1 | 0.00 | 0.29 |
symmetry_mean | 0 | 1 | 0.11 | 0.29 |
texture_se | 0 | 1 | 0.36 | 4.88 |
concave_points_se | 0 | 1 | 0.00 | 0.05 |
concavity_mean | 0 | 1 | 0.00 | 0.43 |
fractal_dimension_se | 0 | 1 | 0.00 | 0.03 |
radius_worst | 0 | 1 | 7.93 | 33.13 |
concave_points_mean | 0 | 1 | 0.00 | 0.19 |
radius_mean | 0 | 1 | 6.98 | 28.11 |
smoothness_se | 0 | 1 | 0.00 | 0.02 |
smoothness_worst | 0 | 1 | 0.07 | 0.21 |
symmetry_se | 0 | 1 | 0.01 | 0.08 |
radius_se | 0 | 1 | 0.11 | 2.87 |
concavity_worst | 0 | 1 | 0.00 | 1.25 |
concavity_se | 0 | 1 | 0.00 | 0.40 |
compactness_se | 0 | 1 | 0.00 | 0.11 |
smoothness_mean | 0 | 1 | 0.05 | 0.16 |
area_se | 0 | 1 | 6.80 | 525.60 |
area_worst | 0 | 1 | 185.20 | 3432.00 |
perimeter_mean | 0 | 1 | 43.79 | 188.50 |
compactness_mean | 0 | 1 | 0.02 | 0.35 |
area_mean | 0 | 1 | 143.50 | 2499.00 |
fractal_dimension_worst | 0 | 1 | 0.06 | 0.17 |
texture_mean | 0 | 1 | 9.71 | 39.28 |
perimeter_worst | 0 | 1 | 50.41 | 229.30 |
symmetry_worst | 0 | 1 | 0.16 | 0.66 |
texture_worst | 0 | 1 | 12.02 | 49.54 |
compactness_worst | 0 | 1 | 0.03 | 0.87 |
data_all <- data_all |>
  mutate(diagnosis = factor(diagnosis, levels = c("benign", "malignant"))) |>
  glimpse()
Rows: 423
Columns: 31
$ diagnosis <fct> malignant, benign, benign, malignant, benign, …
$ perimeter_se <dbl> 1.9740, 1.6670, 1.4890, 2.9890, 2.6840, 1.4450…
$ fractal_dimension_mean <dbl> 0.05986, 0.06320, 0.05828, 0.06768, 0.05934, 0…
$ concave_points_worst <dbl> 0.12520, 0.11050, 0.03002, 0.20270, 0.06544, 0…
$ symmetry_mean <dbl> 0.1594, 0.1886, 0.1845, 0.2157, 0.1834, 0.1514…
$ texture_se <dbl> 0.3621, 0.7339, 1.6470, 0.9489, 0.8429, 1.0660…
$ concave_points_se <dbl> 0.008260, 0.013040, 0.003419, 0.012710, 0.0091…
$ concavity_mean <dbl> 0.075500, 0.070970, 0.004967, 0.169000, 0.0263…
$ fractal_dimension_se <dbl> 0.002881, 0.001982, 0.002534, 0.003884, 0.0014…
$ radius_worst <dbl> 17.770, 12.640, 12.360, 18.810, 12.970, 14.200…
$ concave_points_mean <dbl> 0.040790, 0.044970, 0.006434, 0.089230, 0.0206…
$ radius_mean <dbl> 15.120, 11.610, 11.220, 14.870, 11.500, 12.620…
$ smoothness_se <dbl> 0.005472, 0.005884, 0.004359, 0.006985, 0.0063…
$ smoothness_worst <dbl> 0.14910, 0.14150, 0.09994, 0.18780, 0.11830, 0…
$ symmetry_se <dbl> 0.01523, 0.01848, 0.01916, 0.01602, 0.02292, 0…
$ radius_se <dbl> 0.2711, 0.2456, 0.2239, 0.4266, 0.3927, 0.2449…
$ concavity_worst <dbl> 0.33270, 0.23020, 0.02318, 0.47040, 0.08105, 0…
$ concavity_se <dbl> 0.020390, 0.026310, 0.003223, 0.030110, 0.0124…
$ compactness_se <dbl> 0.019190, 0.020050, 0.006813, 0.025630, 0.0106…
$ smoothness_mean <dbl> 0.08876, 0.10880, 0.07780, 0.11620, 0.09345, 0…
$ area_se <dbl> 26.440, 15.890, 15.460, 41.180, 26.990, 18.510…
$ area_worst <dbl> 989.5, 475.7, 470.9, 1095.0, 508.9, 624.0, 544…
$ perimeter_mean <dbl> 98.78, 75.46, 70.79, 98.64, 73.28, 81.35, 79.0…
$ compactness_mean <dbl> 0.09588, 0.11680, 0.03574, 0.16490, 0.05991, 0…
$ area_mean <dbl> 716.6, 408.2, 386.8, 682.5, 407.4, 496.4, 466.…
$ fractal_dimension_worst <dbl> 0.09740, 0.07427, 0.07307, 0.10650, 0.06487, 0…
$ texture_mean <dbl> 16.68, 16.02, 33.81, 16.67, 18.45, 23.97, 18.5…
$ perimeter_worst <dbl> 117.70, 81.93, 78.44, 127.10, 83.12, 90.67, 85…
$ symmetry_worst <dbl> 0.3415, 0.2787, 0.2911, 0.3585, 0.2740, 0.2826…
$ texture_worst <dbl> 20.24, 19.67, 41.78, 27.37, 22.46, 31.31, 27.4…
$ compactness_worst <dbl> 0.33310, 0.21700, 0.06885, 0.44800, 0.10490, 0…
data_all |>
  skim_some() |>
  skimr::yank("factor")
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
diagnosis | 0 | 1 | FALSE | 2 | ben: 357, mal: 66 |
Print a table to see the balance of positive and negative cases of your outcome variable.
data_all |>
  janitor::tabyl(diagnosis)
diagnosis n percent
benign 357 0.8439716
malignant 66 0.1560284
Our outcome variable is unbalanced (357 benign vs. 66 malignant, roughly 84%/16%). With so few positive cases, models tend to favor the majority class, which can lower sensitivity (i.e., fewer true positives detected), and the low prevalence of malignant cases also constrains the positive predictive value we can achieve.
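To see why the base rate matters for PPV, here is a small illustrative calculation (the sensitivity and specificity values are made up for this example, not taken from our models):
# Illustration only: PPV as a function of prevalence for fixed sens/spec
sens_demo <- 0.90
spec_demo <- 0.90
prev_demo <- c(0.16, 0.50)  # roughly our base rate vs. a balanced outcome
ppv_demo <- (sens_demo * prev_demo) /
  (sens_demo * prev_demo + (1 - spec_demo) * (1 - prev_demo))
round(ppv_demo, 2)  # PPV is much lower at low prevalence even with the same sens/spec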
Split data into train and test
Hold out 1/3 of the data as a test set for evaluation using the initial_split() function. Use the provided seed. Stratify on diagnosis.
set.seed(12345)
splits_test <- data_all |>
  initial_split(prop = 2/3, strata = "diagnosis")

data_train <- splits_test |>
  analysis()

data_test <- splits_test |>
  assessment()
Light modeling EDA
Look at correlations between predictors in data_train.
data_train |>
  select(where(is.numeric)) |>
  cor() |>
  corrplot::corrplot(type = "upper")
Visualize the variance of your predictors in data_train.
data_train |>
  select(where(is.numeric)) |>
  names() |>
  map(\(name) plot_box_violin(df = data_train, x = name)) |>
  cowplot::plot_grid(plotlist = _, ncol = 5)
Looking at the variability of our predictors allows us to examine each variable's distribution and flag potential outlier values. Examining the distributions is especially important here since we are considering fitting a linear (logistic regression) model, and strongly skewed predictors can make such models sensitive to a small number of extreme observations. Outlier values can be problematic for all types of models if they have high influence and distort the shape of the relationship between our predictors and the outcome.
Transformations (e.g., log, Yeo-Johnson) can be applied to normalize the distributional shape of predictors. Options for handling outliers include excluding outlier values that are clearly errors/invalid, retaining outliers that we have no evidence are incorrect, or bringing outlier values in to the fence.
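As a sketch (not used in the models below; rec_demo is just an illustrative name), skew-reducing steps could be added to a recipe like this:
# Illustration only: reduce skew before range-scaling the predictors
rec_demo <- recipe(diagnosis ~ ., data = data_train) |>
  step_YeoJohnson(all_numeric_predictors()) |>
  step_range(all_numeric_predictors())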
Almost all variables are very positively skewed, with a handful of variables (e.g., concave_points_se, perimeter_se, area_se, and basically all _se variables) having a few points with unusually high values that appear separated from the distribution and may be outliers.
GLM vs KNN
Bootstrap splits
Split data_train into 100 bootstrap samples stratified on diagnosis.
set.seed(12345)
splits_boot <- data_train |>
  bootstraps(times = 100, strata = "diagnosis")
splits_boot
# Bootstrap sampling using stratification
# A tibble: 100 × 2
splits id
<list> <chr>
1 <split [282/106]> Bootstrap001
2 <split [282/101]> Bootstrap002
3 <split [282/104]> Bootstrap003
4 <split [282/98]> Bootstrap004
5 <split [282/109]> Bootstrap005
6 <split [282/91]> Bootstrap006
7 <split [282/103]> Bootstrap007
8 <split [282/95]> Bootstrap008
9 <split [282/108]> Bootstrap009
10 <split [282/101]> Bootstrap010
11 <split [282/110]> Bootstrap011
12 <split [282/100]> Bootstrap012
13 <split [282/107]> Bootstrap013
14 <split [282/106]> Bootstrap014
15 <split [282/100]> Bootstrap015
16 <split [282/98]> Bootstrap016
17 <split [282/108]> Bootstrap017
18 <split [282/106]> Bootstrap018
19 <split [282/114]> Bootstrap019
20 <split [282/110]> Bootstrap020
21 <split [282/108]> Bootstrap021
22 <split [282/107]> Bootstrap022
23 <split [282/103]> Bootstrap023
24 <split [282/111]> Bootstrap024
25 <split [282/101]> Bootstrap025
26 <split [282/98]> Bootstrap026
27 <split [282/107]> Bootstrap027
28 <split [282/102]> Bootstrap028
29 <split [282/113]> Bootstrap029
30 <split [282/104]> Bootstrap030
31 <split [282/99]> Bootstrap031
32 <split [282/92]> Bootstrap032
33 <split [282/103]> Bootstrap033
34 <split [282/100]> Bootstrap034
35 <split [282/102]> Bootstrap035
36 <split [282/93]> Bootstrap036
37 <split [282/99]> Bootstrap037
38 <split [282/97]> Bootstrap038
39 <split [282/101]> Bootstrap039
40 <split [282/97]> Bootstrap040
41 <split [282/107]> Bootstrap041
42 <split [282/106]> Bootstrap042
43 <split [282/97]> Bootstrap043
44 <split [282/103]> Bootstrap044
45 <split [282/119]> Bootstrap045
46 <split [282/99]> Bootstrap046
47 <split [282/103]> Bootstrap047
48 <split [282/99]> Bootstrap048
49 <split [282/100]> Bootstrap049
50 <split [282/112]> Bootstrap050
51 <split [282/101]> Bootstrap051
52 <split [282/103]> Bootstrap052
53 <split [282/115]> Bootstrap053
54 <split [282/94]> Bootstrap054
55 <split [282/113]> Bootstrap055
56 <split [282/100]> Bootstrap056
57 <split [282/101]> Bootstrap057
58 <split [282/100]> Bootstrap058
59 <split [282/94]> Bootstrap059
60 <split [282/102]> Bootstrap060
61 <split [282/105]> Bootstrap061
62 <split [282/105]> Bootstrap062
63 <split [282/111]> Bootstrap063
64 <split [282/106]> Bootstrap064
65 <split [282/100]> Bootstrap065
66 <split [282/104]> Bootstrap066
67 <split [282/98]> Bootstrap067
68 <split [282/107]> Bootstrap068
69 <split [282/95]> Bootstrap069
70 <split [282/102]> Bootstrap070
71 <split [282/108]> Bootstrap071
72 <split [282/106]> Bootstrap072
73 <split [282/101]> Bootstrap073
74 <split [282/97]> Bootstrap074
75 <split [282/102]> Bootstrap075
76 <split [282/98]> Bootstrap076
77 <split [282/100]> Bootstrap077
78 <split [282/102]> Bootstrap078
79 <split [282/109]> Bootstrap079
80 <split [282/112]> Bootstrap080
81 <split [282/108]> Bootstrap081
82 <split [282/106]> Bootstrap082
83 <split [282/112]> Bootstrap083
84 <split [282/104]> Bootstrap084
85 <split [282/103]> Bootstrap085
86 <split [282/102]> Bootstrap086
87 <split [282/102]> Bootstrap087
88 <split [282/114]> Bootstrap088
89 <split [282/107]> Bootstrap089
90 <split [282/112]> Bootstrap090
91 <split [282/100]> Bootstrap091
92 <split [282/99]> Bootstrap092
93 <split [282/106]> Bootstrap093
94 <split [282/103]> Bootstrap094
95 <split [282/98]> Bootstrap095
96 <split [282/101]> Bootstrap096
97 <split [282/110]> Bootstrap097
98 <split [282/100]> Bootstrap098
99 <split [282/108]> Bootstrap099
100 <split [282/97]> Bootstrap100
Build recipes
rec_glm <- recipe(diagnosis ~ ., data = data_train)

rec_knn <- recipe(diagnosis ~ ., data = data_train) |>
  step_range(all_predictors())
Fit GLM
fit_lr_boot <-
  logistic_reg() |>
  set_engine("glm") |>
  fit_resamples(preprocessor = rec_glm,
                resamples = splits_boot,
                metrics = metric_set(roc_auc))
Fitting our bootstrap splits in this way appears to work just fine. However, on closer inspection we see that there were some warnings. First, let's look for the warnings by calling the fit object directly:
fit_lr_boot
# Resampling results
# Bootstrap sampling using stratification
# A tibble: 100 × 4
splits id .metrics .notes
<list> <chr> <list> <list>
1 <split [282/106]> Bootstrap001 <tibble [1 × 4]> <tibble [1 × 3]>
2 <split [282/101]> Bootstrap002 <tibble [1 × 4]> <tibble [1 × 3]>
3 <split [282/104]> Bootstrap003 <tibble [1 × 4]> <tibble [1 × 3]>
4 <split [282/98]> Bootstrap004 <tibble [1 × 4]> <tibble [1 × 3]>
5 <split [282/109]> Bootstrap005 <tibble [1 × 4]> <tibble [1 × 3]>
6 <split [282/91]> Bootstrap006 <tibble [1 × 4]> <tibble [1 × 3]>
7 <split [282/103]> Bootstrap007 <tibble [1 × 4]> <tibble [1 × 3]>
8 <split [282/95]> Bootstrap008 <tibble [1 × 4]> <tibble [1 × 3]>
9 <split [282/108]> Bootstrap009 <tibble [1 × 4]> <tibble [1 × 3]>
10 <split [282/101]> Bootstrap010 <tibble [1 × 4]> <tibble [1 × 3]>
11 <split [282/110]> Bootstrap011 <tibble [1 × 4]> <tibble [1 × 3]>
12 <split [282/100]> Bootstrap012 <tibble [1 × 4]> <tibble [1 × 3]>
13 <split [282/107]> Bootstrap013 <tibble [1 × 4]> <tibble [1 × 3]>
14 <split [282/106]> Bootstrap014 <tibble [1 × 4]> <tibble [1 × 3]>
15 <split [282/100]> Bootstrap015 <tibble [1 × 4]> <tibble [1 × 3]>
16 <split [282/98]> Bootstrap016 <tibble [1 × 4]> <tibble [1 × 3]>
17 <split [282/108]> Bootstrap017 <tibble [1 × 4]> <tibble [1 × 3]>
18 <split [282/106]> Bootstrap018 <tibble [1 × 4]> <tibble [1 × 3]>
19 <split [282/114]> Bootstrap019 <tibble [1 × 4]> <tibble [1 × 3]>
20 <split [282/110]> Bootstrap020 <tibble [1 × 4]> <tibble [1 × 3]>
21 <split [282/108]> Bootstrap021 <tibble [1 × 4]> <tibble [1 × 3]>
22 <split [282/107]> Bootstrap022 <tibble [1 × 4]> <tibble [1 × 3]>
23 <split [282/103]> Bootstrap023 <tibble [1 × 4]> <tibble [1 × 3]>
24 <split [282/111]> Bootstrap024 <tibble [1 × 4]> <tibble [1 × 3]>
25 <split [282/101]> Bootstrap025 <tibble [1 × 4]> <tibble [1 × 3]>
26 <split [282/98]> Bootstrap026 <tibble [1 × 4]> <tibble [1 × 3]>
27 <split [282/107]> Bootstrap027 <tibble [1 × 4]> <tibble [1 × 3]>
28 <split [282/102]> Bootstrap028 <tibble [1 × 4]> <tibble [1 × 3]>
29 <split [282/113]> Bootstrap029 <tibble [1 × 4]> <tibble [1 × 3]>
30 <split [282/104]> Bootstrap030 <tibble [1 × 4]> <tibble [1 × 3]>
31 <split [282/99]> Bootstrap031 <tibble [1 × 4]> <tibble [1 × 3]>
32 <split [282/92]> Bootstrap032 <tibble [1 × 4]> <tibble [1 × 3]>
33 <split [282/103]> Bootstrap033 <tibble [1 × 4]> <tibble [1 × 3]>
34 <split [282/100]> Bootstrap034 <tibble [1 × 4]> <tibble [1 × 3]>
35 <split [282/102]> Bootstrap035 <tibble [1 × 4]> <tibble [1 × 3]>
36 <split [282/93]> Bootstrap036 <tibble [1 × 4]> <tibble [1 × 3]>
37 <split [282/99]> Bootstrap037 <tibble [1 × 4]> <tibble [1 × 3]>
38 <split [282/97]> Bootstrap038 <tibble [1 × 4]> <tibble [1 × 3]>
39 <split [282/101]> Bootstrap039 <tibble [1 × 4]> <tibble [1 × 3]>
40 <split [282/97]> Bootstrap040 <tibble [1 × 4]> <tibble [1 × 3]>
41 <split [282/107]> Bootstrap041 <tibble [1 × 4]> <tibble [1 × 3]>
42 <split [282/106]> Bootstrap042 <tibble [1 × 4]> <tibble [1 × 3]>
43 <split [282/97]> Bootstrap043 <tibble [1 × 4]> <tibble [1 × 3]>
44 <split [282/103]> Bootstrap044 <tibble [1 × 4]> <tibble [1 × 3]>
45 <split [282/119]> Bootstrap045 <tibble [1 × 4]> <tibble [1 × 3]>
46 <split [282/99]> Bootstrap046 <tibble [1 × 4]> <tibble [1 × 3]>
47 <split [282/103]> Bootstrap047 <tibble [1 × 4]> <tibble [1 × 3]>
48 <split [282/99]> Bootstrap048 <tibble [1 × 4]> <tibble [1 × 3]>
49 <split [282/100]> Bootstrap049 <tibble [1 × 4]> <tibble [1 × 3]>
50 <split [282/112]> Bootstrap050 <tibble [1 × 4]> <tibble [1 × 3]>
51 <split [282/101]> Bootstrap051 <tibble [1 × 4]> <tibble [1 × 3]>
52 <split [282/103]> Bootstrap052 <tibble [1 × 4]> <tibble [1 × 3]>
53 <split [282/115]> Bootstrap053 <tibble [1 × 4]> <tibble [1 × 3]>
54 <split [282/94]> Bootstrap054 <tibble [1 × 4]> <tibble [1 × 3]>
55 <split [282/113]> Bootstrap055 <tibble [1 × 4]> <tibble [1 × 3]>
56 <split [282/100]> Bootstrap056 <tibble [1 × 4]> <tibble [1 × 3]>
57 <split [282/101]> Bootstrap057 <tibble [1 × 4]> <tibble [1 × 3]>
58 <split [282/100]> Bootstrap058 <tibble [1 × 4]> <tibble [1 × 3]>
59 <split [282/94]> Bootstrap059 <tibble [1 × 4]> <tibble [1 × 3]>
60 <split [282/102]> Bootstrap060 <tibble [1 × 4]> <tibble [1 × 3]>
61 <split [282/105]> Bootstrap061 <tibble [1 × 4]> <tibble [1 × 3]>
62 <split [282/105]> Bootstrap062 <tibble [1 × 4]> <tibble [1 × 3]>
63 <split [282/111]> Bootstrap063 <tibble [1 × 4]> <tibble [1 × 3]>
64 <split [282/106]> Bootstrap064 <tibble [1 × 4]> <tibble [1 × 3]>
65 <split [282/100]> Bootstrap065 <tibble [1 × 4]> <tibble [1 × 3]>
66 <split [282/104]> Bootstrap066 <tibble [1 × 4]> <tibble [1 × 3]>
67 <split [282/98]> Bootstrap067 <tibble [1 × 4]> <tibble [1 × 3]>
68 <split [282/107]> Bootstrap068 <tibble [1 × 4]> <tibble [1 × 3]>
69 <split [282/95]> Bootstrap069 <tibble [1 × 4]> <tibble [1 × 3]>
70 <split [282/102]> Bootstrap070 <tibble [1 × 4]> <tibble [1 × 3]>
71 <split [282/108]> Bootstrap071 <tibble [1 × 4]> <tibble [1 × 3]>
72 <split [282/106]> Bootstrap072 <tibble [1 × 4]> <tibble [1 × 3]>
73 <split [282/101]> Bootstrap073 <tibble [1 × 4]> <tibble [1 × 3]>
74 <split [282/97]> Bootstrap074 <tibble [1 × 4]> <tibble [1 × 3]>
75 <split [282/102]> Bootstrap075 <tibble [1 × 4]> <tibble [1 × 3]>
76 <split [282/98]> Bootstrap076 <tibble [1 × 4]> <tibble [1 × 3]>
77 <split [282/100]> Bootstrap077 <tibble [1 × 4]> <tibble [1 × 3]>
78 <split [282/102]> Bootstrap078 <tibble [1 × 4]> <tibble [1 × 3]>
79 <split [282/109]> Bootstrap079 <tibble [1 × 4]> <tibble [1 × 3]>
80 <split [282/112]> Bootstrap080 <tibble [1 × 4]> <tibble [1 × 3]>
81 <split [282/108]> Bootstrap081 <tibble [1 × 4]> <tibble [1 × 3]>
82 <split [282/106]> Bootstrap082 <tibble [1 × 4]> <tibble [1 × 3]>
83 <split [282/112]> Bootstrap083 <tibble [1 × 4]> <tibble [1 × 3]>
84 <split [282/104]> Bootstrap084 <tibble [1 × 4]> <tibble [1 × 3]>
85 <split [282/103]> Bootstrap085 <tibble [1 × 4]> <tibble [1 × 3]>
86 <split [282/102]> Bootstrap086 <tibble [1 × 4]> <tibble [1 × 3]>
87 <split [282/102]> Bootstrap087 <tibble [1 × 4]> <tibble [1 × 3]>
88 <split [282/114]> Bootstrap088 <tibble [1 × 4]> <tibble [1 × 3]>
89 <split [282/107]> Bootstrap089 <tibble [1 × 4]> <tibble [1 × 3]>
90 <split [282/112]> Bootstrap090 <tibble [1 × 4]> <tibble [1 × 3]>
91 <split [282/100]> Bootstrap091 <tibble [1 × 4]> <tibble [1 × 3]>
92 <split [282/99]> Bootstrap092 <tibble [1 × 4]> <tibble [1 × 3]>
93 <split [282/106]> Bootstrap093 <tibble [1 × 4]> <tibble [1 × 3]>
94 <split [282/103]> Bootstrap094 <tibble [1 × 4]> <tibble [1 × 3]>
95 <split [282/98]> Bootstrap095 <tibble [1 × 4]> <tibble [1 × 3]>
96 <split [282/101]> Bootstrap096 <tibble [1 × 4]> <tibble [1 × 3]>
97 <split [282/110]> Bootstrap097 <tibble [1 × 4]> <tibble [1 × 3]>
98 <split [282/100]> Bootstrap098 <tibble [1 × 4]> <tibble [1 × 3]>
99 <split [282/108]> Bootstrap099 <tibble [1 × 4]> <tibble [1 × 3]>
100 <split [282/97]> Bootstrap100 <tibble [1 × 4]> <tibble [1 × 3]>
There were issues with some computations:
- Warning(s) x100: glm.fit: algorithm did not converge, glm.fit: fitted probabilitie...
Run `show_notes(.Last.tune.result)` for more information.
This tells us that we should use show_notes(.Last.tune.result). So let's run that:
show_notes(.Last.tune.result)
unique notes:
────────────────────────────────────────────────────────────────────────────────
glm.fit: algorithm did not converge, glm.fit: fitted probabilities numerically 0 or 1 occurred
In this case, it just recapitulates the same message that was presented when we printed the object. The bigger point here is: always inspect error and warning messages.
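If you prefer the notes as a tibble rather than console text, recent versions of the tune package also provide collect_notes(); a minimal optional sketch:
# Optional: pull the warnings/notes from the resampling object as a tibble
collect_notes(fit_lr_boot) |>
  distinct(note)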
We are getting a warning message about our fitted probabilities. This occurs because the outcome is completely (perfectly) separable in the training data (remember that issue from our earlier discussions of logistic regression). The model can still be used for prediction, but we would not be able to use the statistical tests of the parameter estimates. Those are not our focus here.
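To make the issue concrete, here is a tiny toy example (the data are made up) in which one predictor perfectly separates the two classes; fitting it with glm() should trigger the same kind of warning:
# Toy example of complete separation (illustration only)
toy <- tibble(y = factor(c("a", "a", "a", "b", "b", "b")),
              x = c(1, 2, 3, 10, 11, 12))
glm(y ~ x, family = binomial, data = toy)
# Expect warnings such as "fitted probabilities numerically 0 or 1 occurred"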
We are going to fit this model on the full training sample so we can show why we get this warning message. When we are not resampling, the warning is displayed directly when we fit.
#make feature matrix
rec_prep <- rec_glm |>
  prep(data_train)

feat_train_glm <- rec_prep |>
  bake(data_train)

#fit on full training sample
fit_best_glm <- logistic_reg() |>
  set_engine("glm", control = list(maxit = 100)) |>
  fit(diagnosis ~ ., data = feat_train_glm)
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
fit_best_glm$fit |> summary()
Call:
stats::glm(formula = diagnosis ~ ., family = stats::binomial,
data = data, control = ~list(maxit = 100))
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.531e+02 1.708e+07 0 1
perimeter_se -1.707e+01 1.334e+06 0 1
fractal_dimension_mean -6.355e+02 1.607e+08 0 1
concave_points_worst -8.240e+01 1.631e+07 0 1
symmetry_mean -1.176e+02 1.443e+07 0 1
texture_se 1.961e+01 3.189e+05 0 1
concave_points_se 7.613e+03 1.292e+08 0 1
concavity_mean 1.484e+03 2.391e+07 0 1
fractal_dimension_se -1.078e+04 3.475e+08 0 1
radius_worst 8.568e+00 2.694e+06 0 1
concave_points_mean -2.814e+02 4.573e+07 0 1
radius_mean -2.524e+02 2.835e+06 0 1
smoothness_se -1.435e+04 1.270e+08 0 1
smoothness_worst 7.010e+02 4.115e+07 0 1
symmetry_se -1.954e+03 3.878e+07 0 1
radius_se 2.772e+02 1.306e+07 0 1
concavity_worst -5.805e+01 6.757e+06 0 1
concavity_se -1.542e+03 3.167e+07 0 1
compactness_se 1.799e+03 4.536e+07 0 1
smoothness_mean 1.274e+03 8.379e+07 0 1
area_se -1.624e+00 2.984e+04 0 1
area_worst 2.532e-01 1.314e+04 0 1
perimeter_mean 3.272e+01 3.664e+05 0 1
compactness_mean -2.339e+03 3.687e+07 0 1
area_mean 2.674e-01 1.237e+04 0 1
fractal_dimension_worst 1.091e+03 7.320e+07 0 1
texture_mean 6.494e+00 1.553e+05 0 1
perimeter_worst -2.138e+00 2.153e+05 0 1
symmetry_worst 1.873e+02 7.068e+06 0 1
texture_worst -4.069e+00 1.055e+05 0 1
compactness_worst -4.504e+01 1.423e+07 0 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2.4423e+02 on 281 degrees of freedom
Residual deviance: 2.6409e-10 on 251 degrees of freedom
AIC: 62
Number of Fisher Scoring iterations: 29
Let’s take a closer look at the predictions our model is making.
# probabilities in train
predict(fit_best_glm, feat_train_glm, type = "prob") |>
print(n = 50)
# A tibble: 282 × 2
.pred_benign .pred_malignant
<dbl> <dbl>
1 1 2.22e-16
2 1 2.22e-16
3 1.00 2.94e-12
4 1 2.22e-16
5 1 2.22e-16
6 1 2.22e-16
7 1 2.22e-16
8 1 2.22e-16
9 1 2.22e-16
10 1 2.22e-16
11 1.00 3.18e-12
12 1 2.22e-16
13 1 2.22e-16
14 1 2.22e-16
15 1.00 5.41e-12
16 1 2.22e-16
17 1 2.22e-16
18 1 2.22e-16
19 1.00 1.22e-12
20 1 2.22e-16
21 1 2.22e-16
22 1 2.22e-16
23 1 2.22e-16
24 1 2.22e-16
25 1 2.22e-16
26 1 2.22e-16
27 1 2.22e-16
28 1 2.22e-16
29 1 2.22e-16
30 1.00 3.15e-12
31 1 2.22e-16
32 1 2.22e-16
33 1 2.22e-16
34 1 2.22e-16
35 1 2.22e-16
36 1 2.22e-16
37 1 2.22e-16
38 1 2.22e-16
39 1 2.22e-16
40 1 2.22e-16
41 1 2.22e-16
42 1 2.22e-16
43 1 2.22e-16
44 1 2.22e-16
45 1 2.22e-16
46 1 2.22e-16
47 1 2.22e-16
48 1 2.22e-16
49 1 2.22e-16
50 1 2.22e-16
# ℹ 232 more rows
# confusion matrix for train
cm_trn <- tibble(truth = feat_train_glm$diagnosis,
                 estimate = predict(fit_best_glm, feat_train_glm)$.pred_class) |>
  conf_mat(truth, estimate)
cm_trn
Truth
Prediction benign malignant
benign 238 0
malignant 0 44
cm_trn |>
  summary()
# A tibble: 13 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 accuracy binary 1
2 kap binary 1
3 sens binary 1
4 spec binary 1
5 ppv binary 1
6 npv binary 1
7 mcc binary 1
8 j_index binary 1
9 bal_accuracy binary 1
10 detection_prevalence binary 0.844
11 precision binary 1
12 recall binary 1
13 f_meas binary 1
Here we can see the cause of our warning message: the features perfectly predict the classes of the outcome in train ("perfect separation"), as the confusion matrix above shows. This likely occurs because these features predict the outcome very well even in held-out data (see below), and some overfitting in training pushes very good prediction all the way to perfect prediction. Since we have confirmed that the warning message makes sense to us here, it is OK to proceed.
Print the average ROC AUC of your logistic regression model
collect_metrics(fit_lr_boot, summarize = TRUE)
# A tibble: 1 × 6
.metric .estimator mean n std_err .config
<chr> <chr> <dbl> <int> <dbl> <chr>
1 roc_auc binary 0.945 100 0.00398 Preprocessor1_Model1
You might be wondering why performance is not perfect across the bootstrapped samples, given that we got the warning for each of the 100 bootstraps above (which means the model always predicted perfectly in the held-in data). The resampled metrics are computed on each bootstrap's held-out (out-of-bag) observations, where prediction is not perfect. For demonstration purposes, we will assess this model in test here as well.
#test feature matrix
feat_test_glm <- rec_prep |>
  bake(data_test)

# confusion matrix for test
cm_test <- tibble(truth = feat_test_glm$diagnosis,
                  estimate = predict(fit_best_glm, feat_test_glm)$.pred_class) |>
  conf_mat(truth, estimate)
cm_test
Truth
Prediction benign malignant
benign 118 1
malignant 1 21
Performance is still quite good here, but given the large number of predictors relative to the small number of training cases in each bootstrap sample, our models can fit the held-in data perfectly every time while not reproducing that performance in the corresponding held-out (out-of-bag) observations.
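If you want to see the held-out cases for any single bootstrap, each rsplit exposes them via assessment(); the resampled metrics above are computed on these out-of-bag rows. A quick optional check:
# Optional: inspect the out-of-bag (assessment) cases for the first bootstrap
splits_boot$splits[[1]] |>
  assessment() |>
  nrow()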
You should always check for these sorts of warnings by printing the model fit object.
Fit KNN
Set up a hyperparameter grid to consider a range of values for neighbors in your KNN models.
NOTE: be sure that you are not considering values higher than the N of your training sample! In this dataset, N = 282.
hyper_grid <- expand.grid(neighbors = seq(1, 280, by = 10))
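One optional guard (a sketch, not required by the assignment) to make that constraint explicit in code:
# Optional guard: the largest neighbors value must stay below the training N
stopifnot(max(hyper_grid$neighbors) < nrow(data_train))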
Fit a KNN model using the recipe you created and your bootstrap splits. Use ROC AUC (roc_auc) as your performance metric.
fit_knn_boot <-
  nearest_neighbor(neighbors = tune()) |>
  set_engine("kknn") |>
  set_mode("classification") |>
  tune_grid(preprocessor = rec_knn,
            resamples = splits_boot,
            grid = hyper_grid,
            metrics = metric_set(roc_auc))
Generate a plot to help you determine if you considered a wide enough range of values for neighbors.
plot_hyperparameters(fit_knn_boot, hp1 = "neighbors", metric = "roc_auc")
Print the best value for the neighbors hyperparameter across resamples based on model ROC AUC.
select_best(fit_knn_boot)
# A tibble: 1 × 2
neighbors .config
<dbl> <chr>
1 121 Preprocessor1_Model13
Print the average ROC AUC of your best KNN model
collect_metrics(fit_knn_boot, summarize = TRUE) |>
filter(neighbors == select_best(fit_knn_boot)$neighbors)
# A tibble: 1 × 7
neighbors .metric .estimator mean n std_err .config
<dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
1 121 roc_auc binary 0.982 100 0.00173 Preprocessor1_Model13
Select and fit best model
Select based on average roc_auc. Our best model configuration was a KNN algorithm with 121 neighbors.
Create training and test feature matrices for our best KNN model
rec_prep <- rec_knn |>
  prep(data_train)

feat_train <- rec_prep |>
  bake(data_train)

feat_test <- rec_prep |>
  bake(data_test)
Fit on the full training sample
fit_best_knn <- nearest_neighbor(neighbors = select_best(fit_knn_boot)$neighbors) |>
  set_engine("kknn") |>
  set_mode("classification") |>
  fit(diagnosis ~ ., data = feat_train)
Evaluate best model
Plot the ROC of your best model in the test set.
roc_plot <-
  tibble(truth = feat_test$diagnosis,
         prob = predict(fit_best_knn, feat_test, type = "prob")$.pred_malignant) |>
  roc_curve(prob, truth = truth)

roc_plot |>
  ggplot(aes(x = 1 - specificity, y = sensitivity, color = .threshold)) +
  geom_path() +
  geom_abline(lty = 3) +
  coord_equal() +
  labs(x = "1 - Specificity (FPR)",
       y = "Sensitivity (TPR)")
Confusion matrix depicting performance in test
cm <- tibble(truth = feat_test$diagnosis,
             estimate = predict(fit_best_knn, feat_test)$.pred_class) |>
  conf_mat(truth, estimate)
cm
Truth
Prediction benign malignant
benign 119 11
malignant 0 11
cm |>
  summary()
# A tibble: 13 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 accuracy binary 0.922
2 kap binary 0.628
3 sens binary 1
4 spec binary 0.5
5 ppv binary 0.915
6 npv binary 1
7 mcc binary 0.677
8 j_index binary 0.5
9 bal_accuracy binary 0.75
10 detection_prevalence binary 0.922
11 precision binary 0.915
12 recall binary 1
13 f_meas binary 0.956
Make a plot of your confusion matrix
autoplot(cm)
Report the AUC, accuracy, sensitivity, specificity, PPV, and NPV of your best model in test
Let’s set up a tibble to track results across portions of the assignment.
#ROC AUC
auc <- tibble(truth = feat_test$diagnosis,
              prob = predict(fit_best_knn, feat_test, type = "prob")$.pred_malignant) |>
  roc_auc(prob, truth = truth) |>
  pull(.estimate)

#Accuracy, sensitivity, specificity, PPV, NPV
#Using pivot_wider() to make a tracking tibble
results <- cm |>
  summary() |>
  filter(.metric %in% c("accuracy", "sens", "spec", "ppv", "npv")) |>
  select(-.estimator) |>
  pivot_wider(names_from = .metric, values_from = .estimate) |>
  mutate(model = str_c("knn_", fit_best_knn$fit$best.parameters$k),
         roc_auc = auc) |>
  select(model, roc_auc, everything())
print_kbl(results, height = "100px")
model | roc_auc | accuracy | sens | spec | ppv | npv |
---|---|---|---|---|---|---|
knn_121 | 0 | 0.92 | 1 | 0.5 | 0.92 | 1 |
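A note on the roc_auc column: yardstick's roc_auc() treats the first factor level (benign) as the event by default, so passing .pred_malignant as the probability column can invert the AUC, which would explain the 0 printed above. If that is the cause, a minimal sketch of the fix (our assumption to verify, not part of the original assignment) is to set the event level explicitly:
# Sketch: score .pred_malignant with malignant treated as the event
tibble(truth = feat_test$diagnosis,
       prob = predict(fit_best_knn, feat_test, type = "prob")$.pred_malignant) |>
  roc_auc(prob, truth = truth, event_level = "second")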
Part 2: Addressing class imbalance
Since only about 16% of our cases are malignant, let's see if we can achieve higher sensitivity by up-sampling our data with SMOTE.
Build recipes
Update your previous recipes to up-sample the minority class (malignant) in diagnosis using step_smote().
Remember to make 2 recipes (one for glm, one for KNN)
rec_glm_up <- recipe(diagnosis ~ ., data = data_train) |>
  themis::step_smote(diagnosis, over_ratio = 1, neighbors = 5)

rec_knn_up <- recipe(diagnosis ~ ., data = data_train) |>
  themis::step_smote(diagnosis, over_ratio = 1, neighbors = 5) |>
  step_range(all_predictors())
Fit GLM
Fit an up-sampled logistic regression classifier
fit_lr_boot_up <-
  logistic_reg() |>
  set_engine("glm", control = list(maxit = 100)) |>
  fit_resamples(preprocessor = rec_glm_up,
                resamples = splits_boot,
                metrics = metric_set(roc_auc))
We are seeing the same problem as before.
show_notes(.Last.tune.result)
unique notes:
─────────────────────────────────────────────────────────
glm.fit: fitted probabilities numerically 0 or 1 occurred
Print the average ROC AUC of your logistic regression model
collect_metrics(fit_lr_boot_up, summarize = TRUE)
# A tibble: 1 × 6
.metric .estimator mean n std_err .config
<chr> <chr> <dbl> <int> <dbl> <chr>
1 roc_auc binary 0.916 100 0.00469 Preprocessor1_Model1
Fit KNN
Set up a hyperparameter grid to consider a range of values for neighbors in your KNN models.
hyper_grid <- expand.grid(neighbors = seq(1, 280, by = 10))
Fit an up-sampled KNN using the new KNN recipe you created and your bootstrap splits. Use ROC AUC as your performance metric.
fit_knn_boot_up <-
  nearest_neighbor(neighbors = tune()) |>
  set_engine("kknn") |>
  set_mode("classification") |>
  tune_grid(preprocessor = rec_knn_up,
            resamples = splits_boot,
            grid = hyper_grid,
            metrics = metric_set(roc_auc))
Generate a plot to help you determine if you considered a wide enough range of values for neighbors.
plot_hyperparameters(fit_knn_boot_up, hp1 = "neighbors", metric = "roc_auc")
Print the best value for the neighbors hyperparameter across resamples based on model ROC AUC.
select_best(fit_knn_boot_up)
# A tibble: 1 × 2
neighbors .config
<dbl> <chr>
1 71 Preprocessor1_Model08
Print the average ROC AUC of your best KNN model
collect_metrics(fit_knn_boot_up, summarize = TRUE) |>
filter(neighbors == select_best(fit_knn_boot_up)$neighbors)
# A tibble: 1 × 7
neighbors .metric .estimator mean n std_err .config
<dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
1 71 roc_auc binary 0.978 100 0.00186 Preprocessor1_Model08
Select and fit best model
Create the up-sampled training feature matrix using your best recipe (for us this is KNN). Remember, do not upsample your test data!
rec_prep <- rec_knn_up |>
  prep(data_train)

feat_train_up <- rec_prep |>
  bake(data_train)
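An optional sanity check (a sketch, not required by the assignment): with over_ratio = 1, the baked training features should now contain roughly equal counts of benign and malignant cases.
# Optional: confirm the up-sampled training features are balanced
feat_train_up |>
  janitor::tabyl(diagnosis)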
Fit your best performing up-sampled model on the full training sample
fit_best_knn_up <- nearest_neighbor(neighbors = select_best(fit_knn_boot_up)$neighbors) |>
  set_engine("kknn") |>
  set_mode("classification") |>
  fit(diagnosis ~ ., data = feat_train_up)
Evaluate best model
Make a figure to plot the ROC of your best up-sampled model in the test set
roc_plot_up <-
  tibble(truth = feat_test$diagnosis,
         prob = predict(fit_best_knn_up, feat_test, type = "prob")$.pred_malignant) |>
  roc_curve(prob, truth = truth)

roc_plot_up |>
  ggplot(aes(x = 1 - specificity, y = sensitivity, color = .threshold)) +
  geom_path() +
  geom_abline(lty = 3) +
  coord_equal() +
  labs(x = "1 - Specificity (FPR)",
       y = "Sensitivity (TPR)")
Generate a confusion matrix depicting your up-sampled model’s performance in test
cm_up <- tibble(truth = feat_test$diagnosis,
                estimate = predict(fit_best_knn_up, feat_test)$.pred_class) |>
  conf_mat(truth, estimate)
cm_up
Truth
Prediction benign malignant
benign 119 7
malignant 0 15
Make a plot of your confusion matrix
autoplot(cm_up)
Report the AUC, accuracy, sensitivity, specificity, PPV, and NPV of your best up-sampled model in the held out test set
#ROC AUC
auc <- tibble(truth = feat_test$diagnosis,
              prob = predict(fit_best_knn_up, feat_test, type = "prob")$.pred_malignant) |>
  roc_auc(prob, truth = truth) |>
  pull(.estimate)

#Accuracy, sensitivity, specificity, PPV, NPV
results_up <- cm_up |>
  summary(event_level = "second") |>
  filter(.metric %in% c("accuracy", "sens", "spec", "ppv", "npv")) |>
  select(-.estimator) |>
  pivot_wider(names_from = .metric, values_from = .estimate) |>
  mutate(model = str_c("knn_smote_", fit_best_knn_up$fit$best.parameters$k),
         roc_auc = auc) |>
  select(model, roc_auc, everything())
Joining results with our tibble from Part 1 for comparison
results <- results |>
  bind_rows(results_up)
print_kbl(results, height = "150px")
model | roc_auc | accuracy | sens | spec | ppv | npv |
---|---|---|---|---|---|---|
knn_121 | 0 | 0.92 | 1.00 | 0.5 | 0.92 | 1.00 |
knn_smote_71 | 0 | 0.95 | 0.68 | 1.0 | 1.00 | 0.94 |
Some improvement with SMOTE! Relative to the tuned KNN model, accuracy, specificity, and PPV improved, with only a small dip in NPV. (Keep in mind that the Part 1 row was summarized with yardstick's default event level, benign, whereas this row used event_level = "second", so the sens and spec columns are not directly comparable across rows.)
Part 3: New Classification Threshold
Now you want to check whether there may be an additional benefit to your model's performance if you adjust the classification threshold from its default of 50% to 40%.
Adjust classification threshold to 40%
preds <- tibble(truth = feat_test$diagnosis,
                prob = predict(fit_best_knn_up, feat_test, type = "prob")$.pred_malignant) |>
  mutate(estimate_40 = if_else(prob > .40, "malignant", "benign"),
         estimate_40 = factor(estimate_40, levels = c("benign", "malignant")))
preds
# A tibble: 141 × 3
truth prob estimate_40
<fct> <dbl> <fct>
1 benign 0 benign
2 benign 0.000982 benign
3 malignant 0.859 malignant
4 benign 0.0110 benign
5 malignant 0.461 malignant
6 benign 0 benign
7 benign 0.0215 benign
8 benign 0 benign
9 benign 0 benign
10 benign 0 benign
11 benign 0.0380 benign
12 benign 0 benign
13 benign 0.0612 benign
14 benign 0 benign
15 benign 0.0100 benign
16 benign 0.119 benign
17 benign 0 benign
18 benign 0.0680 benign
19 benign 0.0530 benign
20 benign 0 benign
21 benign 0 benign
22 malignant 0.124 benign
23 benign 0.136 benign
24 benign 0.00868 benign
25 benign 0.00873 benign
26 benign 0 benign
27 malignant 0.767 malignant
28 benign 0.00758 benign
29 benign 0.183 benign
30 benign 0 benign
31 benign 0 benign
32 benign 0.000758 benign
33 benign 0 benign
34 benign 0 benign
35 benign 0.0126 benign
36 benign 0 benign
37 benign 0.0196 benign
38 benign 0 benign
39 malignant 0.701 malignant
40 benign 0.0991 benign
41 benign 0 benign
42 benign 0.00724 benign
43 benign 0.0104 benign
44 benign 0 benign
45 benign 0.00901 benign
46 malignant 0.438 malignant
47 benign 0.00659 benign
48 malignant 0.538 malignant
49 benign 0.0309 benign
50 benign 0 benign
51 malignant 0.823 malignant
52 benign 0.0303 benign
53 benign 0 benign
54 benign 0.0115 benign
55 benign 0.0110 benign
56 benign 0 benign
57 benign 0 benign
58 benign 0 benign
59 malignant 0.549 malignant
60 benign 0 benign
61 benign 0.00691 benign
62 benign 0 benign
63 benign 0.0884 benign
64 benign 0 benign
65 malignant 0.782 malignant
66 benign 0 benign
67 benign 0.00596 benign
68 benign 0 benign
69 benign 0 benign
70 benign 0.0258 benign
71 benign 0 benign
72 benign 0.00535 benign
73 benign 0 benign
74 malignant 0.687 malignant
75 malignant 0.107 benign
76 benign 0 benign
77 benign 0.0105 benign
78 benign 0.00659 benign
79 benign 0 benign
80 benign 0.00341 benign
81 benign 0.000758 benign
82 malignant 0.631 malignant
83 benign 0.0663 benign
84 malignant 0.844 malignant
85 malignant 0.526 malignant
86 benign 0.0368 benign
87 benign 0 benign
88 benign 0 benign
89 benign 0 benign
90 benign 0 benign
91 benign 0.00477 benign
92 malignant 0.525 malignant
93 benign 0 benign
94 benign 0 benign
95 benign 0.0179 benign
96 benign 0.00596 benign
97 benign 0.00449 benign
98 benign 0.00394 benign
99 benign 0.00394 benign
100 benign 0.0186 benign
101 benign 0 benign
102 malignant 0.205 benign
103 benign 0.0217 benign
104 benign 0 benign
105 benign 0.0538 benign
106 benign 0.00864 benign
107 benign 0 benign
108 malignant 0.748 malignant
109 benign 0 benign
110 benign 0.0565 benign
111 benign 0 benign
112 malignant 0.497 malignant
113 benign 0 benign
114 benign 0 benign
115 benign 0.0399 benign
116 benign 0 benign
117 benign 0 benign
118 benign 0 benign
119 benign 0 benign
120 benign 0 benign
121 benign 0 benign
122 benign 0.0116 benign
123 benign 0.0330 benign
124 benign 0.00758 benign
125 benign 0 benign
126 malignant 0.774 malignant
127 benign 0.00341 benign
128 benign 0.00789 benign
129 benign 0.0744 benign
130 malignant 0.864 malignant
131 benign 0 benign
132 benign 0 benign
133 benign 0.0286 benign
134 benign 0.00864 benign
135 benign 0.0110 benign
136 benign 0 benign
137 benign 0 benign
138 malignant 0.354 benign
139 benign 0 benign
140 benign 0 benign
141 benign 0 benign
Evaluate model at new threshold
Generate a confusion matrix depicting your up-sampled model’s performance in test at your new threshold
cm_40 <- preds |>
  conf_mat(truth = truth, estimate = estimate_40)
cm_40
Truth
Prediction benign malignant
benign 119 4
malignant 0 18
Make a plot of your confusion matrix
autoplot(cm_40)
Report the AUC, accuracy, sensitivity, specificity, PPV, and NPV of your best up-sampled model in the held out test set
#ROC AUC
auc <- tibble(truth = feat_test$diagnosis,
              prob = predict(fit_best_knn_up, feat_test, type = "prob")$.pred_malignant) |>
  roc_auc(prob, truth = truth) |>
  pull(.estimate)

#Accuracy, sensitivity, specificity, PPV, NPV
results_40 <- cm_40 |>
  summary(event_level = "second") |>
  filter(.metric %in% c("accuracy", "sens", "spec", "ppv", "npv")) |>
  select(-.estimator) |>
  pivot_wider(names_from = .metric, values_from = .estimate) |>
  mutate(model = str_c("knn_smote_", fit_best_knn_up$fit$best.parameters$k, "_thresh_40"),
         roc_auc = auc) |>
  select(model, roc_auc, everything())
Join results with the tibble from Parts 1 and 2 for comparison
results <- results |>
  bind_rows(results_40)
print_kbl(results, height = "200px")
model | roc_auc | accuracy | sens | spec | ppv | npv |
---|---|---|---|---|---|---|
knn_121 | 0 | 0.92 | 1.00 | 0.5 | 0.92 | 1.00 |
knn_smote_71 | 0 | 0.95 | 0.68 | 1.0 | 1.00 | 0.94 |
knn_smote_71_thresh_40 | 0 | 0.97 | 0.82 | 1.0 | 1.00 | 0.97 |
The model performs even better at this threshold: sensitivity increased (0.68 to 0.82) with no decrease in specificity, and accuracy and NPV improved as well. Note that ROC AUC does not change, since it does not depend on the classification threshold.
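If you wanted to choose the threshold more systematically, one optional extension (a sketch; the grid of thresholds is arbitrary and the column names follow the preds tibble above) is to sweep candidate cutoffs and compute sensitivity and specificity at each:
# Sketch: sensitivity/specificity for malignant across a grid of thresholds
seq(.10, .60, by = .05) |>
  map(\(t) preds |>
        mutate(estimate = factor(if_else(prob > t, "malignant", "benign"),
                                 levels = c("benign", "malignant"))) |>
        summarize(threshold = t,
                  sens = sens_vec(truth, estimate, event_level = "second"),
                  spec = spec_vec(truth, estimate, event_level = "second"))) |>
  list_rbind()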