Machine learning can predict survival of patients with heart failure from serum creatinine and ejection fraction alone. Davide Chicco, Giuseppe Jurman. BMC Medical Informatics and Decision Making. 2020.
This section contains the code for the examples. The code may differ slightly from what was shown in the live demonstration.
Example 1: Pre-processing
# Load R packages ---------------------------------------------------------
library(tidyverse)
library(tidymodels)
# Ask tidymodels to resolve function-name conflicts with other attached
# packages.
tidymodels_prefer()

# Load data ---------------------------------------------------------------
# The path is relative to the current working directory.
heart_failure <- read_csv("data/heart_failure.csv")

# Print the tibble to check its dimensions and column types.
heart_failure
# A tibble: 299 × 12
age sex smoking anaemia diabetes high_blood_pressure serum_creatinine
<dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 75 M 0 0 0 1 1.9
2 55 M 0 0 0 0 1.1
3 65 M 1 0 0 0 1.3
4 50 M 0 1 0 0 1.9
5 65 F 0 1 1 0 2.7
6 90 M 1 1 0 1 2.1
7 75 M 0 1 0 0 1.2
8 60 M 1 1 1 0 1.1
9 65 F 0 0 0 0 1.5
10 80 M 1 1 0 1 9.4
# ℹ 289 more rows
# ℹ 5 more variables: creatinine_phosphokinase <dbl>, platelets <dbl>,
# ejection_fraction <dbl>, time <dbl>, death <dbl>
# Store the outcome as a factor so it is treated as a class label rather
# than as a number.
heart_failure <- heart_failure |>
  mutate(death = factor(death))

# You can also use `View()`!

# Inspect variables -------------------------------------------------------
# Bar chart of the number of rows in each level of `death`.
barplot(table(heart_failure$death))
# Split into training and testing -----------------------------------------
# Seed the random number generator so the split is reproducible.
set.seed(1234)
hf_split <- initial_split(heart_failure)
hf_train <- training(hf_split)
hf_test <- testing(hf_split)

# choose a different split proportion?
# Note: this repeats the split with `prop = 0.8` and overwrites `hf_split`,
# `hf_train` and `hf_test` from above, so everything below uses this split.
set.seed(1234)
hf_split <- initial_split(heart_failure, prop = 0.8)
hf_train <- training(hf_split)
hf_test <- testing(hf_split)

# Create cross validation folds
# The folds are drawn from the training set only.
hf_folds <- vfold_cv(hf_train, v = 10)

# Build a recipe ----------------------------------------------------------
# `death` is the outcome; every other column is a predictor.
# `sex` is turned into dummy variables, then `age` and the column range
# `serum_creatinine:time` are normalized.
hf_recipe <- recipe(death ~ ., data = hf_train) |>
  step_dummy(sex) |>
  step_normalize(age, serum_creatinine:time)

# Start a workflow holding only the recipe; a model is added per example.
wf <- workflow() |>
  add_recipe(hf_recipe)
Example 2: Lasso regression
# Specify the model -------------------------------------------------------
# Logistic regression fitted with glmnet. `penalty` is left as `tune()` so
# it can be chosen by resampling; `mixture = 1` requests a pure lasso
# penalty.
tune_spec_lasso <- logistic_reg(penalty = tune(), mixture = 1) |>
  set_engine("glmnet")

# Tune the model ----------------------------------------------------------
# Fit lots of values
# A regular grid of 50 candidate penalties is evaluated on the
# cross-validation folds.
lasso_grid <- tune_grid(
  add_model(wf, tune_spec_lasso),
  resamples = hf_folds,
  grid = grid_regular(penalty(), levels = 50)
)

# Choose the best value
highest_roc_auc_lasso <- lasso_grid |>
  select_best(metric = "roc_auc")

# Fit the final model -----------------------------------------------------
# Replace the `tune()` placeholder with the selected penalty.
final_lasso <- finalize_workflow(
  add_model(wf, tune_spec_lasso),
  highest_roc_auc_lasso
)

# Model evaluation --------------------------------------------------------
# Fit on the training portion of `hf_split` and report metrics computed on
# its test portion.
last_fit(final_lasso, hf_split) |>
  collect_metrics()