Machine learning can predict survival of patients with heart failure from serum creatinine and ejection fraction alone. Davide Chicco, Giuseppe Jurman. BMC Medical Informatics and Decision Making. 2020. doi.org/10.1186/s12911-020-1023-5.
This section includes code for the examples shown. These may differ slightly from the examples shown in the live demonstration.
Example 1: Pre-processing
See example
# Load R packages ---------------------------------------------------------
library(tidyverse)
library(tidymodels)
# Called right after attaching tidymodels; see the tidymodels documentation
# for how it resolves function-name conflicts between attached packages.
tidymodels_prefer()

# Load data ---------------------------------------------------------------
# The path is relative to the working directory.
heart_failure <- read_csv("data/heart_failure.csv")

# Print the tibble to check the import.
heart_failure
# A tibble: 299 × 12
age sex smoking anaemia diabetes high_blood_pressure serum_creatinine
<dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 75 M 0 0 0 1 1.9
2 55 M 0 0 0 0 1.1
3 65 M 1 0 0 0 1.3
4 50 M 0 1 0 0 1.9
5 65 F 0 1 1 0 2.7
6 90 M 1 1 0 1 2.1
7 75 M 0 1 0 0 1.2
8 60 M 1 1 1 0 1.1
9 65 F 0 0 0 0 1.5
10 80 M 1 1 0 1 9.4
# ℹ 289 more rows
# ℹ 5 more variables: creatinine_phosphokinase <dbl>, platelets <dbl>,
# ejection_fraction <dbl>, time <dbl>, death <dbl>
# Store the outcome `death` as a factor so it is treated as categorical.
heart_failure <- heart_failure |>
  mutate(death = factor(death))
# You can also use `View()`!

# Inspect variables -------------------------------------------------------
# Counts per level of the outcome and of sex, then the age distribution.
barplot(table(heart_failure$death))
barplot(table(heart_failure$sex))
hist(heart_failure$age)
# Split into training and testing -----------------------------------------
# First version: split using the default proportion of `initial_split()`.
set.seed(1234)
hf_split <- initial_split(heart_failure)
hf_train <- training(hf_split)
hf_test <- testing(hf_split)

# choose a different split proportion?
# NOTE: this re-seeds and overwrites `hf_split`, `hf_train` and `hf_test`
# from the default split above; everything below uses this 80% split.
set.seed(1234)
hf_split <- initial_split(heart_failure, prop = 0.8)
hf_train <- training(hf_split)
hf_test <- testing(hf_split)

# Create cross validation folds
# 10 folds, built from the training set only.
hf_folds <- vfold_cv(hf_train, v = 10)

# Build a recipe ----------------------------------------------------------
# `death` is modelled from all other columns. `sex` goes through
# `step_dummy()`; `age` and the columns from `serum_creatinine` through `time`
# (by column position) go through `step_normalize()`.
hf_recipe <- recipe(death ~ ., data = hf_train) |>
  step_dummy(sex) |>
  step_normalize(age, serum_creatinine:time)

# Workflow holding only the recipe; a model is added per example later.
wf <- workflow() |>
  add_recipe(hf_recipe)
Example 2: Lasso regression
See example
# Specify the model -------------------------------------------------------
# Logistic regression on the glmnet engine; `penalty` is left to be tuned.
# NOTE(review): `mixture = 1` is presumably the lasso setting, given this
# example's heading — confirm against the parsnip documentation.
tune_spec_lasso <- logistic_reg(penalty = tune(), mixture = 1) |>
  set_engine("glmnet")

# Recipe + model, built once so tuning and finalising use the same workflow.
wf_lasso <- add_model(wf, tune_spec_lasso)

# Tune the model ----------------------------------------------------------
# Fit lots of values
# A regular grid of 50 penalty values, evaluated on the cross validation folds.
lasso_grid <- tune_grid(
  wf_lasso,
  resamples = hf_folds,
  grid = grid_regular(penalty(), levels = 50)
)

# Choose the best value
highest_roc_auc_lasso <- lasso_grid |>
  select_best(metric = "roc_auc")

# Fit the final model -----------------------------------------------------
final_lasso <- finalize_workflow(wf_lasso, highest_roc_auc_lasso)

# Model evaluation --------------------------------------------------------
# `last_fit()` is given the finalised workflow and the `hf_split` object;
# `collect_metrics()` returns the resulting metrics.
last_fit(final_lasso, hf_split) |>
  collect_metrics()