26 May 2022
PhD Statistics and Operational Research
Data scientist at Jumping Rivers
A lot of data visualisation…
Part 1
facet_zoom()
from {ggforce}
Part 2
Let’s try to make a flowchart with {ggplot2}…
# Packages used throughout: tidyverse (tibble/dplyr/tidyr/ggplot2),
# igraph (graph + tree layout), showtext (Google fonts),
# rcartocolor (CARTO colour palettes).
library(tidyverse)
library(igraph)
library(showtext)
library(rcartocolor)
# Edge list of the story: one row per arrow in the flowchart.
# Node names must be unique, so the repeated "Just right" outcome carries a
# numeric suffix ("Just right2", "Just right3").
goldilocks <- tribble(
  ~from,         ~to,
  "Goldilocks",  "Porridge",
  "Porridge",    "Too cold",
  "Porridge",    "Too hot",
  "Porridge",    "Just right",
  "Just right",  "Chairs",
  "Chairs",      "Still too big",
  "Chairs",      "Too big",
  "Chairs",      "Just right2",
  "Just right2", "Beds",
  "Beds",        "Too soft",
  "Beds",        "Too hard",
  "Beds",        "Just right3",
  "Just right3", "Bears!"
)
## # A tibble: 6 × 2
##   from       to
##   <chr>      <chr>
## 1 Goldilocks Porridge
## 2 Porridge   Too cold
## 3 Porridge   Too hot
## 4 Porridge   Just right
## 5 Just right Chairs
## 6 Chairs     Still too big
# Turn the edge list into a directed graph.
g <- graph_from_data_frame(goldilocks, directed = TRUE)

# Compute a tree layout: a two-column matrix with one row per vertex.
coords <- layout_as_tree(g)

# Name the columns so they can be used as plotting coordinates later.
colnames(coords) <- c("x", "y")
##       x y
## [1,]  0 7
## [2,]  0 6
## [3,] -1 5
## [4,] -1 4
## [5,] -2 3
## [6,] -2 2
# One row per vertex: layout coordinates plus the attributes used for plotting.
output_df <- coords %>%
  as_tibble() %>%
  # mirror the layout horizontally
  mutate(x = -x) %>%
  mutate(
    # vertex names, in the same order as the rows of `coords`
    step = vertex_attr(g, "name"),
    # node category, given positionally (one entry per vertex, same order as
    # `step`); mapped to fill/colour when the rectangles are drawn
    type = factor(c(1, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 1)),
    # display label: drop the trailing digits that keep node names unique
    label = gsub("\\d+$", "", step)
  )
## # A tibble: 6 × 5
##       x     y step        type  label
##   <dbl> <dbl> <chr>       <fct> <chr>
## 1     0     7 Goldilocks  1     Goldilocks
## 2     0     6 Porridge    2     Porridge
## 3     1     5 Just right  3     Just right
## 4     1     4 Chairs      2     Chairs
## 5     2     3 Just right2 3     Just right
## 6     2     2 Beds        2     Beds
# Rectangle corners for each node: a box 0.7 wide and 0.5 tall, centred on
# the node's (x, y) position.
plot_nodes <- mutate(
  output_df,
  xmin = x - 0.35,
  xmax = x + 0.35,
  ymin = y - 0.25,
  ymax = y + 0.25
)
## # A tibble: 6 × 9
##       x     y step        type  label       xmin  xmax  ymin  ymax
##   <dbl> <dbl> <chr>       <fct> <chr>      <dbl> <dbl> <dbl> <dbl>
## 1     0     7 Goldilocks  1     Goldilocks -0.35  0.35  6.75  7.25
## 2     0     6 Porridge    2     Porridge   -0.35  0.35  5.75  6.25
## 3     1     5 Just right  3     Just right  0.65  1.35  4.75  5.25
## 4     1     4 Chairs      2     Chairs      0.65  1.35  3.75  4.25
## 5     2     3 Just right2 3     Just right  1.65  2.35  2.75  3.25
## 6     2     2 Beds        2     Beds        1.65  2.35  1.75  2.25
# Arrow endpoints: two rows per edge (its "from" end and its "to" end),
# sharing an `id` so each pair can be drawn as one path.
plot_edges <- goldilocks %>%
  mutate(id = row_number()) %>%
  # long format: one row per edge endpoint, with the node name in `step`
  pivot_longer(
    cols = c("from", "to"),
    names_to = "s_e",
    values_to = "step"
  ) %>%
  # look up each endpoint's node geometry
  left_join(plot_nodes, by = "step") %>%
  # arrows leave from the bottom edge of the source box and arrive at the
  # top edge of the target box
  mutate(y = ifelse(s_e == "from", ymin, ymax)) %>%
  select(id, s_e, step, x, y)
## # A tibble: 3 × 5
##      id s_e   step           x     y
##   <int> <chr> <chr>      <dbl> <dbl>
## 1     1 from  Goldilocks     0  6.75
## 2     1 to    Porridge       0  6.25
## 3     2 from  Porridge       0  5.75
# Font setup for the plot text.
library(showtext)

# Register the Google font "Henny Penny" under the family name "henny",
# which the text layers and theme refer to.
font_add_google(name = "Henny Penny", family = "henny")

# Enable showtext so the registered font is used when plots are drawn.
showtext_auto()
# Base flowchart: node boxes, node labels, and arrows between boxes.
# Each layer supplies its own data, so ggplot() is called with no defaults.
p <- ggplot() +
  # draw rectangles
  geom_rect(
    data = plot_nodes,
    mapping = aes(
      xmin = xmin, ymin = ymin,
      xmax = xmax, ymax = ymax,
      fill = type, colour = type
    ),
    alpha = 0.5,
    linejoin = "round"
  ) +
  # add text labels
  geom_text(
    data = plot_nodes,
    mapping = aes(x = x, y = y, label = label),
    family = "henny",
    color = "#585c45"
  ) +
  # add arrows: rows sharing an `id` form one path from source to target
  geom_path(
    data = plot_edges,
    mapping = aes(x = x, y = y, group = id),
    colour = "#585c45",
    arrow = arrow(length = unit(0.3, "cm"), type = "closed")
  )
p
# Colour the boxes (fill and outline) with the CARTO "Antique" palette.
p <- p +
  scale_fill_carto_d(palette = "Antique") +
  scale_colour_carto_d(palette = "Antique")
p
# Add the title and a multi-line caption (author, data source, image credit,
# challenge hashtag).
p <- p +
  labs(
    title = "The Goldilocks Decision Tree",
    caption = "N. Rennie\n\nData: Robert Southey. Goldilocks and the Three Bears. 1837.\n\nImage: New York Public Library\n\n#30DayChartChallenge"
  )
p
# Final styling: start from a blank theme, then set the parchment-coloured
# background, hide the legend, and style the title and caption text.
p <- p +
  theme_void() +
  theme(
    plot.margin = unit(c(1, 1, 0.5, 1), "cm"),
    legend.position = "none",
    # same colour for plot and panel so the background is uniform
    plot.background = element_rect(colour = "#f2e4c1", fill = "#f2e4c1"),
    panel.background = element_rect(colour = "#f2e4c1", fill = "#f2e4c1"),
    plot.title = element_text(
      family = "henny",
      hjust = 0,
      face = "bold",
      size = 40,
      color = "#585c45",
      margin = margin(t = 10, r = 0, b = 10, l = 0)
    ),
    plot.caption = element_text(
      family = "henny",
      hjust = 0,
      size = 10,
      color = "#585c45",
      margin = margin(t = 10)
    )
  )
p