26 May 2022
PhD Statistics and Operational Research
Data scientist at Jumping Rivers
A lot of data visualisation…
Part 1
facet_zoom() from {ggforce}
Part 2
Let’s try to make a flowchart with {ggplot2}…
library(tidyverse)
library(igraph)
library(showtext)
library(rcartocolor)
# Edge list for the flowchart: each row is one arrow from `from` to `to`.
# The trailing digits on "Just right2" / "Just right3" keep those step names
# distinct from the first "Just right".
goldilocks <- tribble(
  ~from,         ~to,
  "Goldilocks",  "Porridge",
  "Porridge",    "Too cold",
  "Porridge",    "Too hot",
  "Porridge",    "Just right",
  "Just right",  "Chairs",
  "Chairs",      "Still too big",
  "Chairs",      "Too big",
  "Chairs",      "Just right2",
  "Just right2", "Beds",
  "Beds",        "Too soft",
  "Beds",        "Too hard",
  "Beds",        "Just right3",
  "Just right3", "Bears!"
)
## # A tibble: 6 × 2
##   from       to
##   <chr>      <chr>
## 1 Goldilocks Porridge
## 2 Porridge   Too cold
## 3 Porridge   Too hot
## 4 Porridge   Just right
## 5 Just right Chairs
## 6 Chairs     Still too big
# Turn the edge list into a directed graph and compute a tree layout for it.
g <- graph_from_data_frame(goldilocks, directed = TRUE)

# The layout is a two-column matrix; name the columns so they can be used as
# plotting coordinates.
coords <- layout_as_tree(g)
dimnames(coords) <- list(NULL, c("x", "y"))
##       x y
## [1,]  0 7
## [2,]  0 6
## [3,] -1 5
## [4,] -1 4
## [5,] -2 3
## [6,] -2 2
# Node table: one row per graph vertex, holding its layout coordinates, its
# name (`step`), a node `type` used for colouring, and a display `label`.
output_df <- as_tibble(coords) %>%
  mutate(
    step = vertex_attr(g, "name"),
    # mirror the layout horizontally
    x = -x,
    # Derive the type from the step name rather than from a hard-coded
    # positional vector, so the mapping stays correct if the vertex order or
    # the edge list changes: 1 = start/end, 2 = item being tried, 3 = outcome.
    type = factor(
      case_when(
        step %in% c("Goldilocks", "Bears!") ~ 1,
        step %in% c("Porridge", "Chairs", "Beds") ~ 2,
        TRUE ~ 3
      ),
      levels = c(1, 2, 3)
    ),
    # drop the trailing digits that only exist to keep step names unique
    label = gsub("\\d+$", "", step)
  )
## # A tibble: 6 × 5
##       x     y step        type  label
##   <dbl> <dbl> <chr>       <fct> <chr>
## 1     0     7 Goldilocks  1     Goldilocks
## 2     0     6 Porridge    2     Porridge
## 3     1     5 Just right  3     Just right
## 4     1     4 Chairs      2     Chairs
## 5     2     3 Just right2 3     Just right
## 6     2     2 Beds        2     Beds
# Add the edges of each node's rectangle, centred on (x, y):
# 0.7 units wide and 0.5 units tall.
plot_nodes <- mutate(
  output_df,
  xmin = x - 0.35,
  xmax = x + 0.35,
  ymin = y - 0.25,
  ymax = y + 0.25
)
## # A tibble: 6 × 9
##       x     y step        type  label       xmin  xmax  ymin  ymax
##   <dbl> <dbl> <chr>       <fct> <chr>      <dbl> <dbl> <dbl> <dbl>
## 1     0     7 Goldilocks  1     Goldilocks -0.35  0.35  6.75  7.25
## 2     0     6 Porridge    2     Porridge   -0.35  0.35  5.75  6.25
## 3     1     5 Just right  3     Just right  0.65  1.35  4.75  5.25
## 4     1     4 Chairs      2     Chairs      0.65  1.35  3.75  4.25
## 5     2     3 Just right2 3     Just right  1.65  2.35  2.75  3.25
## 6     2     2 Beds        2     Beds        1.65  2.35  1.75  2.25
# Arrow endpoints: two rows per edge (its "from" end and its "to" end),
# tied together by `id`.
plot_edges <- goldilocks %>%
  mutate(id = row_number()) %>%
  pivot_longer(cols = c(from, to), names_to = "s_e", values_to = "step") %>%
  # bring in only the node columns needed to place the endpoints
  left_join(select(plot_nodes, step, x, ymin, ymax), by = "step") %>%
  # an arrow leaves at the ymin edge of its source box and arrives at the
  # ymax edge of its target box
  mutate(y = ifelse(s_e == "from", ymin, ymax)) %>%
  select(id, s_e, step, x, y)
## # A tibble: 3 × 5
##      id s_e   step           x     y
##   <int> <chr> <chr>      <dbl> <dbl>
## 1     1 from  Goldilocks     0  6.75
## 2     1 to    Porridge       0  6.25
## 3     2 from  Porridge       0  5.75
# Register the Google font "Henny Penny" under the family name "henny" (the
# name the plot's text layers and theme refer to), then switch on showtext
# rendering. Each call must be its own statement: the three calls run together
# on a single line do not parse.
library(showtext)
font_add_google(name = "Henny Penny", family = "henny")
showtext_auto()
# Base flowchart, built from three layers: node boxes, node labels, arrows.
p <- ggplot() +
  # boxes: one semi-transparent rectangle per node, fill and outline by type
  geom_rect(
    data = plot_nodes,
    mapping = aes(
      xmin = xmin, xmax = xmax,
      ymin = ymin, ymax = ymax,
      fill = type, colour = type
    ),
    alpha = 0.5,
    linejoin = "round"
  ) +
  # labels: node text centred on each box
  geom_text(
    data = plot_nodes,
    mapping = aes(x = x, y = y, label = label),
    family = "henny",
    colour = "#585c45"
  ) +
  # arrows: one two-point path per edge id, with a closed arrow head
  geom_path(
    data = plot_edges,
    mapping = aes(x = x, y = y, group = id),
    colour = "#585c45",
    arrow = arrow(length = unit(0.3, "cm"), type = "closed")
  )
p
# Use the "Antique" CARTO palette for both the fill and the outline of the
# node boxes.
p <- p +
  scale_fill_carto_d(palette = "Antique") +
  scale_colour_carto_d(palette = "Antique")
p
# Title and caption. The caption paragraphs are separated by blank lines; the
# data credit also contains a single line break before "1837.".
p <- p +
  labs(
    title = "The Goldilocks Decision Tree",
    caption = paste(
      "N. Rennie",
      "Data: Robert Southey. Goldilocks and the Three Bears.\n1837.",
      "Image: New York Public Library",
      "#30DayChartChallenge",
      sep = "\n\n"
    )
  )
p
# Final styling: blank theme, no legend, a single background colour for plot
# and panel, and the "henny" font for title and caption. The colours are held
# in locals inside local() so no extra objects are left in the workspace.
p <- local({
  paper <- "#f2e4c1"
  ink <- "#585c45"
  backdrop <- element_rect(colour = paper, fill = paper)

  p +
    theme_void() +
    theme(
      plot.margin = unit(c(1, 1, 0.5, 1), "cm"),
      legend.position = "none",
      plot.background = backdrop,
      panel.background = backdrop,
      plot.title = element_text(
        family = "henny", hjust = 0, face = "bold",
        size = 40, color = ink,
        margin = margin(t = 10, r = 0, b = 10, l = 0)
      ),
      plot.caption = element_text(
        family = "henny", hjust = 0,
        size = 10, color = ink,
        margin = margin(t = 10)
      )
    )
})
p