Starting 2022 Off With A Fairly Complex {ggplot2} Recreation Plot

The New York Times had a [tragic] story on Covid deaths today and one of their plots really stuck with me for how well it told that part of the story.

NYT Chart on Covid Deaths

NOTE: The red panel highlights are off a bit as I manually typed the data in (I only did the recreation to keep {ggplot2} muscle memory as I hadn’t doe a major customization like this in quite some time).

Only one {grid} hack (for the faceted X axis labels) too!

Hopefully, I’ll have more real-world opportunity to build some detailed, properly-annotated {ggplot2} plots this year.

Shout out to @ClausWilke for {ggtext} and all the folks who’ve made {ggplot2} such a powerful data visualization tool.

library(grid)
library(gtable)
library(hrbrthemes)
library(tidyverse)

gtable_filter_remove <- function (x, name, trim = FALSE) {
  # https://stackoverflow.com/a/36780639
  matches <- !(x$layout$name %in% name)
  x$layout <- x$layout[matches, , drop = FALSE]
  x$grobs <- x$grobs[matches]
  if (trim) 
    x <- gtable_trim(x)
  x
}

read.csv(text="race,age_group,before,after,cause
White,Under 25,1,3,Covid-19 deaths increased as a share of deaths from all cause
White,25-44,3,10,Covid-19 deaths increased as a share of deaths from all cause
White,45-64,8,15,Covid-19 deaths increased as a share of deaths from all cause
White,65-84,13,11,NA
White,85+,14,6,NA
Hispanic,Under 25,3,4,Covid-19 deaths increased as a share of deaths from all cause
Hispanic,25-44,17,21,Covid-19 deaths increased as a share of deaths from all cause
Hispanic,45-64,33,26,NA
Hispanic,65-84,33,17,NA
Hispanic,85+,21,9,NA
Black,Under 25,1,3,Covid-19 deaths increased as a share of deaths from all cause
Black,25-44,7,13,Covid-19 deaths increased as a share of deaths from all cause
Black,45-64,15,17,Covid-19 deaths increased as a share of deaths from all cause
Black,65-84,20,12,Covid-19 deaths increased as a share of deaths from all cause
Black,85+,17,8,NA
Asian,Under 25,2,4,Covid-19 deaths increased as a share of deaths from all cause
Asian,25-44,12,14,Covid-19 deaths increased as a share of deaths from all cause
Asian,45-64,21,13,NA
Asian,65-84,23,8,NA
Asian,85+,17,4,NA") -> xdf

xdf %>% 
  mutate(
    before = before/100,
    after = after/100,
    age_group = fct_inorder(age_group),
    race = factor(race, levels = rev(c("Asian", "Black", "Hispanic", "White")))
  ) -> xdf

{

  ggplot( data = xdf) +
    geom_rect(
      data = xdf,
      aes(
        xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf,
        fill = cause
      ),
      alpha = 1/6, color = NA
    ) +
    geom_rect(
      data = xdf %>% 
        filter(
          (race == "White" & age_group %in% c("65-84", "85+")) |
            (race == "Hispanic" & age_group %in% c("45-64", "65-84", "85+")) |
            (race == "Black" & age_group %in% c("85+")) |
            (race == "Asian" & age_group %in% c("45-64", "65-84", "85+"))
        ),
      aes(
        xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = Inf
      ),
      fill = "#999999", alpha = 1/6, color = NA
    ) +
    geom_segment(
      aes(-Inf, xend = Inf, -Inf, yend= -Inf),
      size = 0.25, color = "black"
    ) +
    geom_segment(
      data = xdf, aes("1", before, xend="2", yend=after),
      size = 0.25
    ) +
    geom_point(
      data = xdf, aes("1", before), 
      fill = "#999999", color = "white", size = 2, stroke = 0.5, shape = 21
    ) +
    geom_point(
      data = xdf, aes("2", after),
      fill = "#bb271a", color = "white", size = 2, stroke = 0.5, shape = 21
    ) +
    geom_text(
      data = xdf,
      aes("1", before+0.05, label = scales::percent(before, 1)),
      color = "#999999", family = font_es_bold, fontface = "bold", size = 3
    ) +
    geom_text(
      data = xdf,
      aes("2", after+0.05, label = scales::percent(after, 1)),
      color = "#bb271a", family = font_es_bold, fontface = "bold", size = 3
    ) +
    scale_x_discrete(
      expand = c(0, 0),
      labels = c("<span style='color:#999999'>BEFORE</span>", "<span style='color:#bb271a'>AFTER</a>")
    ) +
    scale_y_percent(
      limits = c(-0.005, 0.405),
      breaks = c(-0.005, 0.1, 0.2, 0.3, 0.405),
      labels = c("", "", "", "", "40%\nof deaths from\nall causes for\nthis group")
    ) +
    scale_fill_manual(
      name = NULL,
      values = c("#bb271a"),
      na.translate = FALSE
    ) +
    coord_cartesian(clip = "off") +
    facet_wrap(
      facets = race~age_group, 
      scales = "free_x",
      labeller = \(labels, multi_line = TRUE){
        labels <- lapply(labels, as.character)
        labels[["race"]][c(1,2,4,5,6,7,9,10,11,12,14,15,16,17,19,20)] <- ""
        labels[["age_group"]] <- sprintf("<span style='font-style:normal;font-weight:normal;'>%s</span>", labels[["age_group"]])
        labels[["race"]][c(3,8,13,18)] <- sprintf("<span style='font-size:12pt;'>**%s**</span>", labels[["race"]][c(3,8,13,18)])
        labels
      }
    ) +
    labs(
      x = NULL, y = NULL,
      title = "Covid-19 deaths <span style='color:#999999'>before</span> and <span style='color:#bb271a'>after</span> universal adult vaccine eligibility",
      caption = "Source: Provisional weekly death data from the C.D.C. through Nov. 27. Note: Only the four largest racial and ethnic groups are included. Universal vaccine eligibility was April 19, the date when all adults in the United States were eligible for vaccination."
    ) +
    theme_ipsum_es(grid="Y", plot_title_size = 16) +
    theme(
      plot.title.position = "plot",
      plot.title = ggtext::element_markdown(hjust = 0.5),
      plot.caption = ggtext::element_textbox_simple(
        hjust = 0, size = 8.5, family = font_es, color = "#999999",
        margin = margin(t = 14)
      ),
      axis.ticks.x.bottom = ell(size = 0.25) ,
      axis.line.x.bottom = ell(lineend = "square", size = 0.25),
      axis.text.x.bottom = ggtext::element_markdown(size = 8, margin = margin(t = 6)),
      axis.text.y.left = elt(size = 8, vjust = 1, lineheight = 0.875,  color = "#999999"),
      strip.text.x = ggtext::element_markdown(hjust = 0.5, size = 10, family = font_es),
      strip.text = ggtext::element_markdown(hjust = 0.5, size = 10, family = font_es),
      panel.spacing.x = unit("40", "pt"),
      panel.spacing.y = unit(6, "pt"),
      panel.border = elb(),
      legend.position = "top"
    ) -> gg

  grid.newpage()
  grid.draw(
    gtable_filter_remove(
      x = ggplotGrob(gg),
      name = c(sprintf("axis-b-%d-1", 2:5), sprintf("axis-b-%d-2", 2:5), sprintf("axis-b-%d-3", 2:5), sprintf("axis-b-%d-4", 2:5))
    )
  )

}

{ggplot2} recreation of NYT plot

Cover image from Data-Driven Security
Amazon Author Page

4 Comments Starting 2022 Off With A Fairly Complex {ggplot2} Recreation Plot

  1. Pingback: Starting 2022 Off With A Fairly Complex {ggplot2} Recreation Plot – Data Science Austria

  2. Pingback: Starting 2022 Off With A Fairly Complex {ggplot2} Recreation Plot | R-bloggers

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.