18 Day 16: Places

18.1 Technologies/Techniques

  • Taking inspiration from Day 1 ({sf}, {ggplot2}, geom_sf())
  • Web scraping
  • More use of {rgeocodio}

18.2 Data Source: The Retail Apocalypse

I track a few different happenings in the world, one of which is the “retail apocalypse”. I’ve been collecting this data for a bit, but we’ll use some scraping techniques to pull lists from Business Insider, too.

My first thought for “places” was “places impacted by something”, and losing these resources and jobs is pretty tough on a region, despite the “great” economy we’re supposed to be in.

library(sf)
library(rvest)
library(ggthemes)
library(stringi)
library(pdftools)
library(hrbrthemes)
library(rgeocodio)
library(albersusa)
library(magrittr)
library(tidyverse)

We’ll do the scraping first, along with some more geocoding via {rgeocodio}:

if (!file.exists(here::here("data/russe.rds"))) {
  russe_pg <- read_html("https://www.businessinsider.com/charlotte-russe-bankruptcy-stores-closing-list-2019-2")

  # the lists are actually in ul/li lists!
  
  html_nodes(russe_pg, xpath=".//p[contains(., 'of the closing')]/following-sibling::ul/li") %>%
    html_text() -> russe

  russe_g <- rgeocodio::gio_batch_geocode(russe)

  saveRDS(russe_g, here::here("data/russe.rds"))
  
}

russe_g <- readRDS(here::here("data/russe.rds"))
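
Batch geocoding is rarely perfect, so it’s worth a quick peek at how many addresses came back empty before moving on. A purely optional sanity-check sketch, leaning on the response_results list column that gio_batch_geocode() hands back (the same column we use further down):

# how many Charlotte Russe addresses failed to geocode?
sum(map_int(russe_g$response_results, nrow) == 0)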

if (!file.exists(here::here("data/sears.rds"))) {
  
  # the lists are actually in ul/li lists here too!
  
  # but we need to do some XPath surgery to get them

  sears_pg <- read_html("https://www.businessinsider.com/sears-closes-80-more-stores-2018-12")
  html_nodes(sears_pg, xpath=".//span[contains(., 'of the latest')]/../../p") %>%
    html_text() %>%
    keep(stri_detect_regex, "^(Sears|Kmart)") %>%
    stri_replace_first_regex("^(Sears[\\*]*|Kmart)", "") %>%
    stri_trim_both() -> sears

  sears2_pg <- read_html("https://www.businessinsider.com/sears-kmart-stores-closing-list-2018-10")
  html_nodes(sears2_pg, xpath=".//h2[text()='Sears' or text()='Kmart']/following-sibling::ul/li ") %>%
    html_text() %>%
    stri_trim_both() -> sears2

  sears_g <- rgeocodio::gio_batch_geocode(c(sears, sears2))
  
  saveRDS(sears_g, here::here("data/sears.rds"))

}

sears_g <- readRDS(here::here("data/sears.rds"))

# some previously scraped/geolocated data

if (!all(file.exists(here::here("data", c("dressbarn.rds", "payless.rds"))))) {

  dressbarn <- as_tibble(jsonlite::stream_in(gzcon(url("https://rud.is/dl/dressbarn-locations.json.gz"))))
  
  payless <- read_csv("http://rud.is/dl/2019-payless-store-closings.csv")
  
  saveRDS(dressbarn, here::here("data/dressbarn.rds"))
  saveRDS(payless, here::here("data/payless.rds"))
  
}

dressbarn <- readRDS(here::here("data/dressbarn.rds"))
payless <- readRDS(here::here("data/payless.rds"))

Let’s smush them all together with some useful labels:

bind_rows(
  filter(sears_g, map_lgl(response_results, ~nrow(.x) > 0)) %>% # find all valid geolocated data
    mutate(ll = map(response_results, ~select(.x, location.lng, location.lat) %>% slice(1))) %>% # pick the first geolocation result
    select(ll) %>%
    unnest(ll) %>%
    set_names(c("lng", "lat")) %>%
    mutate(brand = "Sears/Kmart"),

# lather, rinse, repeat  

  filter(russe_g, map_lgl(response_results, ~nrow(.x) > 0)) %>%
    mutate(ll = map(response_results, ~select(.x, location.lng, location.lat) %>% slice(1))) %>%
    select(ll) %>%
    unnest(ll) %>%
    set_names(c("lng", "lat")) %>%
    mutate(brand = "Russe"),

  select(payless, lng = longitude, lat=latitude) %>%
    mutate(brand = "Payless"),

  select(dressbarn, lng = lon, lat) %>%
    mutate(brand = "Dressbarn")
) %>%
  filter(lng > -130, lat > 21) -> continental # keep only the conterminous U.S.

glimpse(continental)
## Observations: 3,091
## Variables: 3
## $ lng   <dbl> -71.47046, -110.84236, -121.24129, -117.21061, -118.31655, -120…
## $ lat   <dbl> 42.07650, 32.22220, 37.93758, 34.47053, 34.18766, 34.90751, 38.…
## $ brand <chr> "Sears/Kmart", "Sears/Kmart", "Sears/Kmart", "Sears/Kmart", "Se…
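
As an aside: the Sears/Kmart and Charlotte Russe extraction pipelines above are copy/paste twins. If you end up tracking more brands, a small helper keeps that pattern in one place. A minimal sketch (the extract_first_ll() name is mine, not part of the original code):

# take a gio_batch_geocode() result, keep rows that geocoded successfully,
# grab the first candidate lng/lat pair, and tag the result with a brand label
extract_first_ll <- function(geo_df, brand_name) {
  filter(geo_df, map_lgl(response_results, ~nrow(.x) > 0)) %>%
    mutate(ll = map(response_results, ~select(.x, location.lng, location.lat) %>% slice(1))) %>%
    select(ll) %>%
    unnest(ll) %>%
    set_names(c("lng", "lat")) %>%
    mutate(brand = brand_name)
}

# e.g. bind_rows(extract_first_ll(sears_g, "Sears/Kmart"), extract_first_ll(russe_g, "Russe"))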

Now, we’ll turn our plain ol’ data frame into an {sf} object:

# Load up the Albers composite USA at the same time, but keep just the conterminous U.S.
usa <- usa_sf("laea") %>% filter(!(name %in% c("Alaska", "Hawaii")))

# make a spatial version of the data frame
st_as_sf(continental, coords = c("lng", "lat"), crs = us_longlat_proj) %>%
  st_transform(albersusa::us_laea_proj) -> continental

glimpse(continental)
## Observations: 3,091
## Variables: 2
## $ brand    <chr> "Sears/Kmart", "Sears/Kmart", "Sears/Kmart", "Sears/Kmart", …
## $ geometry <POINT [m]> POINT (2296208 82451.33), POINT (-1022963 -1353086), P…
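
geom_sf() will reproject layers on the fly, but it doesn’t hurt to confirm both layers now share a CRS before plotting. A quick, optional check (it should come back TRUE, since both use the albersusa LAEA projection):

# the basemap and the points should now be in the same Albers LAEA CRS
st_crs(usa) == st_crs(continental)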

18.3 Drawing the Map

Now, we’ll draw points, mapping brand to a color aesthetic to see what kind of damage was done and which company was involved:

ggplot() +
  geom_sf(data = usa, fill = "#252525", size = 0.125, color = "#b2b2b277") +
  geom_sf(data = continental, aes(color = brand), size = 0.25, alpha = 1/3, show.legend = "point") +
  scale_color_tableau(name = NULL) +
  coord_sf(datum = NA) +
  guides(
    colour = guide_legend(
      override.aes = list(size = 2, alpha=1)
    )
  ) +
  labs(
    x = NULL, y = NULL,
    title = "Places of the 2019 Retail Apocalypse",
    subtitle = "Locations of four major brands' store closings in 2019 alone (~3,100 stores in total)",
    caption = "Data source: (various + {rgeocodio}) • #30DayMapChallenge"
  ) +
  theme_ft_rc(grid="") +
  theme(legend.position = c(0.5, 0.95)) +
  theme(legend.direction = "horizontal") +
  theme(axis.text = element_blank())

18.4 In Review

We told a short story with today’s challenge by showing the places across the U.S. that have been impacted in some way by the “retail apocalypse”. We also mapped brand to the point color aesthetic rather than splitting the brands out into facets.
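
If you’d rather compare the brands side-by-side than layer them on one map, swapping the colour mapping for facets is a small change. A rough sketch reusing the usa and continental objects from above (the point colour is arbitrary):

ggplot() +
  geom_sf(data = usa, fill = "#252525", size = 0.125, color = "#b2b2b277") +
  geom_sf(data = continental, color = "#f9b85977", size = 0.25) +
  coord_sf(datum = NA) +
  facet_wrap(~brand) +
  labs(x = NULL, y = NULL, title = "2019 store closings, faceted by brand") +
  theme_ft_rc(grid="") +
  theme(axis.text = element_blank())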

18.5 Try This At Home

Dig into the data a bit and try to compute actual job and tax revenue impact and plot that on a different version of the map.

Bin the data (perhaps using the hexbin technique from a previous challenge) and show impact by those bins.
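
For that second idea, one way to start is to pull the projected point coordinates back out of the {sf} object and hex-bin them. A minimal sketch under those assumptions (closings_xy is my name; geom_hex() needs the {hexbin} package installed):

# recover the projected X/Y coordinates of each closing
closings_xy <- bind_cols(
  st_drop_geometry(continental),
  as_tibble(st_coordinates(continental))
)

ggplot() +
  geom_sf(data = usa, fill = "#252525", size = 0.125, color = "#b2b2b277") +
  geom_hex(data = closings_xy, aes(X, Y), bins = 40) + # hex-bin counts of closings
  scale_fill_viridis_c(name = "Closings") +
  coord_sf(datum = NA) +
  labs(x = NULL, y = NULL, title = "2019 store closings, hex-binned") +
  theme_ft_rc(grid="") +
  theme(axis.text = element_blank())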