# Scrape the JWE issue archive on journals.riverpublishers.com and write one
# JSON record per article (series, title, link, authors) to "jwe-idx.json".

library(rvest)
library(tidyverse)

# Archive index pages ----
# Archive pages 1 through 5 are fetched and parsed.
idx <- sprintf(
  "https://journals.riverpublishers.com/index.php/JWE/issue/archive/%s",
  1:5
) |>
  map(read_html)

# Text of every "h2 div.series" node across the index pages, flattened into a
# single character vector.
series <- idx |>
  map(\(page) html_text(html_elements(page, "h2 div.series"), trim = TRUE)) |>
  unlist()

# href of every "h2 a.title" link across the index pages. `series` and
# `issue_links` are paired by position below, so each issue entry is expected
# to contribute exactly one of each.
issue_links <- idx |>
  map(\(page) html_attr(html_elements(page, "h2 a.title"), "href")) |>
  unlist()

# Issue pages ----
issues <- map(issue_links, read_html, .progress = TRUE)

# One table of article titles/links per issue, tagged with the issue's series.
# tibble() recycles the length-1 series value to the number of articles found,
# including zero, so an issue page without matching articles yields an empty
# table instead of an error.
# NOTE(review): as.integer() yields NA (with a warning) if the series text is
# not purely numeric -- confirm against the live page markup.
articles <- map2(issues, series, \(issue, issue_series) {
  anchors <- html_elements(issue, "h3.title a")
  tibble(
    title = html_text(anchors, trim = TRUE),
    link = html_attr(anchors, "href"),
    series = as.integer(issue_series)
  )
})

# One single-column table of author strings per issue.
authors <- map(issues, \(issue) {
  tibble(
    authors = html_text(
      html_elements(issue, "div.meta div.authors"),
      trim = TRUE
    )
  )
})

# Combine and export ----
# Titles and authors are matched purely by position within each issue;
# bind_cols() errors if an issue has a different number of title nodes and
# author nodes.
map2(articles, authors, bind_cols) |>
  list_rbind() |>
  select(series, title, link, authors) |>
  jsonlite::toJSON() |>
  writeLines("jwe-idx.json")