Skip navigation

WWDC 2021 is on this week and many new fun things are being introduced, including some data science-friendly additions to the frameworks that come with Xcode 13 and available on macOS 12+ (and its *OS cousins).

Specifically, Apple has made tabular data a first-class citizen with the new TabularData app service.

A future post will have some more expository, but here’s a sample of core operations including:

  • reading in tabular data from CSV or JSON
  • examining the structure
  • working with columns and/or rows
  • grouping and filtering operations
  • transforming and removing columns

I’ve tagged this with rstats as there are R equivalents included for each operation so R folks can translate any Swift code they see in the future.

import TabularData

// define some basic formatting options for data frame output
let dOpts = FormattingOptions(maximumLineWidth: 80, maximumCellWidth: 10, maximumRowCount: 20, includesColumnTypes: true)

// read in a CSV file
// R: xdf <- read.csv("mtcars.csv")
var xdf = try! DataFrame.init(contentsOfCSVFile: URL(fileURLWithPath: "mtcars.csv"))

// take a look at it
// R: print(xdf) # no more print() in further R equivalents; just assume interactive or wrap with print
print(xdf.description(options: dOpts))

┏━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳╍╍╍╍╍╍┓
┃    ┃ mpg      ┃ cyl   ┃ disp     ┃ hp    ┃ drat     ┃ wt       ┃ 5    ┇
┃    ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Double> ┃ more ┇
┡━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇╍╍╍╍╍╍┩
│ 0  │ 21.0     │ 6     │ 160.0    │ 110   │ 3.9      │ 2.62     │      ┆
│ 1  │ 21.0     │ 6     │ 160.0    │ 110   │ 3.9      │ 2.875    │      ┆
│ 2  │ 22.8     │ 4     │ 108.0    │ 93    │ 3.85     │ 2.32     │      ┆
│ 3  │ 21.4     │ 6     │ 258.0    │ 110   │ 3.08     │ 3.215    │      ┆
│ 4  │ 18.7     │ 8     │ 360.0    │ 175   │ 3.15     │ 3.44     │      ┆
│ 5  │ 18.1     │ 6     │ 225.0    │ 105   │ 2.76     │ 3.46     │      ┆
│ 6  │ 14.3     │ 8     │ 360.0    │ 245   │ 3.21     │ 3.57     │      ┆
│ 7  │ 24.4     │ 4     │ 146.7    │ 62    │ 3.69     │ 3.19     │      ┆
│ 8  │ 22.8     │ 4     │ 140.8    │ 95    │ 3.92     │ 3.15     │      ┆
│ 9  │ 19.2     │ 6     │ 167.6    │ 123   │ 3.92     │ 3.44     │      ┆
│ 10 │ 17.8     │ 6     │ 167.6    │ 123   │ 3.92     │ 3.44     │      ┆
│ 11 │ 16.4     │ 8     │ 275.8    │ 180   │ 3.07     │ 4.07     │      ┆
│ 12 │ 17.3     │ 8     │ 275.8    │ 180   │ 3.07     │ 3.73     │      ┆
│ 13 │ 15.2     │ 8     │ 275.8    │ 180   │ 3.07     │ 3.78     │      ┆
│ 14 │ 10.4     │ 8     │ 472.0    │ 205   │ 2.93     │ 5.25     │      ┆
│ 15 │ 10.4     │ 8     │ 460.0    │ 215   │ 3.0      │ 5.424    │      ┆
│ 16 │ 14.7     │ 8     │ 440.0    │ 230   │ 3.23     │ 5.345    │      ┆
│ 17 │ 32.4     │ 4     │ 78.7     │ 66    │ 4.08     │ 2.2      │      ┆
│ 18 │ 30.4     │ 4     │ 75.7     │ 52    │ 4.93     │ 1.615    │      ┆
│ 19 │ 33.9     │ 4     │ 71.1     │ 65    │ 4.22     │ 1.835    │      ┆
┢╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍┪
┇ 12 more                                                               ┇
┗╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍┛

// dimensions
// R: dim(xdf)
print(xdf.shape)

(rows: 32, columns: 11)

// head
// R: head(xdf)
print(xdf.prefix(5).description(options: dOpts))

┏━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳╍╍╍╍╍╍┓
┃   ┃ mpg      ┃ cyl   ┃ disp     ┃ hp    ┃ drat     ┃ wt       ┃ 5    ┇
┃   ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Double> ┃ more ┇
┡━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇╍╍╍╍╍╍┩
│ 0 │ 21.0     │ 6     │ 160.0    │ 110   │ 3.9      │ 2.62     │      ┆
│ 1 │ 21.0     │ 6     │ 160.0    │ 110   │ 3.9      │ 2.875    │      ┆
│ 2 │ 22.8     │ 4     │ 108.0    │ 93    │ 3.85     │ 2.32     │      ┆
│ 3 │ 21.4     │ 6     │ 258.0    │ 110   │ 3.08     │ 3.215    │      ┆
│ 4 │ 18.7     │ 8     │ 360.0    │ 175   │ 3.15     │ 3.44     │      ┆
└───┴──────────┴───────┴──────────┴───────┴──────────┴──────────┴╌╌╌╌╌╌┘

// tail
// R: tail(xdf)
print(xdf.suffix(5).description(options: dOpts))

┏━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳╍╍╍╍╍╍┓
┃    ┃ mpg      ┃ cyl   ┃ disp     ┃ hp    ┃ drat     ┃ wt       ┃ 5    ┇
┃    ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Double> ┃ more ┇
┡━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇╍╍╍╍╍╍┩
│ 27 │ 30.4     │ 4     │ 95.1     │ 113   │ 3.77     │ 1.513    │      ┆
│ 28 │ 15.8     │ 8     │ 351.0    │ 264   │ 4.22     │ 3.17     │      ┆
│ 29 │ 19.7     │ 6     │ 145.0    │ 175   │ 3.62     │ 2.77     │      ┆
│ 30 │ 15.0     │ 8     │ 301.0    │ 335   │ 3.54     │ 3.57     │      ┆
│ 31 │ 21.4     │ 4     │ 121.0    │ 109   │ 4.11     │ 2.78     │      ┆
└────┴──────────┴───────┴──────────┴───────┴──────────┴──────────┴╌╌╌╌╌╌┘

// column summaries
// summary(xdf)
print(xdf.summaryOfAllColumns().description(options: dOpts))

┏━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳╍╍╍╍╍╍┓
┃   ┃ count(mpg) ┃ uniqueCou… ┃ top(mpg) ┃ topFreque… ┃ count(cyl) ┃ 39   ┇
┃   ┃ <Int>      ┃ <Int>      ┃ <Double> ┃ <Int>      ┃ <Int>      ┃ more ┇
┡━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇╍╍╍╍╍╍┩
│ 0 │ 32         │ 25         │ 21.4     │ 2          │ 32         │      ┆
└───┴────────────┴────────────┴──────────┴────────────┴────────────┴╌╌╌╌╌╌┘

// sort it
// R: library(tidyverse) # assume this going forward for R examples
// R: arrange(xdf, cyl)
xdf.sort(on: "cyl")

print(xdf.description(options: dOpts))

┏━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳╍╍╍╍╍╍┓
┃    ┃ mpg      ┃ cyl   ┃ disp     ┃ hp    ┃ drat     ┃ wt       ┃ 5    ┇
┃    ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Double> ┃ more ┇
┡━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇╍╍╍╍╍╍┩
│ 0  │ 22.8     │ 4     │ 108.0    │ 93    │ 3.85     │ 2.32     │      ┆
│ 1  │ 24.4     │ 4     │ 146.7    │ 62    │ 3.69     │ 3.19     │      ┆
│ 2  │ 22.8     │ 4     │ 140.8    │ 95    │ 3.92     │ 3.15     │      ┆
│ 3  │ 32.4     │ 4     │ 78.7     │ 66    │ 4.08     │ 2.2      │      ┆
│ 4  │ 30.4     │ 4     │ 75.7     │ 52    │ 4.93     │ 1.615    │      ┆
│ 5  │ 33.9     │ 4     │ 71.1     │ 65    │ 4.22     │ 1.835    │      ┆
│ 6  │ 21.5     │ 4     │ 120.1    │ 97    │ 3.7      │ 2.465    │      ┆
│ 7  │ 27.3     │ 4     │ 79.0     │ 66    │ 4.08     │ 1.935    │      ┆
│ 8  │ 26.0     │ 4     │ 120.3    │ 91    │ 4.43     │ 2.14     │      ┆
│ 9  │ 30.4     │ 4     │ 95.1     │ 113   │ 3.77     │ 1.513    │      ┆
│ 10 │ 21.4     │ 4     │ 121.0    │ 109   │ 4.11     │ 2.78     │      ┆
│ 11 │ 21.0     │ 6     │ 160.0    │ 110   │ 3.9      │ 2.62     │      ┆
│ 12 │ 21.0     │ 6     │ 160.0    │ 110   │ 3.9      │ 2.875    │      ┆
│ 13 │ 21.4     │ 6     │ 258.0    │ 110   │ 3.08     │ 3.215    │      ┆
│ 14 │ 18.1     │ 6     │ 225.0    │ 105   │ 2.76     │ 3.46     │      ┆
│ 15 │ 19.2     │ 6     │ 167.6    │ 123   │ 3.92     │ 3.44     │      ┆
│ 16 │ 17.8     │ 6     │ 167.6    │ 123   │ 3.92     │ 3.44     │      ┆
│ 17 │ 19.7     │ 6     │ 145.0    │ 175   │ 3.62     │ 2.77     │      ┆
│ 18 │ 18.7     │ 8     │ 360.0    │ 175   │ 3.15     │ 3.44     │      ┆
│ 19 │ 14.3     │ 8     │ 360.0    │ 245   │ 3.21     │ 3.57     │      ┆
┢╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍┪
┇ 12 more                                                               ┇
┗╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍┛

// read in a JSON File
// R: xdf2 <- jsonlite::fromJSON("mtcars.json")
var xdf2 = try! DataFrame.init(contentsOfJSONFile: URL(fileURLWithPath: "mtcars.json"))

// bind the rows together
// R: xdf <- bind_rows(xdf, xdf2)
xdf.append(xdf2)

// get the new summary
// R: summary(xdf)
print(xdf.summaryOfAllColumns().description(options: dOpts))

┏━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━┳╍╍╍╍╍╍┓
┃   ┃ count(mpg) ┃ uniqueCou… ┃ top(mpg) ┃ topFreque… ┃ count(cyl) ┃ 39   ┇
┃   ┃ <Int>      ┃ <Int>      ┃ <Double> ┃ <Int>      ┃ <Int>      ┃ more ┇
┡━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━╇╍╍╍╍╍╍┩
│ 0 │ 64         │ 25         │ 21.4     │ 4          │ 64         │      ┆
└───┴────────────┴────────────┴──────────┴────────────┴────────────┴╌╌╌╌╌╌┘

// basic filtering
// R: xdf.filter(cyl == 6)
print( xdf.filter(on: "cyl", Int.self) { (val) in val == 6 } )

┏━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳╍╍╍╍╍╍┓
┃    ┃ mpg      ┃ cyl   ┃ disp     ┃ hp    ┃ drat     ┃ wt       ┃ 5    ┇
┃    ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Double> ┃ more ┇
┡━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇╍╍╍╍╍╍┩
│ 11 │ 21.0     │ 6     │ 160.0    │ 110   │ 3.9      │ 2.62     │      ┆
│ 12 │ 21.0     │ 6     │ 160.0    │ 110   │ 3.9      │ 2.875    │      ┆
│ 13 │ 21.4     │ 6     │ 258.0    │ 110   │ 3.08     │ 3.215    │      ┆
│ 14 │ 18.1     │ 6     │ 225.0    │ 105   │ 2.76     │ 3.46     │      ┆
│ 15 │ 19.2     │ 6     │ 167.6    │ 123   │ 3.92     │ 3.44     │      ┆
│ 16 │ 17.8     │ 6     │ 167.6    │ 123   │ 3.92     │ 3.44     │      ┆
│ 17 │ 19.7     │ 6     │ 145.0    │ 175   │ 3.62     │ 2.77     │      ┆
│ 32 │ 21.0     │ 6     │ 160.0    │ 110   │ 3.9      │ 2.62     │      ┆
│ 33 │ 21.0     │ 6     │ 160.0    │ 110   │ 3.9      │ 2.875    │      ┆
│ 35 │ 21.4     │ 6     │ 258.0    │ 110   │ 3.08     │ 3.215    │      ┆
┢╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍┪
┇ 4 more                                                                ┇
┗╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍┛

// group by a column
// R: group_by(xdf, cyl)
print(xdf.grouped(by: "cyl"))

4
┏━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳╍╍╍╍╍╍┓
┃   ┃ mpg      ┃ cyl   ┃ disp     ┃ hp    ┃ drat     ┃ wt       ┃ 5    ┇
┃   ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Double> ┃ more ┇
┡━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇╍╍╍╍╍╍┩
│ 0 │ 22.8     │ 4     │ 108.0    │ 93    │ 3.85     │ 2.32     │      ┆
│ 1 │ 24.4     │ 4     │ 146.7    │ 62    │ 3.69     │ 3.19     │      ┆
│ 2 │ 22.8     │ 4     │ 140.8    │ 95    │ 3.92     │ 3.15     │      ┆
│ 3 │ 32.4     │ 4     │ 78.7     │ 66    │ 4.08     │ 2.2      │      ┆
│ 4 │ 30.4     │ 4     │ 75.7     │ 52    │ 4.93     │ 1.615    │      ┆
│ 5 │ 33.9     │ 4     │ 71.1     │ 65    │ 4.22     │ 1.835    │      ┆
│ 6 │ 21.5     │ 4     │ 120.1    │ 97    │ 3.7      │ 2.465    │      ┆
│ 7 │ 27.3     │ 4     │ 79.0     │ 66    │ 4.08     │ 1.935    │      ┆
│ 8 │ 26.0     │ 4     │ 120.3    │ 91    │ 4.43     │ 2.14     │      ┆
│ 9 │ 30.4     │ 4     │ 95.1     │ 113   │ 3.77     │ 1.513    │      ┆
┢╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍┪
┇ 12 more                                                              ┇
┗╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍┛

6
┏━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳╍╍╍╍╍╍┓
┃    ┃ mpg      ┃ cyl   ┃ disp     ┃ hp    ┃ drat     ┃ wt       ┃ 5    ┇
┃    ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Double> ┃ more ┇
┡━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇╍╍╍╍╍╍┩
│ 11 │ 21.0     │ 6     │ 160.0    │ 110   │ 3.9      │ 2.62     │      ┆
│ 12 │ 21.0     │ 6     │ 160.0    │ 110   │ 3.9      │ 2.875    │      ┆
│ 13 │ 21.4     │ 6     │ 258.0    │ 110   │ 3.08     │ 3.215    │      ┆
│ 14 │ 18.1     │ 6     │ 225.0    │ 105   │ 2.76     │ 3.46     │      ┆
│ 15 │ 19.2     │ 6     │ 167.6    │ 123   │ 3.92     │ 3.44     │      ┆
│ 16 │ 17.8     │ 6     │ 167.6    │ 123   │ 3.92     │ 3.44     │      ┆
│ 17 │ 19.7     │ 6     │ 145.0    │ 175   │ 3.62     │ 2.77     │      ┆
│ 32 │ 21.0     │ 6     │ 160.0    │ 110   │ 3.9      │ 2.62     │      ┆
│ 33 │ 21.0     │ 6     │ 160.0    │ 110   │ 3.9      │ 2.875    │      ┆
│ 35 │ 21.4     │ 6     │ 258.0    │ 110   │ 3.08     │ 3.215    │      ┆
┢╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍┪
┇ 4 more                                                                ┇
┗╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍┛

8
┏━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳╍╍╍╍╍╍┓
┃    ┃ mpg      ┃ cyl   ┃ disp     ┃ hp    ┃ drat     ┃ wt       ┃ 5    ┇
┃    ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Double> ┃ more ┇
┡━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇╍╍╍╍╍╍┩
│ 18 │ 18.7     │ 8     │ 360.0    │ 175   │ 3.15     │ 3.44     │      ┆
│ 19 │ 14.3     │ 8     │ 360.0    │ 245   │ 3.21     │ 3.57     │      ┆
│ 20 │ 16.4     │ 8     │ 275.8    │ 180   │ 3.07     │ 4.07     │      ┆
│ 21 │ 17.3     │ 8     │ 275.8    │ 180   │ 3.07     │ 3.73     │      ┆
│ 22 │ 15.2     │ 8     │ 275.8    │ 180   │ 3.07     │ 3.78     │      ┆
│ 23 │ 10.4     │ 8     │ 472.0    │ 205   │ 2.93     │ 5.25     │      ┆
│ 24 │ 10.4     │ 8     │ 460.0    │ 215   │ 3.0      │ 5.424    │      ┆
│ 25 │ 14.7     │ 8     │ 440.0    │ 230   │ 3.23     │ 5.345    │      ┆
│ 26 │ 15.5     │ 8     │ 318.0    │ 150   │ 2.76     │ 3.52     │      ┆
│ 27 │ 15.2     │ 8     │ 304.0    │ 150   │ 3.15     │ 3.435    │      ┆
┢╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍┪
┇ 18 more                                                               ┇
┗╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍┛

// number of groups
// R: group_by(xdf, cyl) %>% group_keys() %>% nrow()
print(xdf.grouped(by: "cyl").count)

3

// group, manipulate (in this case, filter), and re-combine
// R: group_by(xdf) %>% filter(mpg < 20) %>% ungroup()
print(
  xdf.grouped(by: "cyl").mapGroups { (val) in
    val.filter(on: "mpg", Double.self) { (val) in val! < 20 }.base
  }.ungrouped()
)

┏━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳╍╍╍╍╍╍┓
┃   ┃ mpg      ┃ disp     ┃ hp    ┃ drat     ┃ wt       ┃ qsec     ┃ 5    ┇
┃   ┃ <Double> ┃ <Double> ┃ <Int> ┃ <Double> ┃ <Double> ┃ <Double> ┃ more ┇
┡━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇╍╍╍╍╍╍┩
│ 0 │ 22.8     │ 108.0    │ 93    │ 3.85     │ 2.32     │ 18.61    │      ┆
│ 1 │ 24.4     │ 146.7    │ 62    │ 3.69     │ 3.19     │ 20.0     │      ┆
│ 2 │ 22.8     │ 140.8    │ 95    │ 3.92     │ 3.15     │ 22.9     │      ┆
│ 3 │ 32.4     │ 78.7     │ 66    │ 4.08     │ 2.2      │ 19.47    │      ┆
│ 4 │ 30.4     │ 75.7     │ 52    │ 4.93     │ 1.615    │ 18.52    │      ┆
│ 5 │ 33.9     │ 71.1     │ 65    │ 4.22     │ 1.835    │ 19.9     │      ┆
│ 6 │ 21.5     │ 120.1    │ 97    │ 3.7      │ 2.465    │ 20.01    │      ┆
│ 7 │ 27.3     │ 79.0     │ 66    │ 4.08     │ 1.935    │ 18.9     │      ┆
│ 8 │ 26.0     │ 120.3    │ 91    │ 4.43     │ 2.14     │ 16.7     │      ┆
│ 9 │ 30.4     │ 95.1     │ 113   │ 3.77     │ 1.513    │ 16.9     │      ┆
┢╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍┪
┇ 182 more                                                                ┇
┗╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍┛

// look at one column
// R: xdf$cyl
print( xdf["cyl"] )

┏━━━━━━━┓
┃ cyl   ┃
┃ <Int> ┃
┡━━━━━━━┩
│ 4     │
│ 4     │
│ 4     │
│ 4     │
│ 4     │
│ 4     │
│ 4     │
│ 4     │
│ 4     │
│ 4     │
┢╍╍╍╍╍╍╍┪
┇ 54 m… ┇
┗╍╍╍╍╍╍╍┛

// combine two columns and look at it
// R: mutate(xdf, cyl_mpg = sprintf("%s:%s", cyl, mpg) %>% select(-cyl, -mpg)
// R: unite(xdf, cyl_mpg, cyl, mpg, sep = ":") # alternate way
xdf.combineColumns("cyl", "mpg", into: "cyl_mpg") { (val1: Int?, val2: Double?) -> String in
  String(val1 ?? 0) + ":" + String(val2 ?? 0.0)
}

print(xdf["cyl_mpg"])

┏━━━━━━━━━━┓
┃ cyl_mpg  ┃
┃ <String> ┃
┡━━━━━━━━━━┩
│ 4:22.8   │
│ 4:24.4   │
│ 4:22.8   │
│ 4:32.4   │
│ 4:30.4   │
│ 4:33.9   │
│ 4:21.5   │
│ 4:27.3   │
│ 4:26.0   │
│ 4:30.4   │
┢╍╍╍╍╍╍╍╍╍╍┪
┇ 54 more  ┇
┗╍╍╍╍╍╍╍╍╍╍┛

// look at the colnames (^^ removes "cyl" and "mpg"
// R: colnames(xdf)
print(xdf.columns.map{ col in col.name })

["cyl_mpg", "disp", "hp", "drat", "wt", "qsec", "vs", "am", "gear", "carb"]

// turn an Int into a Double
// R: xdf$hp <- as.double(xdf$hp) # or use dplyr::mutate()
xdf.transformColumn("hp") { (val1: Int?) -> Double? in
  Double(val1 ?? 0)
}

print(xdf["hp"])

┏━━━━━━━━━━┓
┃ hp       ┃
┃ <Double> ┃
┡━━━━━━━━━━┩
│ 93.0     │
│ 62.0     │
│ 95.0     │
│ 66.0     │
│ 52.0     │
│ 65.0     │
│ 97.0     │
│ 66.0     │
│ 91.0     │
│ 113.0    │
┢╍╍╍╍╍╍╍╍╍╍┪
┇ 54 more  ┇
┗╍╍╍╍╍╍╍╍╍╍┛

// look at the coltypes
// R: sapply(mtcars, typeof)
print(xdf.columns.map{ col in col.wrappedElementType })

[Swift.String, Swift.Double, Swift.Double, Swift.Double, Swift.Double, Swift.Double, Swift.Int, Swift.Int, Swift.Int, Swift.Int]

// distinct horsepower
// R: distinct(xdf, hp)
print(xdf["hp"].distinct())

┏━━━━━━━━━━┓
┃ hp       ┃
┃ <Double> ┃
┡━━━━━━━━━━┩
│ 93.0     │
│ 62.0     │
│ 95.0     │
│ 66.0     │
│ 52.0     │
│ 65.0     │
│ 97.0     │
│ 91.0     │
│ 113.0    │
│ 109.0    │
┢╍╍╍╍╍╍╍╍╍╍┪
┇ 12 more  ┇
┗╍╍╍╍╍╍╍╍╍╍┛

// row slices
// R: xdf[10,]
print(xdf.rows[10])

┏━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳╍╍╍╍╍╍┓
┃    ┃ cyl_mpg  ┃ disp     ┃ hp       ┃ drat     ┃ wt       ┃ qsec     ┃ 4    ┇
┃    ┃ <String> ┃ <Double> ┃ <Double> ┃ <Double> ┃ <Double> ┃ <Double> ┃ more ┇
┡━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇╍╍╍╍╍╍┩
│ 10 │ 4:21.4   │ 121.0    │ 109.0    │ 4.11     │ 2.78     │ 18.6     │      ┆
└────┴──────────┴──────────┴──────────┴──────────┴──────────┴──────────┴╌╌╌╌╌╌┘

// R: xdf[3:10,]
print(xdf.rows[3...10])

Rows(base: 
┏━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━┳╍╍╍╍╍╍┓
┃   ┃ cyl_mpg  ┃ disp     ┃ hp       ┃ drat     ┃ wt       ┃ qsec     ┃ 4    ┇
┃   ┃ <String> ┃ <Double> ┃ <Double> ┃ <Double> ┃ <Double> ┃ <Double> ┃ more ┇
┡━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━╇╍╍╍╍╍╍┩
│ 0 │ 4:22.8   │ 108.0    │ 93.0     │ 3.85     │ 2.32     │ 18.61    │      ┆
│ 1 │ 4:24.4   │ 146.7    │ 62.0     │ 3.69     │ 3.19     │ 20.0     │      ┆
│ 2 │ 4:22.8   │ 140.8    │ 95.0     │ 3.92     │ 3.15     │ 22.9     │      ┆
│ 3 │ 4:32.4   │ 78.7     │ 66.0     │ 4.08     │ 2.2      │ 19.47    │      ┆
│ 4 │ 4:30.4   │ 75.7     │ 52.0     │ 4.93     │ 1.615    │ 18.52    │      ┆
│ 5 │ 4:33.9   │ 71.1     │ 65.0     │ 4.22     │ 1.835    │ 19.9     │      ┆
│ 6 │ 4:21.5   │ 120.1    │ 97.0     │ 3.7      │ 2.465    │ 20.01    │      ┆
│ 7 │ 4:27.3   │ 79.0     │ 66.0     │ 4.08     │ 1.935    │ 18.9     │      ┆
│ 8 │ 4:26.0   │ 120.3    │ 91.0     │ 4.43     │ 2.14     │ 16.7     │      ┆
│ 9 │ 4:30.4   │ 95.1     │ 113.0    │ 3.77     │ 1.513    │ 16.9     │      ┆
┢╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍╍╍╍╍┷╍╍╍╍╍╍┪
┇ 54 more                                                                    ┇
┗╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍╍┛
, subranges: _RangeSet(3..<11))

My archinfo utility is a small macOS command line application that displays process id, name, and architecture (arm64 or x86_64).

Version 0.4.0 adds the following capabilities:

  • Added --x86 (-x short option) to have archinfo only show x86_64 processes
  • Added --arm (-a short option) to have archinfo only show arm64 processes
  • Added --basename (-b short option) to havearchinfo` only show basenames of running processes vs the full path

The key driver for this update was to make it easier to only see the remaining, non-universal binary or ARM-only laggards.

Example runs with the new options:

$ archinfo --x86 --basename
  42801 x86_64 QtWebEngineProcess
  40408 x86_64 QtWebEngineProcess
  40378 x86_64 RStudio
  56426 x86_64 gpg-agent
   4244 x86_64 TwitchStudioStreamDeck
   4243 x86_64 QtWebEngineProcess
   4213 x86_64 QtWebEngineProcess
   4173 x86_64 kbfs
   4155 x86_64 keybase
   4140 x86_64 updater
   4065 x86_64 Keybase Helper (Renderer)
   4057 x86_64 Keybase Helper (Renderer)
   4056 x86_64 Keybase Helper
   4053 x86_64 Keybase Helper (GPU)
   4045 x86_64 krisp
   4043 x86_64 Keybase
   4035 x86_64 AdobeCRDaemon
   4033 x86_64 AdobeIPCBroker
   3993 x86_64 Elgato Control Center
   3971 x86_64 Stream Deck
   3959 x86_64 LogiVCCoreService
$ archinfo --x86 --basename --json
{"pid":42801,"arch":"x86_64","name":"QtWebEngineProcess"}
{"pid":40408,"arch":"x86_64","name":"QtWebEngineProcess"}
{"pid":40378,"arch":"x86_64","name":"RStudio"}
{"pid":56426,"arch":"x86_64","name":"gpg-agent"}
{"pid":4244,"arch":"x86_64","name":"TwitchStudioStreamDeck"}
{"pid":4243,"arch":"x86_64","name":"QtWebEngineProcess"}
{"pid":4213,"arch":"x86_64","name":"QtWebEngineProcess"}
{"pid":4173,"arch":"x86_64","name":"kbfs"}
{"pid":4155,"arch":"x86_64","name":"keybase"}
{"pid":4140,"arch":"x86_64","name":"updater"}
{"pid":4065,"arch":"x86_64","name":"Keybase Helper (Renderer)"}
{"pid":4057,"arch":"x86_64","name":"Keybase Helper (Renderer)"}
{"pid":4056,"arch":"x86_64","name":"Keybase Helper"}
{"pid":4053,"arch":"x86_64","name":"Keybase Helper (GPU)"}
{"pid":4045,"arch":"x86_64","name":"krisp"}
{"pid":4043,"arch":"x86_64","name":"Keybase"}
{"pid":4035,"arch":"x86_64","name":"AdobeCRDaemon"}
{"pid":4033,"arch":"x86_64","name":"AdobeIPCBroker"}
{"pid":3993,"arch":"x86_64","name":"Elgato Control Center"}
{"pid":3971,"arch":"x86_64","name":"Stream Deck"}
{"pid":3959,"arch":"x86_64","name":"LogiVCCoreService"}

You can find signed binaries and source code on the Releases page.

My {cdcfluview} package started tossing erros on CRAN just over a week ago when the CDC added an extra parameter to one of the hidden API endpoints that the package wraps. After a fairly hectic set of days since said NOTE came, I had time this morning to poke at a fix. There are alot of tests, so after successful debugging session I was awaiting CRAN checks on various remotes as well as README builds and figured I’d keep up some practice with another, nascent, package of mine, {swiftr}, which makes it dead simple to build R functions from Swift code, in similar fashion to what Rcpp::cppFunction() does for C/C++ code.

macOS comes with a full set of machine learning/AI libraries/frameworks that definitely have “batteries included” (i.e. you can almost just make one function call to get 90-95% what you want without even training new models). One of which is text extraction from Apple’s computer Vision framework. I thought it’d be a fun and quick “wait mode” distraction to wrap the VNRecognizeTextRequest() function and use it from R.

To show how capable the default model is, I pulled a semi-complex random image from DDG’s image search:

Yellow street signs against clear blue sky pointing different directions. Each plate on the street sign has a specific term like unsure, muddled, coonfused and so on. Dilemma and confusion concept. horizontal composition with copy space. Clipping path is included.

Let’s build the function (you need to be on macOS for this; exposition inine):

library(swiftr) # github.com/hrbrmstr/swiftr

swift_function(
  code = '
import Foundation
import CoreImage
import Cocoa
import Vision

@_cdecl ("detect_text")
public func detect_text(path: SEXP) -> SEXP {

   // turn R string into Swift String so we can use it
   let fileName = String(cString: R_CHAR(STRING_ELT(path, 0)))

   var res: String = ""
   var out: SEXP = R_NilValue

  // get image into the right format
  if let ciImage = CIImage(contentsOf: URL(fileURLWithPath:fileName)) {

    let context = CIContext(options: nil)
    if let img = context.createCGImage(ciImage, from: ciImage.extent) {

      // setup comptuer vision request
      let requestHandler = VNImageRequestHandler(cgImage: img)

      // start recognition
      let request = VNRecognizeTextRequest()
      do {
        try requestHandler.perform([request])

        // if we have results
        if let observations = request.results as? [VNRecognizedTextObservation] {

          // paste them together
          let recognizedStrings = observations.compactMap { observation in
            observation.topCandidates(1).first?.string
          }
          res = recognizedStrings.joined(separator: "\\n")
        }
      } catch {
        debugPrint("\\(error)")
      }
    }
  }

  res.withCString { cstr in out = Rf_mkString(cstr) }

  return(out)
}
')

The detect_text() is now available in R, so let’s see how it performs on that image of signs:

detect_text(path.expand("~/Data/signs.jpeg")) %>% 
  stringi::stri_split_lines() %>% 
  unlist()
##  [1] "BEWILDERED" "UNCLEAR"    "nAZEU"      "UNCERTAIN"  "VISA"       "INSURE"    
##  [7] "ATED"       "MUDDLED"    "LOsT"       "DISTRACTED" "PERPLEXED"  "CONFUSED"  
## [13] "PUZZLED" 

It works super-fast and gets far more correct than I would have expected.

Toy examples aside, it also works pretty well (as one would expect) on “real” text images, such as this example from the Tesseract test suite:

tesseract project newspaper clipping example text image

detect_text(path.expand("~/Data/tesseract/news.3B/0/8200_006.3B.tif")) %>% 
  stringi::stri_split_lines() %>% 
  unlist()
##  [1] "Tobacco chiefs still refuse to see the truth abou"                           
##  [2] "even of America's least conscionable"                                        
##  [3] "The tobacco industry would like to promote"                                  
##  [4] "men sat together in Washington last"                                         
##  [5] "under the conditions they are used.'"                                        
##  [6] "week to do what they do best: blow"                                          
##  [7] "the specter of prohibition."                                                 
##  [8] "panel\" of toxicologists as \"not hazardous"                                 
##  [9] "smoke at the truth about cigarettes."                                        
## [10] "'If cigarettes are too dangerous to be sold,"                                
## [11] "then ban them. Some smokers will obey the"                                   
## [12] "People not paid by the tobacco companies"                                    
## [13] "aren't so sure. The list includes several"                                   
## [14] "The CEOs of the nation's largest tobacco"                                    
## [15] "firms told congressional panel that nicotine"                                
## [16] "law, but many will not. People will be selling"                              
## [17] "iS not addictive, that they are unconvinced"                                 
## [18] "cigarettes out of the trunks of cars, cigarettes"                            
## [19] "substances the government does not allow in"                                 
## [20] "foods or classifies as potentially toxic. They"                              
## [21] "that smoking causes lung cancer or any other"                                
## [22] "made by who knows who, made of who knows include ammonia, a pesticide called"
## [23] "illness, and that smoking is no more harmful"                                
## [24] "what,\" said James Johnston of R.J. Reynolds."                               
## [25] "than drinking coffee or eating Twinkies."                                    
## [26] "It's a ruse. He knows cigarettes are not"                                    
## [27] "methoprene, and ethyl furoate, which has"                                    
## [28] "They said these things with straight taces."                                 
## [29] "going to be banned, at leasi not in his lifetime."                           
## [30] "caused liver damage in rats."                                                
## [31] "The list \"begs a number of important"                                       
## [32] "They said them in the face of massive"                                       
## [33] "STEVE WILSON"                                                                
## [34] "What he really fears are new taxes, stronger"                                
## [35] "questions about the safety of these additives,\""                            
## [36] "scientific evidence that smoking is responsible"                             
## [37] "anti-smoking campaigns, further smoking"                                     
## [38] "said a joint statement from the American"                                    
## [39] "for more than 400,000 deaths every year."                                    
## [40] "restrictions, limits on secondhand smoke and"                                
## [41] "Rep. Henry Waxman, D-Calif., put that"                                       
## [42] "Republic Columnist"                                                          
## [43] "Lung, Cancer and Heart associations. The"                                    
## [44] "limits on tar and nicotine."                                                 
## [45] "statement added that substances safe to eat"                                 
## [46] "frightful statistic another way:"                                            
## [47] "Collectively, these steps can accelerate the"                                
## [48] "\"Imagine our nation's outrage if two fully"                                 
## [49] "He and the others played dumb for the"                                       
## [50] "current 5 percent annual decline in cigarette"                               
## [51] "aren't necessarily safe to inhale."                                          
## [52] "The 50-page list can be obtained free by"                                    
## [53] "loaded jumbo jets crashed each day, killing all"                             
## [54] "entire six hours, but really didn't matter."                                 
## [55] "use and turn the tobacco business from highly"                               
## [56] "calling 1-800-852-8749."                                                     
## [57] "aboard. That's the same number of Americans"                                 
## [58] "The game i nearly over, and the tobacco"                                     
## [59] "profitable to depressed."                                                    
## [60] "Johnson's comment about cigarettes \"made"                                   
## [61] "Here are just the 44 ingredients that start"                                 
## [62] "that cigarettes kill every 24 hours.'"                                       
## [63] "executives know it."                                                         
## [64] "with the letter \"A\":"                                                      
## [65] "The CEOs were not impressed."                                                
## [66] "The hearing marked a turning point in the"                                   
## [67] "of who knows what\" was comical."                                            
## [68] "Acetanisole, acetic acid, acetoin,"                                          
## [69] "\"We have looked at the data."                                               
## [70] "It does"                                                                     
## [71] "nation's growing aversion to cigarettes. No"                                 
## [72] "The day before the hearing, the tobacco"                                     
## [73] "acetophenone,6-acetoxydihydrotheaspirane,"                                   
## [74] "not convince me that smoking causes death,\""                                
## [75] "2-acetyl-3-ethylpyrazine, 2-acetyl-5-"                                       
## [76] "said Andrew Tisch of the Lorillard Tobacco"                                  
## [77] "longer hamstrung by tobacco-state seniority"                                 
## [78] "companies released a long-secret list of 599"                                
## [79] "Co."                                                                         
## [80] "and the deep-pocketed tobacco lobby,"                                        
## [81] "methylfuran, acetylpyrazine, 2-acetylpyridine,"                              
## [82] "Congress is taking aim at cigarette makers."                                 
## [83] "additives used in cigarettes. The companies"                                 
## [84] "said all are certified by an \"independent"                                  
## [85] "3-acetylpyridine, 2-acetylthiazole, aconitic"   

(You can compare that on your own with the Tesseract results.)

FIN

{cdcfluview} checks are done, and the fixed functions are back on CRAN! Just in time to close out this post.

If you’re on macOS, definitely check out the various ML/AI frameworks Apple has to offer via Swift and have some fun playing with integrating them into R (or build some small, command line utilities if you want to keep Swift and R apart).

The fine folks over at @ObservableHQ released a new javascript exploratory visualization library called Plot last week with great fanfare. It was primarily designed to be used in Observable notebooks and I quickly tested it out there (you can find them at my Observable landing page: https://observablehq.com/@hrbrmstr).

{Plot} doesn’t require Observable, however, and I threw together a small example that dynamically tracks U.S. airline passenger counts by the TSA to demonstrate how to use it in a plain web page.

It’s small enough that I can re-create it here:

TSA Total Traveler Throughput 2021 vs 2020 vs 2019 (same weekday)


and include the (lightly annotated) source:

fetch(
"https://observable-cors.glitch.me/https://www.tsa.gov/coronavirus/passenger-throughput",
{
  cache: "no-store",
  mode: "cors",
  redirect: "follow"
}
)
.then((response) => response.text()) // we get the text here
.then((html) => {

   var parser = new DOMParser();
   var doc = parser.parseFromString(html, "text/html"); // we turn it into DOM elements here

   // some helpers to make the code less crufty
   // first a function to make proper dates

   var as_date = d3.timeParse("%m/%d/%Y");

   // and, now, a little function to pull a specific <table> column and
   // convert it to a proper numeric array. I would have put this inline
   // if we were only converting one column but there are three of them,
   // so it makes sense to functionize it.

   var col_double = (col_num) => {
     return Array.from(
     doc.querySelectorAll(`table tbody tr td:nth-child(${col_num})`)
     ).map((d) => parseFloat(d.innerText.trim().replace(/,/g, "")));
   };

   // build an arquero table from the scraped columns

   var flights = aq
         .table({
            day: Array.from(
                   doc.querySelectorAll("table tbody tr td:nth-child(1)")
                 ).map((d) => as_date(d.innerText.trim().replace(/.{4}$/g, "2021"))),
            y2021: col_double(2),
            y2020: col_double(3),
            y2019: col_double(4)
        })
        .orderby("day")
        .objects()
        .filter((d) => !isNaN(d.y2021))

   document.getElementById('vis').appendChild(
     Plot.plot({
       marginLeft: 80,
       x: {
         grid: true
       },
       y: {
         grid: true,
         label: "# Passengers"
       },
       marks: [
         Plot.line(flights, { x: "day", y: "y2019", stroke: "#4E79A7" }),
         Plot.line(flights, { x: "day", y: "y2020", stroke: "#F28E2B" }),
         Plot.line(flights, { x: "day", y: "y2021", stroke: "#E15759" })
       ]
    })
  );

})
.catch((err) => err)

FIN

I’ll likely do a more in-depth piece on Plot in the coming weeks (today is Mother’s Dayin the U.S. and that’s going to consume most of my attention today), but I highly encourage y’all to play with this new, fun tool.

On or about Friday evening (May 7, 2021) Edge notified me that the Feedly Mini extension (one of the only extensions I use as extensions are dangerous things) was remove from the store due to “malware”.

Feedly is used by many newshounds, and with 2021 being a very bad year when it comes to supply-chain attacks, seeing a notice about malware in a very popular Chrome extension is more than a little distressing.

I’m posting this blog to get the word “malware” associated with “Feedly” so they are compelled to make some sort of statement. I’ll update it with more information as it is provided.

Rather than continue to generate daily images with R, I threw together an Observable notebook that takes advantage of the CDC COVID-19 county data datasette (provided by Simon Willison) and the new {Plot} library (by the @ObservableHQ team) that enables users to interactively see the daily county resident vaccination “series complete” percentage distribution.

The full notebook is here — https://observablehq.com/@hrbrmstr/us-county-covid-vaccination-progress — and the interactive visualization is embedded below (though it doesn’t support “dark mode” well):

There’s a semi-infrequent-but-frequent-enough-to-be-annoying manual task at $DAYJOB that involves extracting a particular set of strings (identifiable by a fairly benign set of regular expressions) from various interactive text sources (so, not static documents or documents easily scrape-able).

Rather than hack something onto Sublime Text or VS Code I made a small macOS app in SwiftUI that does the extraction when something is pasted.

It occurred to me that this would work for indicators of compromise (IoCs) — because why not add one more to the 5 billion of them on GitHub — and I forked my app, removed all the $WORK bits and added in some code to do just this, and unimaginatively dubbed it extractor. Here’s the main view:

macOS GUI window showing the extractor main view

For now, extractor handles identifying & extracting CIDRs, IPv4s, URLs, hostnames, and email addresses (file issues if you really want hashes, CVE strings or other types) either from:

  • an input URL (it fetches the content and extracts IoCs from the rendered HTML, not the HTML source);
  • items pasted into the textbox (more on some SwiftUI 2 foibles regarding that in a bit); and
  • PDF, HTML, and text files (via Open / ⌘-o)

Here it is extracting IoCs from one of FireEye’s “solarwinds”-related posts:

macOS GUI window showing extracted IoCs from a blog post

If you tick the “Monitor Pasteboard” toggle, the app will monitor the all system-wide additions to the pasteboard, extract the IoCs from them and put them in the textbox. (I think I really need to make this additive to the text in the textbox vs replacing what’s there).

You can save the indicators out to a text file (via Save / ⌘-s) or just copy them from the text box (if you want ndjson or some threat indicator sharing format file an issue).

That SwiftUI 2 Thing I Mentioned

SwiftUI 2 makes app-creation very straightforward, but it also still has many limitations. One of which is how windows/controls handle the “Paste” command. The glue code to make this app really work the way I’d like it to work is just annoying enough to have it on a TODO vs an ISDONE list and I’m hoping SwiftUI 3 comes out with WWDC 2021 (in a scant ~2 months) and provides a less hacky solution.

FIN

You can find the source and notarized binary releases of extractor on GitHub. File issues for questions, feature requests, or problems with the app/code.

Because I used SwiftUI 2, it is very likely possible to have this app work on iOS and iPadOS devices. I can’t see anyone using an iPad for DFIR work, but if you’d like a version of this for iOS/iPadOS, also drop an issue.

There are a plethora of amazingly useful Golang libraries, and it has been possible for quite some time to use Go libraries with Swift. The advent of the release of the new Apple Silicon/M1/arm64 architecture for macOS created the need for a new round of “fat”/”universal” binaries and libraries to bridge the gap between legacy Intel Macs and the new breed of Macs.

I didn’t see an “all-in-one-place” snippet for how to build cross-platform + fat/universal Golang static libraries and then use them in Swift to make a fat/universal macOS binary, and it is likely I’m not the only one who wished this existed, so here’s a snippet that takes the static HTML library example from Young Dynasty and shows how to prepare the static library for use in Swift, then how to build a Swift universal binary. This is a command line app and we’re building everything without the Xcode editor to keep it concise and straightforward.

The rest of the explanatory text is in the comments in the code block.

# make a space to play in
mkdir universal-static-test
cd universal-static-test

# libhtmlscraper via: https://youngdynasty.net/posts/writing-mac-apps-in-go/
# make the small HTML escaper library Golang source
cat > main.go << EOF
package main

import (
  "C"
  "html"
)

//export escape_html
func escape_html(input *C.char) *C.char {
  s := html.EscapeString(C.GoString(input))
  return C.CString(s)
}

//export unescape_html
func unescape_html(input *C.char) *C.char {
  s := html.UnescapeString(C.GoString(input))
  return C.CString(s)
}

// We need an entry point; it's ok for this to be empty
func main() {}
EOF

# build the Go library for ARM
CGO_ENABLED=1 GOOS=darwin GOARCH=arm64 go build --buildmode=c-archive -o libhtmlescaper-arm64.a

# build the Go library for AMD
CGO_ENABLED=1 GOOS=darwin GOARCH=amd64 go build --buildmode=c-archive -o libhtmlescaper-amd64.a

# Make a universal static archive
lipo -create libhtmlescaper-arm64.a libhtmlescaper-amd64.a -o libhtmlescaper.a

# we don't need this anymore
rm libhtmlescaper-amd64.h

# this is a better name
mv libhtmlescaper-arm64.h libhtmlescaper.h

# make the objective-c bridging header so we can use the library in Swift
cat > bridge.h <<EOF
#include "libhtmlescaler.h"
EOF

# creaate a lame/super basic test swift file that uses the Go library
cat > main.swift <<EOF
print(String(cString: escape_html(strdup("<b>bold</b>"))))
EOF

# make the swift executatble for amd64
swiftc -target x86_64-apple-macos11.0 -import-objc-header bridge.h main.swift libhtmlescaper.a -o main-amd64

# make the swift executatble for arm64
swiftc -target arm64-apple-macos11.0 -import-objc-header bridge.h main.swift libhtmlescaper.a -o main-arm64 

# Make a universal binary
lipo -create main-amd64 main-arm64 -o main

# Make sure it's universal
file main
## main: Mach-O universal binary with 2 architectures: [x86_64:Mach-O 64-bit executable x86_64] [arm64]
## main (for architecture x86_64): Mach-O 64-bit executable x86_64
## main (for architecture arm64):  Mach-O 64-bit executable arm64

# try it out
./main.swift
## "<b>bold</b>"