Packages

library(tidyverse)

Exercise 1

Problem

Write a regular expression to match a

  1. social security number of the form ###-##-####,

  2. phone number of the form (###) ###-####,

  3. license plate of the form AAA ####.

Test your regexes on some examples with str_detect() or str_view().

Solution

# Sample strings holding one instance of each target format.
# The phone number is written as "(###) ###-####", the form the problem asks for.
x <- "My info is as follows. Cell: (432) 431-1512. Social security: 432-11-1990"
y <- "Vehicle info: AEF 2348"

# {n} quantifiers replace the hand-repeated character classes.
# str_view() is used instead of str_view_all(), which is deprecated in
# stringr >= 1.5.0; each sample string holds exactly one match per pattern.

# Social security number: ###-##-####
str_view(x, "[0-9]{3}-[0-9]{2}-[0-9]{4}")
# Phone number: (###) ###-#### -- the literal parentheses must be escaped.
str_view(x, "\\([0-9]{3}\\) [0-9]{3}-[0-9]{4}")
# License plate: AAA ####
str_view(y, "[A-Z]{3} [0-9]{4}")

Exercise 2

Problem

  1. Write a regular expression that will extract all phone numbers contained in the vector below.

  2. Once that works, use groups to extract the area code separately from the rest of the phone number.

# Sample input: one entry with no phone number, two entries with a single
# number each, and one entry with two numbers in mixed formats.
text <- c("apple", "219 733 8965", "329-293-8753",
          "Work: (579) 499-7527; Home: (543) 355 3679")

Solution

# Part 1: extract every phone number. The 3-digit area code may be wrapped in
# optional parentheses; the digit groups are separated by a space or a hyphen.
text %>%
  str_extract_all(pattern = "\\(?\\d{3}\\)?[ -]\\d{3}[ -]\\d{4}")
#> [[1]]
#> character(0)
#> 
#> [[2]]
#> [1] "219 733 8965"
#> 
#> [[3]]
#> [1] "329-293-8753"
#> 
#> [[4]]
#> [1] "(579) 499-7527" "(543) 355 3679"
# Part 2: same pattern, with the area code digits in a capture group.
# str_match_all() returns one matrix per input string; column 1 is the full
# match and column 2 is the first capture group (the area code).
text %>%
  str_match_all(pattern = "\\(?(\\d{3})\\)?[ -]\\d{3}[ -]\\d{4}") %>%
  purrr::map(function(m) m[, 2]) %>%
  unlist()
#> [1] "219" "329" "579" "543"

Exercise 3

Problem

Extract every word “fruit” or “flies” from phrases.

# Sample phrases; the last one contains "fruit" only as part of "fruitfly".
phrases <- c(
  "time flies when you're having fun in 521",
  "fruit flies when you throw it",
  "a fruit fly is a beautiful creature",
  "how do you spell fruitfly?"
)

Solution

# Alternation matches either literal anywhere in the string, so "fruit" is
# also found inside "fruitfly" (see element 4 of the output).
str_extract_all(string = phrases, pattern = "fruit|flies")
#> [[1]]
#> [1] "flies"
#> 
#> [[2]]
#> [1] "fruit" "flies"
#> 
#> [[3]]
#> [1] "fruit"
#> 
#> [[4]]
#> [1] "fruit"

Exercise 4

Problem

Extract the numeric values from the tongue twister below.

# Build the tongue twister as a single string; the fragments are joined
# with a single space between them.
twister <- paste(
  c(
    "Something in a 30 acre thermal thicket of thorns and",
    "thistles thumped and thundered threatening the 3-D",
    "thoughts of Matthew the thug - although, theatrically,",
    "it was only the 13000 thistles and thorns through the",
    "underneath of his thigh that the 30 year old thug",
    "thought of that morning."
  ),
  collapse = " "
)

Solution

# Extract each run of one or more consecutive digits; the "3" in "3-D"
# is returned as its own match.
str_extract_all(string = twister, pattern = "[:digit:]+")
#> [[1]]
#> [1] "30"    "3"     "13000" "30"