---
title: "More purrr"
output: html_document
---
## Setup
```{r}
library(dplyr)
library(purrr)
```
## Example 2 from Tuesday
```{r}
primes = c( 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37,
43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97)
x = c(3, 4, 12, 19, 23, 48, 50, 61, 63, 78)
```
* for loop
```{r}
f1 = function() {
res = c()
for (v in x) {
if (!v %in% primes)
res = c(res, v)
}
res
}
f1()
```
* with subsetting
```{r}
f2 = function() {
x[ !x %in% primes]
}
f2()
```
* with an apply / map
```{r}
f3 = function() {
x[ sapply(x, function(v) !v %in% primes) ]
}
f4 = function() {
lapply(x, function(v) if (!v %in% primes) v else NULL) %>% unlist()
}
f5 = function() {
map(x, function(v) if (!v %in% primes) v else NULL) %>% unlist()
}
f6 = function() {
keep(x, function(v) !v %in% primes)
}
f7 = function() {
discard(x, function(v) v %in% primes)
}
```
### Benchmarking
```{r}
library(microbenchmark)
microbenchmark(f1(), f2(), f3(), f4(), f5(), f6(), f7())
x = sample(1:100, 1e3, replace = TRUE)
microbenchmark(f1(), f2(), f3(), f4(), f5(), f6(), f7())
```
## More repurrrsive
```{r}
library(repurrrsive)
```
### `sw_people`
```{r error=TRUE}
map_dfr(sw_people, ~.x)
map_dfr(sw_people, ~ as_data_frame(.x))
do.call(rbind, sw_people)
do.call(rbind, sw_people) %>%
tbl_df() %>%
mutate(name = unlist(name))
```
```{r}
df_f1 = function() {
data_frame(
name = map_chr(sw_people, "name"),
height = map_chr(sw_people, "height") %>% parse_numeric(),
mass = map_chr(sw_people, "mass") %>% parse_numeric(),
starships = map(sw_people, "starships")
)
}
parse_numeric = function(x) {
suppressWarnings(
x %>%
stringr::str_replace(",","") %>%
as.numeric()
)
}
df_f2 = function() {
map_dfr(
sw_people,
function(char) {
data_frame(
name = char %>% pluck("name"),
height = char %>% pluck("height") %>% parse_numeric(),
mass = char %>% pluck("mass") %>% parse_numeric(),
starships = char %>% pluck("starships") %>% list()
)
}
)
}
microbenchmark(df_f1(), df_f2(), times = 1)
```
#### unnest and starships
```{r error=TRUE}
data_frame(
name = map_chr(sw_people, "name"),
height = map_chr(sw_people, "height") %>% parse_numeric(),
mass = map_chr(sw_people, "mass") %>% parse_numeric(),
starships = map(sw_people, "starships")
) %>%
mutate(starships = map(starships, ~ if(is.null(.x)) NULL else .x)) %>%
tidyr::unnest()
data_frame(
name = map_chr(sw_people, "name"),
height = map_chr(sw_people, "height") %>% parse_numeric(),
mass = map_chr(sw_people, "mass") %>% parse_numeric(),
starships = map(sw_people, "starships")
) %>%
mutate(starships = map(starships, ~ if(is.null(.x)) NA else .x)) %>%
tidyr::unnest()
```
## List Columns
### HW2 Q6
6. What are the seven most popular hobbies of lego purchasers?
```{r error=TRUE}
sales = readRDS("data/lego_sales.rds")
sales %>%
distinct(first_name, last_name) %>%
nrow()
sales %>%
distinct(first_name, last_name, hobbies) %>%
nrow()
sales %>%
group_by(first_name, last_name) %>%
slice(1) %>%
select(hobbies) %>%
transmute(hobbies = unlist(hobbies))
```
#### dplyr only approach
```{r}
sales %>%
group_by(first_name, last_name) %>%
slice(1) %>%
pull(hobbies) %>%
unlist() %>%
data_frame(hobbies = .) %>%
group_by(hobbies) %>%
count() %>%
ungroup() %>%
arrange(desc(n)) %>%
top_n(7)
```
#### dplyr + tidyr approach
```{r}
sales %>%
group_by(first_name, last_name) %>%
slice(1) %>%
ungroup() %>%
select(first_name, last_name, hobbies)
sales %>%
group_by(first_name, last_name) %>%
slice(1) %>%
ungroup() %>%
select(first_name, last_name, hobbies) %>%
tidyr::unnest() %>%
group_by(hobbies) %>%
count() %>%
ungroup() %>%
arrange(desc(n)) %>%
top_n(7)
```