Packages

library(bench)
library(parallel)
library(tidyverse)

Exercise 1

Problem

Compare which("q" == sample_letters)[1] and match("q", sample_letters), where

sample_letters <- sample(c(letters, 0:9), size = 1000, 
                         replace = TRUE)

What do these expressions do?

Solution

# Draw 1,000 characters, with replacement, from the lowercase letters and the
# digits 0-9 (c() coerces the digits to character).
sample_letters <- sample(
  x = c(letters, 0:9),
  size = 1000,
  replace = TRUE
)

# Both expressions give the position of the first "q" in sample_letters (NA if
# there is none): which() compares every element and lists all matching
# positions before [1] keeps the first, whereas match() returns the first
# matching position directly. bench::mark() times each expression and, by
# default, also checks that they return equal results.
bench::mark(
  which("q" == sample_letters)[1],
  match("q", sample_letters)
)

#> # A tibble: 2 x 6
#>   expression                           min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                      <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 which("q" == sample_letters)[1]      6µs   8.74µs   116632.    7.91KB     11.7
#> 2 match("q", sample_letters)        1.43µs   2.27µs   354580.   10.62KB     70.9

Exercise 2

Problem

Do you notice anything strange with objects result2 and result4? What is going on?

# Number of CPU cores reported for this machine.
parallel::detectCores()

#> [1] 8

# One standard-normal draw for each of 12 tasks, run on 2 forked workers with
# mc.set.seed = FALSE; the list returned by mclapply() is flattened into a
# numeric vector.
result2 <- unlist(
  mclapply(
    1:12,
    FUN = function(x) rnorm(1),
    mc.cores = 2,
    mc.set.seed = FALSE
  )
)

result2

#>  [1] -0.1841918 -0.1841918 -0.3567670 -0.3567670 -0.9360770 -0.9360770
#>  [7]  1.1408426  1.1408426  0.4460828  0.4460828 -0.4399691 -0.4399691

# Same 12 single-draw tasks as before, now run on 4 forked workers with
# mc.set.seed = FALSE; the list returned by mclapply() is flattened into a
# numeric vector.
result4 <- unlist(
  mclapply(
    1:12,
    FUN = function(x) rnorm(1),
    mc.cores = 4,
    mc.set.seed = FALSE
  )
)

result4

#>  [1] -0.1841918 -0.1841918 -0.1841918 -0.1841918 -0.3567670 -0.3567670
#>  [7] -0.3567670 -0.3567670 -0.9360770 -0.9360770 -0.9360770 -0.9360770

Solution

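When mc.set.seed = FALSE, each child process starts with the same random number generator (RNG) state as the current R session. Because the RNG has already been used in this session, every child draws the identical stream of random numbers. By default, mclapply() assigns the tasks to the workers in round-robin order (task 1 to worker 1, task 2 to worker 2, and so on), so the first draw of every worker lands in adjacent positions of the result, followed by the second draw of every worker, etc. Hence, we see each number repeated two times and four times in a row when we use two and four cores, respectively.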

Exercise 3

Problem

Parallelize the evaluation of the four expressions below.

# 1. Number of cars for each value of cyl.
count(mtcars, cyl)

# 2. Linear model of mpg on wt, hp, and cyl (treated as a factor).
mtcars %>%
  lm(formula = mpg ~ wt + hp + factor(cyl), data = .)

# 3. Storage type of every column.
mtcars %>%
  map_chr(typeof)

# 4. summary() of mpg and of each column from disp through qsec, one row per
#    column.
map_df(select(mtcars, mpg, disp:qsec), summary)

Solution

# Fork one child process per expression. Each mcparallel() call returns a job
# handle right away while its child evaluates the expression in the background.
x <- list(
  expr1 = mcparallel(count(mtcars, cyl)),
  expr2 = mcparallel(
    mtcars %>%
      lm(formula = mpg ~ wt + hp + factor(cyl), data = .)
  ),
  expr3 = mcparallel(mtcars %>% map_chr(typeof)),
  expr4 = mcparallel(map_df(select(mtcars, mpg, disp:qsec), summary))
)

# Wait for all four jobs and gather their results in job order. No `name` was
# passed to mcparallel(), so each result is labelled with its child's process
# ID rather than with the list names expr1, ..., expr4.
mccollect(x)

#> $`30226`
#>   cyl  n
#> 1   4 11
#> 2   6  7
#> 3   8 14
#> 
#> $`30227`
#> 
#> Call:
#> lm(formula = mpg ~ wt + hp + factor(cyl), data = .)
#> 
#> Coefficients:
#>  (Intercept)            wt            hp  factor(cyl)6  factor(cyl)8  
#>     35.84600      -3.18140      -0.02312      -3.35902      -3.18588  
#> 
#> 
#> $`30228`
#>      mpg      cyl     disp       hp     drat       wt     qsec       vs 
#> "double" "double" "double" "double" "double" "double" "double" "double" 
#>       am     gear     carb 
#> "double" "double" "double" 
#> 
#> $`30229`
#> # A tibble: 6 x 6
#>    Min. `1st Qu.` Median   Mean `3rd Qu.`   Max.
#>   <dbl>     <dbl>  <dbl>  <dbl>     <dbl>  <dbl>
#> 1 10.4      15.4   19.2   20.1      22.8   33.9 
#> 2 71.1     121.   196.   231.      326    472   
#> 3 52        96.5  123    147.      180    335   
#> 4  2.76      3.08   3.70   3.60      3.92   4.93
#> 5  1.51      2.58   3.32   3.22      3.61   5.42
#> 6 14.5      16.9   17.7   17.8      18.9   22.9

Exercises: Parallelization

Shawn Santo

Packages

Exercise 1

Problem

Solution

Exercise 2

Problem

Solution

Exercise 3

Problem

Solution