Packages

library(bench)
library(parallel)
library(tidyverse)

Exercise 1

Problem

Benchmark which("q" == chars)[1] and match("q", chars), where

# 1000 symbols drawn with replacement from the 26 lowercase letters and the
# digits 0-9 (the digits are coerced to character by c()).
chars <- sample(
  c(letters, 0:9),
  size = 1000,
  replace = TRUE
)

What do these expressions do?

Solution

# 1000 symbols drawn with replacement from the lowercase letters and the
# digits 0-9.
chars <- sample(c(letters, 0:9), size = 1000, replace = TRUE)

# Both expressions return the position of the first "q" in chars (NA if there
# is none):
#   * which("q" == chars)[1] compares "q" with all 1000 elements, collects the
#     index of every TRUE, and then keeps only the first of those indices.
#   * match("q", chars) looks up the position of the first match directly.
# By default mark() also checks that the benchmarked expressions return equal
# results.
mark(
  which("q" == chars)[1],
  match("q", chars)
)

#> # A tibble: 2 x 6
#>   expression                  min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>             <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 which("q" == chars)[1]    5.8µs    8.8µs   107968.    7.91KB     21.6
#> 2 match("q", chars)        1.39µs   2.07µs   383010.   10.62KB     38.3

Exercise 2

Problem

Benchmark the last two expressions.

# 1000 x 1000 matrix filled with independent standard-normal draws.
X <- matrix(rnorm(1000 * 1000), nrow = 1000, ncol = 1000)

# The two expressions to benchmark against each other.
sum(diag(X %*% t(X)))
sum(X ^ 2)

What do these expressions do?

Solution

# 1000 x 1000 matrix filled with independent standard-normal draws.
X <- matrix(rnorm(1000 * 1000), nrow = 1000, ncol = 1000)

# Both expressions return the sum of the squared entries of X:
#   * sum(diag(X %*% t(X))) builds the full 1000 x 1000 product X %*% t(X),
#     keeps only its diagonal, and adds those 1000 values. Diagonal entry i is
#     the sum of squares of row i, so the total covers every entry of X.
#   * sum(X ^ 2) squares every entry elementwise and adds the results directly.
# The first form performs a complete matrix multiplication only to discard
# the off-diagonal entries, which is why it is far slower in the timings below.
# By default mark() also checks that both expressions return equal results.
mark(
  sum(diag(X %*% t(X))),
  sum(X ^ 2)
)

#> # A tibble: 2 x 6
#>   expression                 min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>            <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 sum(diag(X %*% t(X))) 690.27ms 690.27ms      1.45   15.32MB     1.45
#> 2 sum(X^2)                1.61ms   2.48ms    305.      7.63MB    67.9

Exercise 3

Problem

Do you notice anything strange with objects result2 and result4? What is going on?

# Report how many CPU cores R detects on this machine.
detectCores()

#> [1] 8

# Twelve single rnorm() draws, evaluated on two forked workers and flattened
# from a list into a numeric vector.
result2 <- 1:12 %>%
  mclapply(
    FUN = function(x) rnorm(1),
    mc.cores = 2,
    mc.set.seed = FALSE
  ) %>%
  unlist()

result2

#>  [1] -1.1208915 -1.1208915  1.0087470  1.0087470 -1.3467196 -1.3467196
#>  [7]  0.1774789  0.1774789  1.0212912  1.0212912  0.9554079  0.9554079

# Twelve single rnorm() draws, evaluated on four forked workers and flattened
# from a list into a numeric vector.
result4 <- 1:12 %>%
  mclapply(
    FUN = function(x) rnorm(1),
    mc.cores = 4,
    mc.set.seed = FALSE
  ) %>%
  unlist()

result4

#>  [1] -1.120892 -1.120892 -1.120892 -1.120892  1.008747  1.008747  1.008747
#>  [8]  1.008747 -1.346720 -1.346720 -1.346720 -1.346720

Solution

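When mc.set.seed = FALSE, each child process starts with the same random number generator (RNG) state as the current R session (provided the RNG has already been used in that session). With the default prescheduling, mclapply() hands the 12 elements to the children in round-robin order, so every child produces the identical sequence of draws. Hence, each number appears two times in a row when we use two cores and four times in a row when we use four cores. The children do not advance the RNG state of the parent session, which is also why result2 and result4 contain the same values.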

Exercise 4

Problem

Parallelize the evaluation of the four expressions below.

# Expression 1: number of rows for each value of cyl.
mtcars %>% 
  count(cyl)

# Expression 2: linear model of mpg on wt, hp, and cyl treated as a factor.
mtcars %>% 
  lm(mpg ~ wt + hp + factor(cyl), data = .)

# Expression 3: storage type of every column, as a named character vector.
map_chr(mtcars, typeof)

# Expression 4: summary() of mpg and of the columns disp through qsec,
# combined into a single data frame.
mtcars %>% 
  select(mpg, disp:qsec) %>% 
  map_df(summary)

Solution

# Fork one background job per expression. Each mcparallel() call returns a job
# handle right away while its child process evaluates the expression.
x <- list(
  expr1 = mcparallel(
    mtcars %>%
      count(cyl)
  ),
  expr2 = mcparallel(
    mtcars %>%
      lm(mpg ~ wt + hp + factor(cyl), data = .)
  ),
  expr3 = mcparallel(
    map_chr(mtcars, typeof)
  ),
  expr4 = mcparallel(
    mtcars %>%
      select(mpg, disp:qsec) %>%
      map_df(summary)
  )
)

# Wait for every job to finish and gather the results in job order.
mccollect(x)

#> $`96100`
#>   cyl  n
#> 1   4 11
#> 2   6  7
#> 3   8 14
#> 
#> $`96101`
#> 
#> Call:
#> lm(formula = mpg ~ wt + hp + factor(cyl), data = .)
#> 
#> Coefficients:
#>  (Intercept)            wt            hp  factor(cyl)6  factor(cyl)8  
#>     35.84600      -3.18140      -0.02312      -3.35902      -3.18588  
#> 
#> 
#> $`96102`
#>      mpg      cyl     disp       hp     drat       wt     qsec       vs 
#> "double" "double" "double" "double" "double" "double" "double" "double" 
#>       am     gear     carb 
#> "double" "double" "double" 
#> 
#> $`96103`
#> # A tibble: 6 x 6
#>   Min.    `1st Qu.` Median  Mean       `3rd Qu.` Max.   
#>   <table> <table>   <table> <table>    <table>   <table>
#> 1 10.400   15.42500  19.200  20.090625  22.80     33.900
#> 2 71.100  120.82500 196.300 230.721875 326.00    472.000
#> 3 52.000   96.50000 123.000 146.687500 180.00    335.000
#> 4  2.760    3.08000   3.695   3.596563   3.92      4.930
#> 5  1.513    2.58125   3.325   3.217250   3.61      5.424
#> 6 14.500   16.89250  17.710  17.848750  18.90     22.900

Exercise 5

Problem

Suppose you only have two cores. Use mclapply() to run the function below on each element of x in parallel so that the total run time is six seconds.

#' Sleep, then draw a uniform random number
#'
#' @param x Number of seconds to pause before returning.
#' @return A single draw from the Uniform(0, 1) distribution.
sleep_r <- function(x) {
  Sys.sleep(time = x)
  runif(n = 1)
}

# Sleep durations (in seconds) for the three tasks.
x <- c(3, 3, 6)

Solution

# One CPU id per element of x: the two 3-second tasks are assigned to CPU 1
# and the 6-second task to CPU 2, so each CPU has six seconds of work.
aff_list_bal <- c(1, 1, 2)

# Time the parallel run; prescheduling is switched off so that the per-element
# assignment in affinity.list is used.
system.time(
  mclapply(
    X = x,
    FUN = sleep_r,
    mc.cores = 2,
    mc.preschedule = FALSE,
    affinity.list = aff_list_bal
  )
)

#>    user  system elapsed 
#>   0.007   0.012   6.016

Exercises: Parallelization

Shawn Santo

Packages

Exercise 1

Problem

Solution

Exercise 2

Problem

Solution

Exercise 3

Problem

Solution

Exercise 4

Problem

Solution

Exercise 5

Problem

Solution