library(bench)
library(parallel)
library(tidyverse)
Benchmark `which("q" == chars)[1]` and `match("q", chars)`, where
# Exercise setup: 1000 values sampled with replacement from the lowercase
# letters and the digits 0-9 (c() coerces the digits to character).
chars <- sample(c(letters, 0:9), size = 1000,
replace = TRUE)
What do these expressions do?
# Draw 1000 characters, with replacement, from a-z and 0-9.
chars <- sample(
  c(letters, 0:9),
  size = 1000,
  replace = TRUE
)

# Both expressions return the position of the first "q" in `chars`
# (NA if "q" does not occur):
#   * which("q" == chars)[1] compares every element with "q", collects all
#     matching positions, and then keeps only the first one;
#   * match("q", chars) returns the position of the first match directly.
mark(
  which("q" == chars)[1],
  match("q", chars)
)
#> # A tibble: 2 x 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 which("q" == chars)[1] 5.8µs 8.8µs 107968. 7.91KB 21.6
#> 2 match("q", chars) 1.39µs 2.07µs 383010. 10.62KB 38.3
Benchmark the last two expressions.
# Exercise statement: X is a 1000 x 1000 matrix filled with standard-normal
# draws; the two expressions that follow are the ones to benchmark.
X <- matrix(rnorm(1000 * 1000), nrow = 1000, ncol = 1000)
sum(diag(X %*% t(X)))
sum(X ^ 2)
What do these expressions do?
# 1000 x 1000 matrix of standard-normal draws.
X <- matrix(
  rnorm(1000 * 1000),
  nrow = 1000,
  ncol = 1000
)

# Both expressions compute the sum of the squared entries of X (the squared
# Frobenius norm):
#   * sum(diag(X %*% t(X))) forms the full 1000 x 1000 product X t(X) and
#     then adds up only its diagonal (its trace);
#   * sum(X^2) squares each entry and adds them up, with no matrix product.
mark(
  sum(diag(X %*% t(X))),
  sum(X^2)
)
#> # A tibble: 2 x 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 sum(diag(X %*% t(X))) 690.27ms 690.27ms 1.45 15.32MB 1.45
#> 2 sum(X^2) 1.61ms 2.48ms 305. 7.63MB 67.9
Do you notice anything strange with the objects `result2` and `result4`? What is going on?
# Number of CPU cores R detects on this machine.
detectCores()
#> [1] 8

# Twelve single normal draws, computed by two forked workers.
# mc.set.seed = FALSE makes every worker start from the parent session's RNG
# state (provided the parent has already used or seeded the RNG), so the
# workers produce identical draws and each value shows up twice.
result2 <- mclapply(
  1:12,
  FUN = function(i) rnorm(1),
  mc.cores = 2,
  mc.set.seed = FALSE
) %>%
  unlist()
result2
#> [1] -1.1208915 -1.1208915 1.0087470 1.0087470 -1.3467196 -1.3467196
#> [7] 0.1774789 0.1774789 1.0212912 1.0212912 0.9554079 0.9554079

# Same experiment with four workers: each value now shows up four times.
result4 <- mclapply(
  1:12,
  FUN = function(i) rnorm(1),
  mc.cores = 4,
  mc.set.seed = FALSE
) %>%
  unlist()
result4
#> [1] -1.120892 -1.120892 -1.120892 -1.120892 1.008747 1.008747 1.008747
#> [8] 1.008747 -1.346720 -1.346720 -1.346720 -1.346720
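When `mc.set.seed = FALSE`, each child process starts with the same random number generator (RNG) state as the current R session. `mclapply()` divides the 12 tasks among the forked children, and every child then produces the same sequence of draws. Hence, each number appears twice when we use two cores and four times when we use four cores. (This relies on the RNG having already been used or seeded in the parent session; otherwise each child seeds itself independently and the duplication disappears.)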
Parallelize the evaluation of the four expressions below.
# Exercise statement: four computations on mtcars, none of which uses the
# result of another.
# 1. Number of rows for each value of cyl.
mtcars %>%
count(cyl)
# 2. Linear model of mpg on wt, hp and cyl (cyl treated as a factor).
mtcars %>%
lm(mpg ~ wt + hp + factor(cyl), data = .)
# 3. Storage type of every column.
map_chr(mtcars, typeof)
# 4. summary() of mpg and of the columns disp through qsec, row-bound into
#    one data frame.
mtcars %>%
select(mpg, disp:qsec) %>%
map_df(summary)
# Launch each expression in its own forked child process. mcparallel()
# returns a job handle immediately, so the four children run concurrently.
x <- list(
  expr1 = mcparallel({
    mtcars %>%
      count(cyl)
  }),
  expr2 = mcparallel({
    mtcars %>%
      lm(mpg ~ wt + hp + factor(cyl), data = .)
  }),
  expr3 = mcparallel({
    map_chr(mtcars, typeof)
  }),
  expr4 = mcparallel({
    mtcars %>%
      select(mpg, disp:qsec) %>%
      map_df(summary)
  })
)

# Wait for all four jobs and gather their results into one list; the jobs
# were started without a `name`, so the elements are labelled by child PID.
mccollect(x)
#> $`96100`
#> cyl n
#> 1 4 11
#> 2 6 7
#> 3 8 14
#>
#> $`96101`
#>
#> Call:
#> lm(formula = mpg ~ wt + hp + factor(cyl), data = .)
#>
#> Coefficients:
#> (Intercept) wt hp factor(cyl)6 factor(cyl)8
#> 35.84600 -3.18140 -0.02312 -3.35902 -3.18588
#>
#>
#> $`96102`
#> mpg cyl disp hp drat wt qsec vs
#> "double" "double" "double" "double" "double" "double" "double" "double"
#> am gear carb
#> "double" "double" "double"
#>
#> $`96103`
#> # A tibble: 6 x 6
#> Min. `1st Qu.` Median Mean `3rd Qu.` Max.
#> <table> <table> <table> <table> <table> <table>
#> 1 10.400 15.42500 19.200 20.090625 22.80 33.900
#> 2 71.100 120.82500 196.300 230.721875 326.00 472.000
#> 3 52.000 96.50000 123.000 146.687500 180.00 335.000
#> 4 2.760 3.08000 3.695 3.596563 3.92 4.930
#> 5 1.513 2.58125 3.325 3.217250 3.61 5.424
#> 6 14.500 16.89250 17.710 17.848750 18.90 22.900
Suppose you only have two cores. Use `mclapply()` to execute the function below in parallel so that the total run-time is six seconds.
#' Pause, then draw one uniform random number
#'
#' @param x Number of seconds to sleep before drawing.
#' @return A single draw from the standard uniform distribution.
sleep_r <- function(x) {
  Sys.sleep(time = x)
  runif(n = 1)
}
# Sleep durations (in seconds) of the three tasks.
x <- c(3, 3, 6)

# Core assignment per task: the two 3-second tasks share core 1 and the
# 6-second task gets core 2, so each core is busy for 6 seconds in total.
aff_list_bal <- c(1, 1, 2)

# affinity.list is used together with mc.preschedule = FALSE, i.e. one
# forked job per element of x (see ?mclapply).
system.time({
  mclapply(
    X = x,
    FUN = sleep_r,
    mc.preschedule = FALSE,
    mc.cores = 2,
    affinity.list = aff_list_bal
  )
})
#> user system elapsed
#> 0.007 0.012 6.016