Packages

library(tidyverse)

Exercise 1

Problem

Profile the below code. Then try to improve the computation time while keeping the loops and not using parallel computing. Lastly, try an apply variant and evaluate the performance.

reps <- 10000
n <- 1000

beta_0 <- 2
beta_1 <- .5
beta_2 <- 3

beta_1_hat_all <- c()

for (s in c(1, 3, 7)) {
  beta_1_hat <- c()
  for (i in 1:reps) {
    X <- cbind(rnorm(n), rnorm(n) ^ 2)
    Y <- beta_0 + beta_1 * X[, 1, drop = FALSE]  + 
      beta_2 * X[, 2, drop = FALSE] + rnorm(n, sd = s)
    m <- lm(Y~X)
    beta_1_hat <- c(beta_1_hat, coefficients(m)[2])
  }
  beta_1_hat_all <- c(beta_1_hat_all, beta_1_hat)
}

beta_df <- tibble(sd         = rep(c(1, 3, 7), each = reps),
                  beta_1_hat = beta_1_hat_all)

Solution

Below are a couple of initial improvements.

We pre-allocated our beta_1_hat results vector
We used the least squares matrix solution to obtain \(\hat{\beta}\) rather than lm()

reps <- 10000
n <- 1000

beta_0 <- 2
beta_1 <- .5
beta_2 <- 3
st_dev <- c(1, 3, 7)

beta_1_hat_all <- NULL

for (s in st_dev) {
  beta_1_hat <- numeric(reps)
  for (i in 1:reps) {
    X <- cbind(1, rnorm(n), rnorm(n) ^ 2)
    Y <- beta_0 + beta_1 * X[, 2, drop = FALSE]  + 
      beta_2 * X[, 3, drop = FALSE] + rnorm(n, sd = s)
    
    beta_1_hat[i] <- (solve(t(X) %*% X) %*% t(X) %*% Y)[2, ]
  }
  beta_1_hat_all <- c(beta_1_hat_all, beta_1_hat)
}

beta_df <- tibble(sd         = rep(st_dev, each = reps),
                  beta_1_hat = beta_1_hat_all)

Some further improvements are below using a map() variant.

get_ols_estimate <- function(s, beta = 1) {
  
  beta_0 <- 2
  beta_1 <- .5
  beta_2 <- 3
  n <- 1000
  
  X <- cbind(1, rnorm(n), rnorm(n) ^ 2)
  
  Y <- beta_0 + beta_1 * X[, 2, drop = FALSE]  + beta_2 * X[, 3, drop = FALSE] + 
    rnorm(n, sd = s)
  
  beta_k_hat <- tibble(sd = s,
                       beta_k_hat = (solve(t(X) %*% X) %*% t(X) %*% Y)[beta + 1, ]
  )
  return(beta_k_hat)
}


reps <- 10000
s <- c(1, 3, 7)
beta_hat_df <- map2_df(1:(length(s) * reps), rep(s, each = reps), 
                       ~ get_ols_estimate(s = .y, beta = 1))

Exercise 2

Problem

Profile the below code. What do you notice?

x <- numeric()
for (i in 1:1e5) {
  x <- c(x, i)
}

Solution

The garbage collector was hard at work getting rid of those old objects in memory.

Exercises: Parallelization and Profiling

Shawn Santo

Packages

Exercise 1

Problem

Solution

Exercise 2

Problem

Solution