library(tidyverse)
Profile the below code. Then try to improve the computation time while keeping the loops and not using parallel computing. Lastly, try an apply variant and evaluate the performance.
reps <- 10000
n <- 1000
beta_0 <- 2
beta_1 <- .5
beta_2 <- 3
beta_1_hat_all <- c()
for (s in c(1, 3, 7)) {
beta_1_hat <- c()
for (i in 1:reps) {
X <- cbind(rnorm(n), rnorm(n) ^ 2)
Y <- beta_0 + beta_1 * X[, 1, drop = FALSE] +
beta_2 * X[, 2, drop = FALSE] + rnorm(n, sd = s)
m <- lm(Y~X)
beta_1_hat <- c(beta_1_hat, coefficients(m)[2])
}
beta_1_hat_all <- c(beta_1_hat_all, beta_1_hat)
}
beta_df <- tibble(sd = rep(c(1, 3, 7), each = reps),
beta_1_hat = beta_1_hat_all)
Below are a couple of initial improvements.
beta_1_hat
results vectorlm()
reps <- 10000
n <- 1000
beta_0 <- 2
beta_1 <- .5
beta_2 <- 3
st_dev <- c(1, 3, 7)
beta_1_hat_all <- NULL
for (s in st_dev) {
beta_1_hat <- numeric(reps)
for (i in 1:reps) {
X <- cbind(1, rnorm(n), rnorm(n) ^ 2)
Y <- beta_0 + beta_1 * X[, 2, drop = FALSE] +
beta_2 * X[, 3, drop = FALSE] + rnorm(n, sd = s)
beta_1_hat[i] <- (solve(t(X) %*% X) %*% t(X) %*% Y)[2, ]
}
beta_1_hat_all <- c(beta_1_hat_all, beta_1_hat)
}
beta_df <- tibble(sd = rep(st_dev, each = reps),
beta_1_hat = beta_1_hat_all)
Some further improvements are below using a map()
variant.
get_ols_estimate <- function(s, beta = 1) {
beta_0 <- 2
beta_1 <- .5
beta_2 <- 3
n <- 1000
X <- cbind(1, rnorm(n), rnorm(n) ^ 2)
Y <- beta_0 + beta_1 * X[, 2, drop = FALSE] + beta_2 * X[, 3, drop = FALSE] +
rnorm(n, sd = s)
beta_k_hat <- tibble(sd = s,
beta_k_hat = (solve(t(X) %*% X) %*% t(X) %*% Y)[beta + 1, ]
)
return(beta_k_hat)
}
reps <- 10000
s <- c(1, 3, 7)
beta_hat_df <- map2_df(1:(length(s) * reps), rep(s, each = reps),
~ get_ols_estimate(s = .y, beta = 1))
Profile the below code. What do you notice?
x <- numeric()
for (i in 1:1e5) {
x <- c(x, i)
}
The garbage collector was hard at work getting rid of those old objects in memory.