Vectorization & Recursion

Vectorization

Dot product example

dot1 = function(x,y) {
  x %*% y
}

dot2 = function(x,y) {
  sum(x * y)
}

dot3 = function(x,y) {
  sum = 0
  for(i in 1:length(x))
    sum = sum + x[i] * y[i]

  return(sum)
}

Dot product benchmark

x = rnorm(1e5)
y = runif(1e5)

microbenchmark(dot1(x,y),
               dot2(x,y),
               dot3(x,y),
               times=100)

## Unit: microseconds
##        expr       min         lq       mean     median         uq        max neval
##  dot1(x, y)   291.952   322.7955   372.3661   367.2470   399.0045    576.219   100
##  dot2(x, y)   232.533   255.9540   323.7053   303.2845   381.9580    505.216   100
##  dot3(x, y) 60039.374 63223.3570 67334.1279 65274.5230 67308.4280 101578.604   100

Sampling Example

Lets consider a function to generate samples from a bivariate normal.

We want to sample x and y such that

\[ \left( \begin{array}{c} x\\y \end{array} \right) \sim N \left( \left( \begin{array}{c} \mu_x\\\mu_y \end{array} \right) , \left( \begin{array}{cc} \sigma_x^2 & \rho \sigma_x\sigma_y\\ \rho \sigma_x\sigma_y & \sigma_y^2 \end{array} \right) \right) \]

we can accomplish this by sampling x and y separately from

\[ \begin{aligned} x &\;\sim N(\mu_x, \sigma_x^2) \\ y\,|\,x &\;\sim N\left(\mu_y + \frac{\sigma_y}{\sigma_x}\rho(x-\mu_x), (1-\rho^2)\sigma_y^2\right) \end{aligned} \]

respectively.

Nonvectorized rbinorm

rbinorm = function(n, mu=c(0,0), sigma=c(1,1), rho=0)
{
  res = matrix(NA, ncol=2, nrow=n)
  colnames(res) = c("x","y")
  for(i in 1:n)
  {
    x = rnorm(1, mu[1], sigma[1])
    y = rnorm(1, mu[2] + (sigma[2]/sigma[1]) * rho * (x - mu[1]), sqrt(1-rho^2) * sigma[2])

    res[i,] = c(x,y)
  }

  return(res)
}

Vectorized rbinorm

rbinorm_vec = function(n, mu=c(0,0), sigma=c(1,1), rho=0)
{
  x = rnorm(n, mu[1], sigma[1])
  y = rnorm(n, mu[2] + (sigma[2]/sigma[1]) * rho * (x - mu[1]), sqrt(1-rho^2) * sigma[2])

  return(cbind(x,y))
}

Output

par(mfrow=c(1,2), mar=c(4,4,1,1))
plot(rbinorm(1e5,rho=0.7),pch=16,cex=0.2,col=adjustcolor("black",alpha=0.1))
plot(rbinorm_vec(1e5,rho=0.7),pch=16,cex=0.2,col=adjustcolor("black",alpha=0.1))

Benchmarking

print(microbenchmark(rbinorm(1e5,rho=0.7),rbinorm_vec(1e5,rho=0.7), times=10))

## Unit: milliseconds
##                           expr        min         lq       mean     median         uq        max neval
##      rbinorm(1e+05, rho = 0.7) 1020.25794 1026.41148 1061.01451 1055.03692 1086.31283 1124.95979    10
##  rbinorm_vec(1e+05, rho = 0.7)   18.48391   18.56326   19.77796   19.23128   21.31042   22.43728    10

GCD - Non-vectorized

gcd = function(loc1, loc2) {
  deg2rad = function(deg) return(deg*pi/180)

  y1 = deg2rad( loc1[2] )
  y2 = deg2rad( loc2[2] )
  x1 = deg2rad( loc1[1] )
  x2 = deg2rad( loc2[1] )

  R = 6371 # Earth mean radius in km

  d = sin(y1)*sin(y2) + cos(y1)*cos(y2) * cos(x2-x1)
  if(d > 1 | d < -1) d = 0
  else d = acos(d) * R

  return(d) # distance in km
}

gcds = function(l1, l2=l1) {
  d = matrix(NA, nrow=nrow(l1), ncol=nrow(l2))

  for(i in 1:nrow(l1)) {
    for(j in 1:nrow(l2)) {
      d[i,j] = gcd(l1[i,],l2[j,])
    }
  }

  return(d)
}

GCD - Vectorized

gcd_vec = function(loc1, loc2)
{
  loc1 = matrix(loc1*pi/180,ncol=2)
  loc2 = matrix(loc2*pi/180,ncol=2)

  R = 6371 # Earth mean radius in km

  d = sin(loc1[,2])*sin(loc2[,2]) + cos(loc1[,2])*cos(loc2[,2]) * cos(loc2[,1]-loc1[,1])
  s = d > 1 | d < -1
  d[s] = 0
  d[!s] = acos(d[!s]) * R

  return(d) # distance in km
}

gcds_vec = function(l1, l2=l1)
{
  return( apply(l2, 1, function(x) gcd_vec(l1,x)) )
}

Performance

n = 200
locs = cbind(x = runif(n,-180,180), y = runif(n,-90,90))

microbenchmark(gcds(locs),
               gcds_vec(locs),
               times=10)

## Unit: milliseconds
##            expr      min        lq      mean    median        uq      max neval
##      gcds(locs) 698.6082 702.49425 711.81102 705.68215 726.37613 733.2330    10
##  gcds_vec(locs)  12.1576  12.25712  12.71611  12.49367  12.94588  13.9258    10

sum(abs(gcds(locs)-gcds_vec(locs)))

## [1] 0