Lets consider a function to generate samples from a bivariate normal.
We want to sample x
and y
such that
\[ \left( \begin{array}{c} x\\y \end{array} \right) \sim N \left( \left( \begin{array}{c} \mu_x\\\mu_y \end{array} \right) , \left( \begin{array}{cc} \sigma_x^2 & \rho \sigma_x\sigma_y\\ \rho \sigma_x\sigma_y & \sigma_y^2 \end{array} \right) \right) \]
we can accomplish this by sampling x
and y
separately from
\[ \begin{aligned} x &\;\sim N(\mu_x, \sigma_x^2) \\ y\,|\,x &\;\sim N\left(\mu_y + \frac{\sigma_y}{\sigma_x}\rho(x-\mu_x), (1-\rho^2)\sigma_y^2\right) \end{aligned} \]
respectively.
rbinorm = function(n, mu=c(0,0), sigma=c(1,1), rho=0) { res = matrix(NA, ncol=2, nrow=n) colnames(res) = c("x","y") for(i in 1:n) { x = rnorm(1, mu[1], sigma[1]) y = rnorm(1, mu[2] + (sigma[2]/sigma[1]) * rho * (x - mu[1]), sqrt(1-rho^2) * sigma[2]) res[i,] = c(x,y) } return(res) }
rbinorm_vec = function(n, mu=c(0,0), sigma=c(1,1), rho=0) { x = rnorm(n, mu[1], sigma[1]) y = rnorm(n, mu[2] + (sigma[2]/sigma[1]) * rho * (x - mu[1]), sqrt(1-rho^2) * sigma[2]) return(cbind(x,y)) }
par(mfrow=c(1,2), mar=c(4,4,1,1)) plot(rbinorm(1e5,rho=0.7),pch=16,cex=0.2,col=adjustcolor("black",alpha=0.1)) plot(rbinorm_vec(1e5,rho=0.7),pch=16,cex=0.2,col=adjustcolor("black",alpha=0.1))
library(microbenchmark) print(microbenchmark(rbinorm(1e5,rho=0.7),rbinorm_vec(1e5,rho=0.7), times=10))
## Unit: milliseconds ## expr min lq mean median uq max neval ## rbinorm(1e+05, rho = 0.7) 1626.22 1629.9 1658.19 1658.77 1684.33 1690.40 10 ## rbinorm_vec(1e+05, rho = 0.7) 26.38 26.5 27.19 26.97 27.92 28.35 10
gcd = function(loc1, loc2) { deg2rad = function(deg) return(deg*pi/180) y1 = deg2rad( loc1[2] ) y2 = deg2rad( loc2[2] ) x1 = deg2rad( loc1[1] ) x2 = deg2rad( loc2[1] ) R = 6371 # Earth mean radius in km d = sin(y1)*sin(y2) + cos(y1)*cos(y2) * cos(x2-x1) if(d > 1 | d < -1) d = 0 else d = acos(d) * R return(d) # distance in km }
gcds = function(l1, l2=l1) { d = matrix(NA, nrow=nrow(l1), ncol=nrow(l2)) for(i in 1:nrow(l1)) { for(j in 1:nrow(l2)) { d[i,j] = gcd(l1[i,],l2[j,]) } } return(d) }
gcd_vec = function(loc1, loc2) { loc1 = matrix(loc1*pi/180,ncol=2) loc2 = matrix(loc2*pi/180,ncol=2) R = 6371 # Earth mean radius in km d = sin(loc1[,2])*sin(loc2[,2]) + cos(loc1[,2])*cos(loc2[,2]) * cos(loc2[,1]-loc1[,1]) s = d > 1 | d < -1 d[s] = 0 d[!s] = acos(d[!s]) * R return(d) # distance in km } gcds_vec = function(l1, l2=l1) { return( apply(l2, 1, function(x) gcd_vec(l1,x)) ) }
n = 200 locs = cbind(x = runif(n,-180,180), y = runif(n,-90,90)) microbenchmark(gcds(locs), gcds_vec(locs), times=10)
## Unit: milliseconds ## expr min lq mean median uq max neval ## gcds(locs) 1007.14 1046.46 1106.32 1104.08 1175.90 1215.68 10 ## gcds_vec(locs) 28.91 30.39 31.19 30.63 32.02 35.22 10
sum(abs(gcds(locs)-gcds_vec(locs)))
## [1] 0
Name | |
---|---|
Alice | alice@company.com |
Bob | bob@company.com |
Carol | carol@company.com |
Dave | dave@company.com |
Eve | eve@company.com |
Name | Phone |
---|---|
Bob | 919 555-1111 |
Carol | 919 555-2222 |
Eve | 919 555-3333 |
Eve | 310 555-3333 |
Frank | 919 555-4444 |
Outer join of email with phone by name
name | phone | |
---|---|---|
Alice | alice@company.com | NULL |
Bob | bob@company.com | 919 555-1111 |
Carol | carol@company.com | 919 555-2222 |
Dave | dave@company.com | NULL |
Eve | eve@company.com | 919 555-3333 |
Eve | eve@company.com | 310 555-3333 |
Frank | NULL | 919 555-4444 |
Inner join of email with phone by name
Name | Phone | |
---|---|---|
Bob | bob@company.com | 919 555-1111 |
Carol | carol@company.com | 919 555-2222 |
Eve | eve@company.com | 919 555-3333 |
Eve | eve@company.com | 310 555-3333 |
Left join of email with phone by name
name | phone | |
---|---|---|
Alice | alice@company.com | NULL |
Bob | bob@company.com | 919 555-1111 |
Carol | carol@company.com | 919 555-2222 |
Dave | dave@company.com | NULL |
Eve | eve@company.com | 919 555-3333 |
Eve | eve@company.com | 310 555-3333 |
Right join of email with phone by name
name | phone | |
---|---|---|
Bob | bob@company.com | 919 555-1111 |
Carol | carol@company.com | 919 555-2222 |
Eve | eve@company.com | 919 555-3333 |
Eve | eve@company.com | 310 555-3333 |
Frank | NULL | 919 555-4444 |
addr
## name email ## 1 Alice alice@company.com ## 2 Bob bob@company.com ## 3 Carol carol@company.com ## 4 Dave dave@company.com ## 5 Eve eve@company.com
phone
## name phone ## 1 Bob 919 555-1111 ## 2 Carol 919 555-2222 ## 3 Eve 919 555-3333 ## 4 Eve 310 555-3333 ## 5 Frank 919 555-4444
Base R:
merge(addr, phone, all=TRUE)
## name email phone ## 1 Alice alice@company.com <NA> ## 2 Bob bob@company.com 919 555-1111 ## 3 Carol carol@company.com 919 555-2222 ## 4 Dave dave@company.com <NA> ## 5 Eve eve@company.com 919 555-3333 ## 6 Eve eve@company.com 310 555-3333 ## 7 Frank <NA> 919 555-4444
dplyr:
Not implemented, due in 0.3.1.Base R:
merge(addr, phone, all=FALSE)
## name email phone ## 1 Bob bob@company.com 919 555-1111 ## 2 Carol carol@company.com 919 555-2222 ## 3 Eve eve@company.com 919 555-3333 ## 4 Eve eve@company.com 310 555-3333
dplyr:
inner_join(addr,phone)
## Joining by: "name"
## name email phone ## 1 Bob bob@company.com 919 555-1111 ## 2 Carol carol@company.com 919 555-2222 ## 3 Eve eve@company.com 919 555-3333 ## 4 Eve eve@company.com 310 555-3333
Base R:
merge(addr, phone, all.x=TRUE)
## name email phone ## 1 Alice alice@company.com <NA> ## 2 Bob bob@company.com 919 555-1111 ## 3 Carol carol@company.com 919 555-2222 ## 4 Dave dave@company.com <NA> ## 5 Eve eve@company.com 919 555-3333 ## 6 Eve eve@company.com 310 555-3333
dplyr:
left_join(addr,phone)
## Joining by: "name"
## name email phone ## 1 Alice alice@company.com <NA> ## 2 Bob bob@company.com 919 555-1111 ## 3 Carol carol@company.com 919 555-2222 ## 4 Dave dave@company.com <NA> ## 5 Eve eve@company.com 919 555-3333 ## 6 Eve eve@company.com 310 555-3333
Base R:
merge(addr, phone, all.y=TRUE)
## name email phone ## 1 Bob bob@company.com 919 555-1111 ## 2 Carol carol@company.com 919 555-2222 ## 3 Eve eve@company.com 919 555-3333 ## 4 Eve eve@company.com 310 555-3333 ## 5 Frank <NA> 919 555-4444
dplyr:
Not implemented, due in 0.3.1.semi_join(addr, phone)
## Joining by: "name"
## name email ## 1 Bob bob@company.com ## 2 Carol carol@company.com ## 3 Eve eve@company.com
anti_join(addr, phone)
## Joining by: "name"
## name email ## 1 Dave dave@company.com ## 2 Alice alice@company.com
Above materials are derived in part from the following sources: