Lets consider a function to generate samples from a bivariate normal.
We want to sample x
and y
such that
$$
\left(
\begin{array}{c}
x\\y
\end{array}
\right) \sim N
\left(
\left(
\begin{array}{c}
\mu_x\\\mu_y
\end{array}
\right)
,
\left(
\begin{array}{cc}
\sigma_x^2 & \rho \sigma_x\sigma_y\\
\rho \sigma_x\sigma_y & \sigma_y^2
\end{array}
\right)
\right)
$$
we can accomplish this by sampling x
and y
separately from
$$
\begin{aligned}
x &\;\sim N(\mu_x, \sigma_x^2) \\
y\,|\,x &\;\sim N\left(\mu_y + \frac{\sigma_y}{\sigma_x}\rho(x-\mu_x), (1-\rho^2)\sigma_y^2\right)
\end{aligned}
$$
respectively.
rbinorm = function(n, mu=c(0,0), sigma=c(1,1), rho=0)
{
res = matrix(NA, ncol=2, nrow=n)
colnames(res) = c("x","y")
for(i in 1:n)
{
x = rnorm(1, mu[1], sigma[1])
y = rnorm(1, mu[2] + (sigma[2]/sigma[1]) * rho * (x - mu[1]), sqrt(1-rho^2) * sigma[2])
res[i,] = c(x,y)
}
return(res)
}
rbinorm_vec = function(n, mu=c(0,0), sigma=c(1,1), rho=0)
{
x = rnorm(n, mu[1], sigma[1])
y = rnorm(n, mu[2] + (sigma[2]/sigma[1]) * rho * (x - mu[1]), sqrt(1-rho^2) * sigma[2])
return(cbind(x,y))
}
par(mfrow=c(1,2), mar=c(4,4,1,1))
plot(rbinorm(1e5,rho=0.7),pch=16,cex=0.2,col=adjustcolor("black",alpha=0.1))
plot(rbinorm_vec(1e5,rho=0.7),pch=16,cex=0.2,col=adjustcolor("black",alpha=0.1))
library(microbenchmark)
print(microbenchmark(rbinorm(1e5,rho=0.7),rbinorm_vec(1e5,rho=0.7), times=10))
## Unit: milliseconds
## expr min lq mean median uq max neval
## rbinorm(1e+05, rho = 0.7) 1582.28 1586.79 1622.61 1620.34 1641.42 1728.11 10
## rbinorm_vec(1e+05, rho = 0.7) 26.52 26.97 27.66 27.93 28.06 28.69 10
gcd = function(loc1, loc2)
{
deg2rad = function(deg) return(deg*pi/180)
y1 = deg2rad( loc1[2] )
y2 = deg2rad( loc2[2] )
x1 = deg2rad( loc1[1] )
x2 = deg2rad( loc2[1] )
R = 6371 # Earth mean radius in km
d = sin(y1)*sin(y2) + cos(y1)*cos(y2) * cos(x2-x1)
if(d > 1 | d < -1) d = 0
else d = acos(d) * R
return(d) # distance in km
}
gcds = function(l1, l2=l1)
{
d = matrix(NA, nrow=nrow(l1), ncol=nrow(l2))
for(i in 1:nrow(l1))
{
for(j in 1:nrow(l2))
{
d[i,j] = gcd(l1[i,],l2[j,])
}
}
return(d)
}
gcd_vec = function(loc1, loc2)
{
loc1 = matrix(loc1*pi/180,ncol=2)
loc2 = matrix(loc2*pi/180,ncol=2)
R = 6371 # Earth mean radius in km
d = sin(loc1[,2])*sin(loc2[,2]) + cos(loc1[,2])*cos(loc2[,2]) * cos(loc2[,1]-loc1[,1])
s = d > 1 | d < -1
d[s] = 0
d[!s] = acos(d[!s]) * R
return(d) # distance in km
}
gcds_vec = function(l1, l2=l1)
{
return( apply(l2, 1, function(x) gcd_vec(l1,x)) )
}
n = 200
locs = cbind(x = runif(n,-180,180), y = runif(n,-90,90))
microbenchmark(gcds(locs),
gcds_vec(locs),
times=10)
## Unit: milliseconds
## expr min lq mean median uq max neval
## gcds(locs) 1010 1020.81 1028.48 1021.19 1024.54 1064.78 10
## gcds_vec(locs) 27 27.09 27.48 27.41 27.53 28.81 10
sum(abs(gcds(locs)-gcds_vec(locs)))
## [1] 0
Name | |
---|---|
Alice | alice@company.com |
Bob | bob@company.com |
Carol | carol@company.com |
Dave | dave@company.com |
Eve | eve@company.com |
Name | Phone |
---|---|
Bob | 919 555-1111 |
Carol | 919 555-2222 |
Eve | 919 555-3333 |
Eve | 310 555-3333 |
Frank | 919 555-4444 |
Outer join of email with phone by name
name | phone | |
---|---|---|
Alice | alice@company.com | NULL |
Bob | bob@company.com | 919 555-1111 |
Carol | carol@company.com | 919 555-2222 |
Dave | dave@company.com | NULL |
Eve | eve@company.com | 919 555-3333 |
Eve | eve@company.com | 310 555-3333 |
Frank | NULL | 919 555-4444 |
Inner join of email with phone by name
Name | Phone | |
---|---|---|
Bob | bob@company.com | 919 555-1111 |
Carol | carol@company.com | 919 555-2222 |
Eve | eve@company.com | 919 555-3333 |
Eve | eve@company.com | 310 555-3333 |
Left join of email with phone by name
name | phone | |
---|---|---|
Alice | alice@company.com | NULL |
Bob | bob@company.com | 919 555-1111 |
Carol | carol@company.com | 919 555-2222 |
Dave | dave@company.com | NULL |
Eve | eve@company.com | 919 555-3333 |
Eve | eve@company.com | 310 555-3333 |
Right join of email with phone by name
name | phone | |
---|---|---|
Bob | bob@company.com | 919 555-1111 |
Carol | carol@company.com | 919 555-2222 |
Eve | eve@company.com | 919 555-3333 |
Eve | eve@company.com | 310 555-3333 |
Frank | NULL | 919 555-4444 |
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
addr
## name email
## 1 Alice alice@company.com
## 2 Bob bob@company.com
## 3 Carol carol@company.com
## 4 Dave dave@company.com
## 5 Eve eve@company.com
phone
## name phone
## 1 Bob 919 555-1111
## 2 Carol 919 555-2222
## 3 Eve 919 555-3333
## 4 Eve 310 555-3333
## 5 Frank 919 555-4444
Base R:
merge(addr, phone, all=TRUE)
## name email phone
## 1 Alice alice@company.com <NA>
## 2 Bob bob@company.com 919 555-1111
## 3 Carol carol@company.com 919 555-2222
## 4 Dave dave@company.com <NA>
## 5 Eve eve@company.com 919 555-3333
## 6 Eve eve@company.com 310 555-3333
## 7 Frank <NA> 919 555-4444
dplyr:
Not implemented, due in 0.3.1.Base R:
merge(addr, phone, all=FALSE)
## name email phone
## 1 Bob bob@company.com 919 555-1111
## 2 Carol carol@company.com 919 555-2222
## 3 Eve eve@company.com 919 555-3333
## 4 Eve eve@company.com 310 555-3333
dplyr:
inner_join(addr,phone)
## Joining by: "name"
## name email phone
## 1 Bob bob@company.com 919 555-1111
## 2 Carol carol@company.com 919 555-2222
## 3 Eve eve@company.com 919 555-3333
## 4 Eve eve@company.com 310 555-3333
Base R:
merge(addr, phone, all.x=TRUE)
## name email phone
## 1 Alice alice@company.com <NA>
## 2 Bob bob@company.com 919 555-1111
## 3 Carol carol@company.com 919 555-2222
## 4 Dave dave@company.com <NA>
## 5 Eve eve@company.com 919 555-3333
## 6 Eve eve@company.com 310 555-3333
dplyr:
left_join(addr,phone)
## Joining by: "name"
## name email phone
## 1 Alice alice@company.com <NA>
## 2 Bob bob@company.com 919 555-1111
## 3 Carol carol@company.com 919 555-2222
## 4 Dave dave@company.com <NA>
## 5 Eve eve@company.com 919 555-3333
## 6 Eve eve@company.com 310 555-3333
Base R:
merge(addr, phone, all.y=TRUE)
## name email phone
## 1 Bob bob@company.com 919 555-1111
## 2 Carol carol@company.com 919 555-2222
## 3 Eve eve@company.com 919 555-3333
## 4 Eve eve@company.com 310 555-3333
## 5 Frank <NA> 919 555-4444
dplyr:
Not implemented, due in 0.3.1.semi_join(addr, phone)
## Joining by: "name"
## name email
## 1 Bob bob@company.com
## 2 Carol carol@company.com
## 3 Eve eve@company.com
anti_join(addr, phone)
## Joining by: "name"
## name email
## 1 Dave dave@company.com
## 2 Alice alice@company.com
Above materials are derived in part from the following sources: