# Load the three shapefile layers (NC counties, airports, US interstates)
# with readOGR's progress output switched off.
nc <- readOGR(dsn = "data/gis/nc_counties/", layer = "nc_counties", verbose = FALSE)
air <- readOGR(dsn = "data/gis/airports/", layer = "airports", verbose = FALSE)
hwy <- readOGR(dsn = "data/gis/us_interstates/", layer = "us_interstates", verbose = FALSE)

# Compare coordinate reference systems: counties and airports are in
# longitude/latitude, while the interstates are in a UTM projection (metres).
proj4string(nc)
## [1] "+proj=longlat +datum=NAD83 +no_defs +ellps=GRS80 +towgs84=0,0,0"
proj4string(air)
## [1] "+proj=longlat +datum=NAD83 +no_defs +ellps=GRS80 +towgs84=0,0,0"
proj4string(hwy)
## [1] "+proj=utm +zone=15 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"

# Reproject counties and airports into the interstate layer's CRS so that all
# three layers share one coordinate system.
nc <- spTransform(nc, CRS(proj4string(hwy)))
# Label each county polygon by its name without the " County" suffix; these
# row names become the dimnames of the matrices computed further down.
row.names(nc) <- sub(pattern = " County", replacement = "", x = as.character(nc$COUNTY))
air <- spTransform(air, CRS(proj4string(hwy)))

# Draw the counties, then overlay airports (blue circles) and interstates (red).
plot(nc)
plot(air, add = TRUE, pch = 1, col = "blue")
plot(hwy, add = TRUE, col = "red")
# Distance from each county polygon to the airport layer taken as a whole:
# byid = c(TRUE, FALSE) splits `nc` by feature but keeps `air` as one geometry,
# giving a 1 x 100 matrix. Values are in the CRS units (metres after the
# reprojection above).
d <- gDistance(nc, air, byid = c(TRUE, FALSE))
str(d)
## num [1, 1:100] 56851 71272 24297 49802 19809 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:100] "Ashe" "Alleghany" "Surry" "Gates" ...
d[, "Durham"]
## Durham
## 3334.51
d[, "Wake"]
## Wake
## 0
d[, "Orange"]
## Orange
## 20633.96

# Repeat the calculation from each county's centroid instead of its polygon.
ncc <- gCentroid(nc, byid = TRUE)
class(ncc)
## [1] "SpatialPoints"
## attr(,"package")
## [1] "sp"
d <- gDistance(ncc, air, byid = c(TRUE, FALSE))
d[, "Durham"]
## Durham
## 19685.62
d[, "Wake"]
## Wake
## 16071.85
d[, "Orange"]
## Orange
## 37003.97
rgeos
For more detail, see the DE-9IM specification.
# Pairwise intersection test between every airport and every county.
# The result is a logical matrix with one row per airport and one column per
# county.
nc_air <- gIntersects(nc, air, byid = TRUE)
str(nc_air)
## logi [1:940, 1:100] FALSE FALSE FALSE FALSE FALSE FALSE ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:940] "1" "2" "3" "4" ...
## ..$ : chr [1:100] "Ashe" "Alleghany" "Surry" "Gates" ...

# Counties whose column contains at least one TRUE, i.e. that intersect an airport.
nc$COUNTY[apply(nc_air, MARGIN = 2, FUN = any)]
## [1] Forsyth County Guilford County Dare County
## [4] Wake County Pitt County Catawba County
## [7] Buncombe County Wayne County Mecklenburg County
## [10] Moore County Cabarrus County Lenoir County
## [13] Craven County Cumberland County Onslow County
## [16] New Hanover County
## 100 Levels: Alamance County Alexander County ... Yancey County

# Shade those counties, then mark the airports that intersect any county.
plot(nc)
plot(nc[apply(nc_air, MARGIN = 2, FUN = any), ], add = TRUE, col = "lightblue")
plot(air[apply(nc_air, MARGIN = 1, FUN = any), ], add = TRUE, pch = 1, col = "blue")
# County-by-county adjacency: adj[i, j] is TRUE when counties i and j touch.
adj <- gTouches(nc, byid = TRUE)
str(adj)
## logi [1:100, 1:100] FALSE TRUE FALSE FALSE FALSE FALSE ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:100] "Ashe" "Alleghany" "Surry" "Gates" ...
## ..$ : chr [1:100] "Ashe" "Alleghany" "Surry" "Gates" ...

# Counties that touch Durham.
nc$COUNTY[adj["Durham", ]]
## [1] Person County Granville County Orange County Wake County
## [5] Chatham County
## 100 Levels: Alamance County Alexander County ... Yancey County

# Visualise the adjacency pattern of the first 20 counties as a coloured grid.
library(corrplot)
corrplot(adj[1:20, 1:20], method = "color", type = "lower", tl.col = "black", cl.pos = "n")

# Highlight the county (or counties) with the most neighbours ...
plot(nc)
plot(nc[rowSums(adj) == max(rowSums(adj)), ], add = TRUE, col = "lightblue")
# ... and the one(s) with the fewest.
plot(nc)
plot(nc[rowSums(adj) == min(rowSums(adj)), ], add = TRUE, col = "lightblue")
Two table functions / verbs for joining:
- full_join: Join data. Retain all values, all rows.
- inner_join: Join data. Retain only rows in both sets.
- left_join: Join matching rows from b to a.
- right_join: Join matching rows from a to b.
- semi_join: All rows in a that have a match in b.
- anti_join: All rows in a that do not have a match in b.
# Example tables used by the join demonstrations that follow.
#   addr  - one row per person: name and e-mail address.
#   phone - one row per phone number: name and number.
# The `name` keys overlap only partially (Alice and dave have no phone,
# Frank has no e-mail, Eve has two phones), so each join type yields a
# different result. "dave" is lowercase in the data and is kept that way
# because the recorded outputs depend on it.
#
# Fix: the first table was previously assigned to `.addr` (a stray leading
# period), which left `addr` undefined for every later statement.
addr <- data.frame(
  name = c("Alice", "Bob", "Carol", "dave", "Eve"),
  email = c(
    "alice@company.com",
    "bob@company.com",
    "carol@company.com",
    "dave@company.com",
    "eve@company.com"
  ),
  stringsAsFactors = FALSE
)
phone <- data.frame(
  name = c("Bob", "Carol", "Eve", "Eve", "Frank"),
  phone = c(
    "919 555-1111",
    "919 555-2222",
    "919 555-3333",
    "310 555-3333",
    "919 555-4444"
  ),
  stringsAsFactors = FALSE
)

# Print both tables for reference.
addr
## name email
## 1 Alice alice@company.com
## 2 Bob bob@company.com
## 3 Carol carol@company.com
## 4 dave dave@company.com
## 5 Eve eve@company.com
phone
## name phone
## 1 Bob 919 555-1111
## 2 Carol 919 555-2222
## 3 Eve 919 555-3333
## 4 Eve 310 555-3333
## 5 Frank 919 555-4444
dplyr:
# Full join: every row of both tables is kept; missing values become NA.
full_join(x = addr, y = phone)
## Joining by: "name"
## name email phone
## 1 Alice alice@company.com <NA>
## 2 Bob bob@company.com 919 555-1111
## 3 Carol carol@company.com 919 555-2222
## 4 dave dave@company.com <NA>
## 5 Eve eve@company.com 919 555-3333
## 6 Eve eve@company.com 310 555-3333
## 7 Frank <NA> 919 555-4444
Base R:
# Base R equivalent of a full join: all = TRUE keeps unmatched rows from both sides.
merge(x = addr, y = phone, all = TRUE)
## name email phone
## 1 Alice alice@company.com <NA>
## 2 Bob bob@company.com 919 555-1111
## 3 Carol carol@company.com 919 555-2222
## 4 dave dave@company.com <NA>
## 5 Eve eve@company.com 919 555-3333
## 6 Eve eve@company.com 310 555-3333
## 7 Frank <NA> 919 555-4444
dplyr:
# Inner join: only names present in both tables survive.
inner_join(x = addr, y = phone)
## Joining by: "name"
## name email phone
## 1 Bob bob@company.com 919 555-1111
## 2 Carol carol@company.com 919 555-2222
## 3 Eve eve@company.com 919 555-3333
## 4 Eve eve@company.com 310 555-3333
Base R:
# Base R equivalent of an inner join: all = FALSE drops unmatched rows.
merge(x = addr, y = phone, all = FALSE)
## name email phone
## 1 Bob bob@company.com 919 555-1111
## 2 Carol carol@company.com 919 555-2222
## 3 Eve eve@company.com 919 555-3333
## 4 Eve eve@company.com 310 555-3333
dplyr:
# Left join: keep every row of addr, attaching phone numbers where they exist.
left_join(x = addr, y = phone)
## Joining by: "name"
## name email phone
## 1 Alice alice@company.com <NA>
## 2 Bob bob@company.com 919 555-1111
## 3 Carol carol@company.com 919 555-2222
## 4 dave dave@company.com <NA>
## 5 Eve eve@company.com 919 555-3333
## 6 Eve eve@company.com 310 555-3333
Base R:
# Base R equivalent of a left join: all.x = TRUE keeps unmatched rows of addr.
merge(x = addr, y = phone, all.x = TRUE)
## name email phone
## 1 Alice alice@company.com <NA>
## 2 Bob bob@company.com 919 555-1111
## 3 Carol carol@company.com 919 555-2222
## 4 dave dave@company.com <NA>
## 5 Eve eve@company.com 919 555-3333
## 6 Eve eve@company.com 310 555-3333
dplyr:
# Right join: keep every row of phone, attaching e-mail addresses where they exist.
right_join(x = addr, y = phone)
## Joining by: "name"
## name email phone
## 1 Bob bob@company.com 919 555-1111
## 2 Carol carol@company.com 919 555-2222
## 3 Eve eve@company.com 919 555-3333
## 4 Eve eve@company.com 310 555-3333
## 5 Frank <NA> 919 555-4444
Base R:
# Base R equivalent of a right join: all.y = TRUE keeps unmatched rows of phone.
merge(x = addr, y = phone, all.y = TRUE)
## name email phone
## 1 Bob bob@company.com 919 555-1111
## 2 Carol carol@company.com 919 555-2222
## 3 Eve eve@company.com 919 555-3333
## 4 Eve eve@company.com 310 555-3333
## 5 Frank <NA> 919 555-4444
# Filtering joins return only columns of addr; phone is used purely as a filter.
# Semi join: rows of addr whose name appears in phone (Eve is listed once even
# though she has two phone rows).
semi_join(x = addr, y = phone)
## Joining by: "name"
## name email
## 1 Bob bob@company.com
## 2 Carol carol@company.com
## 3 Eve eve@company.com
# Anti join: rows of addr whose name does not appear in phone.
anti_join(x = addr, y = phone)
## Joining by: "name"
## name email
## 1 dave dave@company.com
## 2 Alice alice@company.com
Two table functions / verbs:
- intersect: Rows that appear in both a and b.
- union: Rows that appear in either or both a and b.
- setdiff: Rows that appear in a but not b.
- bind_rows: Like rbind, but better.
- bind_cols: Like cbind, but better.

Set operations are similar to the joins but require that all columns be the same for both data frames.