R has a rich package ecosystem for read/writing, manipulating, and analyzing geospatial data.
Some core packages:
sp
- core classes for handling spatial data, additional utility functions.
rgdal
- R interface to gdal
(Geospatial Data Abstraction Library) for reading and writing spatial data.
maptools
- Additional tools for reading and writing spatial data.
rgeos
- R interface to geos
(Geometry Engine Open Source) library for querying and manipulating spatial data. Reading and writing WKT.
raster
- classes and tools for handling spatial raster data.
See more - Spatial task view
The R package sp
provides classes that implement these geospatial objects.
Point(s) | Linestring(s) | Polygon(s) | Geometry Collection | |
---|---|---|---|---|
Geometry | SpatialPoints |
SpatialLines |
SpatialPolygons |
SpatialCollection * |
Geometry + Data | SpatialPointsDataFrame |
SpatialLinesDataFrame |
SpatialPolygonsDataFrame |
— |
They also handle additional tasks like tracking the projection of the spatial coordinates.
sp
maptools
:
readShapePoints
/ writeShapePoints
- Shapefile w/ pointsreadShapeLines
/ writeShapeLines
- Shapefile w/ linesreadShapePoly
/ writeShapePoly
- Shapefile w/ polygonsreadShapeSpatial
/ writeShapeSpatial
- Shapefilergdal
:
readOGR
/ writeOGR
- Shapefile, GeoJSON, KML, …rgeos
:
readWKT
/ writeWKT
- Well Known TextWant to fly from the Western most point in the US to the Eastern most point?
inter = gcIntermediate(c(179.776,51.952), c(-179.146,51.273), n=50, addStartEnd=TRUE) plot(wrld_simpl, col="black", ylim=c(15,90)) lines(inter,col='red',lwd=2,lty=3)
How do we define the distance between A and B, A and C, or B and C?
POINT (30 10)
MULTIPOINT ((10 40), (40 30), (20 20), (30 10))
{ "type": "Point", "coordinates": [30, 10] }
{ "type": "MultiPoint", "coordinates": [ [10, 40], [40, 30], [20, 20], [30, 10] ] }
LINESTRING (30 10, 10 30, 40 40) {.smaller}
MULTILINESTRING ((10 10, 20 20, 10 40), (40 40, 30 30, 40 20, 30 10))
{ "type": "LineString", "coordinates": [ [30, 10], [10, 30], [40, 40] ] }
{ "type": "MultiLineString", "coordinates": [ [[10, 10], [20, 20], [10, 40]], [[40, 40], [30, 30], [40, 20], [30, 10]] ] }
POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))
MULTIPOLYGON (((30 20, 45 40, 10 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))
{ "type": "Polygon", "coordinates": [ [[30, 10], [40, 40], [20, 40], [10, 20], [30, 10]] ] }
{ "type": "MultiPolygon", "coordinates": [ [ [[30, 20], [45, 40], [10, 40], [30, 20]] ], [ [[15, 5], [40, 10], [10, 20], [5, 10], [15, 5]] ] ] }
POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))
MULTIPOLYGON (((40 40, 20 45, 45 30, 40 40)), ((20 35, 10 30, 10 10, 30 5, 45 20, 20 35), (30 20, 20 15, 20 25, 30 20)))
{ "type": "Polygon", "coordinates": [ [[35, 10], [45, 45], [15, 40], [10, 20], [35, 10]], [[20, 30], [35, 35], [30, 20], [20, 30]] ] }
{ "type": "MultiPolygon", "coordinates": [ [ [[40, 40], [20, 45], [45, 30], [40, 40]] ], [ [[20, 35], [10, 30], [10, 10], [30, 5], [45, 20], [20, 35]], [[30, 20], [20, 15], [20, 25], [30, 20]] ] ] }
WKT:
GEOMETRYCOLLECTION (POINT (4 8), LINESTRING (4 6,7 10), POLYGON ((6 6, 8 6, 8 8, 6 6)))'
GeoJSON:
{ "type": "GeometryCollection", "geometries": [ { "type": "Point", "coordinates": [30, 10] }, { "type": "LineString", "coordinates": [ [30, 10], [10, 30], [40, 40] ] } ] }
GeoJSON:
{ "type": "Feature", "geometry": { "type": "GeometryCollection", "geometries": [ { "type": "Point", "coordinates": [0, 0] }, { "type": "LineString", "coordinates": [[0, 0], [1, 0]] } ] }, "properties": { "name": "data", "rank": 1, "valid": true } }
nc = readOGR("data/gis/nc_counties/","nc_counties",FALSE) air = readOGR("data/gis/airports/","airports",FALSE) hwy = readOGR("data/gis/us_interstates/","us_interstates",FALSE)
proj4string(nc)
## [1] "+proj=longlat +datum=NAD83 +no_defs +ellps=GRS80 +towgs84=0,0,0"
proj4string(air)
## [1] "+proj=longlat +datum=NAD83 +no_defs +ellps=GRS80 +towgs84=0,0,0"
proj4string(hwy)
## [1] "+proj=utm +zone=15 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
nc = spTransform(nc, CRS(proj4string(hwy))) row.names(nc) = sub(" County","", as.character(nc$COUNTY)) air = spTransform(air, CRS(proj4string(hwy)))
plot(nc) plot(air, add=TRUE, pch=1, col="blue") plot(hwy, add=TRUE, col="red")
d = gDistance(nc,air,byid=c(TRUE,FALSE)) str(d)
## num [1, 1:100] 56851 71272 24297 49802 19809 ... ## - attr(*, "dimnames")=List of 2 ## ..$ : NULL ## ..$ : chr [1:100] "Ashe" "Alleghany" "Surry" "Gates" ...
d[,"Durham"]
## Durham ## 3334.51
d[,"Wake"]
## Wake ## 0
d[,"Orange"]
## Orange ## 20633.96
ncc = gCentroid(nc,byid=TRUE) class(ncc)
## [1] "SpatialPoints" ## attr(,"package") ## [1] "sp"
d = gDistance(ncc,air,byid=c(TRUE,FALSE))
d[,"Durham"]
## Durham ## 19685.62
d[,"Wake"]
## Wake ## 16071.85
d[,"Orange"]
## Orange ## 37003.97
rgeos
For more detail see the DE-9IM specification
nc_air = gIntersects(nc,air,byid=c(TRUE)) str(nc_air)
## logi [1:940, 1:100] FALSE FALSE FALSE FALSE FALSE FALSE ... ## - attr(*, "dimnames")=List of 2 ## ..$ : chr [1:940] "1" "2" "3" "4" ... ## ..$ : chr [1:100] "Ashe" "Alleghany" "Surry" "Gates" ...
nc$COUNTY[apply(nc_air,2,any)]
## [1] Forsyth County Guilford County Dare County ## [4] Wake County Pitt County Catawba County ## [7] Buncombe County Wayne County Mecklenburg County ## [10] Moore County Cabarrus County Lenoir County ## [13] Craven County Cumberland County Onslow County ## [16] New Hanover County ## 100 Levels: Alamance County Alexander County ... Yancey County
plot(nc) plot(nc[apply(nc_air,2,any),], add=TRUE, col="lightblue") plot(air[apply(nc_air,1,any),], add=TRUE, pch=1, col="blue")
adj = gTouches(nc,byid=TRUE) str(adj)
## logi [1:100, 1:100] FALSE TRUE FALSE FALSE FALSE FALSE ... ## - attr(*, "dimnames")=List of 2 ## ..$ : chr [1:100] "Ashe" "Alleghany" "Surry" "Gates" ... ## ..$ : chr [1:100] "Ashe" "Alleghany" "Surry" "Gates" ...
nc$COUNTY[adj["Durham",]]
## [1] Person County Granville County Orange County Wake County ## [5] Chatham County ## 100 Levels: Alamance County Alexander County ... Yancey County
library(corrplot) corrplot(adj[1:20,1:20],method="color",type="lower",tl.col="black",cl.pos = "n")
plot(nc) plot(nc[rowSums(adj)==max(rowSums(adj)),],add=TRUE,col="lightblue")
plot(nc) plot(nc[rowSums(adj)==min(rowSums(adj)),],add=TRUE,col="lightblue")