R has a rich package ecosystem for read/writing, manipulating, and analyzing geospatial data.
Some core packages:
sp
- core classes for handling spatial data, additional utility functions.
rgdal
- R interface to gdal
(Geospatial Data Abstraction Library) for reading and writing spatial data.
rgeos
- R interface to geos
(Geometry Engine Open Source) library for querying and manipulating spatial data. Reading and writing WKT.
raster
- classes and tools for handling spatial raster data.
See more - Spatial task view
R has a rich package ecosystem for read/writing, manipulating, and analyzing geospatial data.
Some core packages:
sp
- core classes for handling spatial data, additional utility functions.
rgdal
- R interface to gdal
(Geospatial Data Abstraction Library) for reading and writing spatial data.
rgeos
- R interface to geos
(Geometry Engine Open Source) library for querying and manipulating spatial data. Reading and writing WKT.
sf
- Combines the functionality of sp
, rgdal
, and rgeos
into a single package based on tidy priciples.
raster
- classes and tools for handling spatial raster data.
See more - Spatial task view
sf
The sf
package is currently under active development and is evolving rapidly. For the time being I suggest installing the latest version from github instead of using CRAN.
library(devtools)
install_github('edzer/sfr')
library(sf)
Point, Multipoint, Multilinestring, Polygon
sp
maptools
:
readShapePoints
/ writeShapePoints
- Shapefile w/ pointsreadShapeLines
/ writeShapeLines
- Shapefile w/ linesreadShapePoly
/ writeShapePoly
- Shapefile w/ polygonsreadShapeSpatial
/ writeShapeSpatial
- Shapefilergdal
:
readOGR
/ writeOGR
- Shapefile, GeoJSON, KML, …rgeos
:
readWKT
/ writeWKT
- Well Known Textsf
:
st_read
/ st_write
- Shapefile, GeoJSON, KML, …Want to fly from the Western most point in the US to the Eastern most point?
inter = gcIntermediate(c(179.776,51.952), c(-179.146,51.273), n=50, addStartEnd=TRUE)
plot(wrld_simpl, col="black", ylim=c(15,90))
lines(inter,col='red',lwd=2,lty=3)
How do we define the distance between A and B, A and C, or B and C?
nc = st_read("data/gis/nc_counties/")
## Reading layer `nc_counties' from data source `data/gis/nc_counties/' using driver `ESRI Shapefile'
## features: 100
## fields: 8
## converted into: MULTIPOLYGON
## proj4string: +proj=longlat +datum=NAD83 +no_defs
air = st_read("data/gis/airports/")
## Reading layer `airports' from data source `data/gis/airports/' using driver `ESRI Shapefile'
## features: 940
## fields: 16
## proj4string: +proj=longlat +datum=NAD83 +no_defs
hwy = st_read("data/gis/us_interstates/")
## Reading layer `us_interstates' from data source `data/gis/us_interstates/' using driver `ESRI Shapefile'
## features: 233
## fields: 3
## converted into: MULTILINESTRING
## proj4string: +proj=utm +zone=15 +datum=NAD83 +units=m +no_defs
nc
## Simple feature collection with 100 features and 8 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -84.32186 ymin: 33.84175 xmax: -75.46003 ymax: 36.58815
## epsg (SRID): NA
## proj4string: +proj=longlat +datum=NAD83 +no_defs
## First 20 features:
## AREA PERIMETER COUNTYP010 STATE COUNTY FIPS STATE_FIPS SQUARE_MIL
## 1 0.11175964 1.610396 1994 NC Ashe County 37009 37 429.350
## 2 0.06159483 1.354829 1996 NC Alleghany County 37005 37 236.459
## 3 0.14023009 1.769388 1998 NC Surry County 37171 37 538.863
## 4 0.08912401 1.425249 1999 NC Gates County 37073 37 342.340
## 5 0.06865730 4.428217 2000 NC Currituck County 37053 37 263.871
## 6 0.11859434 1.404309 2001 NC Stokes County 37169 37 455.793
## 7 0.06259671 2.106357 2002 NC Camden County 37029 37 240.615
## 8 0.11542955 1.462524 2003 NC Warren County 37185 37 443.659
## 9 0.14328609 2.397293 2004 NC Northampton County 37131 37 550.581
## 10 0.09245561 1.810778 2005 NC Hertford County 37091 37 355.525
## 11 0.10512251 1.299689 2006 NC Person County 37145 37 404.077
## 12 0.07017160 1.132041 2007 NC Vance County 37181 37 269.817
## 13 0.13942048 1.691192 2008 NC Granville County 37077 37 536.495
## 14 0.14895720 1.607730 2009 NC Rockingham County 37157 37 572.528
## 15 0.11141436 1.345644 2010 NC Caswell County 37033 37 428.244
## 16 0.18995027 2.654576 2013 NC Halifax County 37083 37 731.365
## 17 0.05900578 1.695896 2016 NC Pasquotank County 37139 37 227.076
## 18 0.19674745 2.183487 2040 NC Wilkes County 37193 37 758.024
## 19 0.08110616 1.365225 2048 NC Watauga County 37189 37 312.386
## 20 0.06431129 1.821947 2049 NC Perquimans County 37143 37 247.775
## geometry
## 1 MULTIPOLYGON(((-81.65648656...
## 2 MULTIPOLYGON(((-81.30999399...
## 3 MULTIPOLYGON(((-80.71416384...
## 4 MULTIPOLYGON(((-76.91183250...
## 5 MULTIPOLYGON(((-75.82777792...
## 6 MULTIPOLYGON(((-80.43314893...
## 7 MULTIPOLYGON(((-76.54193262...
## 8 MULTIPOLYGON(((-77.91906980...
## 9 MULTIPOLYGON(((-77.16403243...
## 10 MULTIPOLYGON(((-77.15427973...
## 11 MULTIPOLYGON(((-78.97913187...
## 12 MULTIPOLYGON(((-78.32392257...
## 13 MULTIPOLYGON(((-78.45623203...
## 14 MULTIPOLYGON(((-79.88557248...
## 15 MULTIPOLYGON(((-79.13813182...
## 16 MULTIPOLYGON(((-77.88533221...
## 17 MULTIPOLYGON(((-76.43508531...
## 18 MULTIPOLYGON(((-81.08343124...
## 19 MULTIPOLYGON(((-81.69653106...
## 20 MULTIPOLYGON(((-76.38103268...
tbl_df(nc)
## # A tibble: 100 × 9
## AREA PERIMETER COUNTYP010 STATE COUNTY FIPS STATE_FIPS SQUARE_MIL
## <dbl> <dbl> <dbl> <fctr> <fctr> <fctr> <fctr> <dbl>
## 1 0.11175964 1.610396 1994 NC Ashe County 37009 37 429.350
## 2 0.06159483 1.354829 1996 NC Alleghany County 37005 37 236.459
## 3 0.14023009 1.769388 1998 NC Surry County 37171 37 538.863
## 4 0.08912401 1.425249 1999 NC Gates County 37073 37 342.340
## 5 0.06865730 4.428217 2000 NC Currituck County 37053 37 263.871
## 6 0.11859434 1.404309 2001 NC Stokes County 37169 37 455.793
## 7 0.06259671 2.106357 2002 NC Camden County 37029 37 240.615
## 8 0.11542955 1.462524 2003 NC Warren County 37185 37 443.659
## 9 0.14328609 2.397293 2004 NC Northampton County 37131 37 550.581
## 10 0.09245561 1.810778 2005 NC Hertford County 37091 37 355.525
## # ... with 90 more rows, and 1 more variables: geometry <simple_feature>
nc = st_transform(nc, st_crs(hwy)$proj4string)
row.names(nc) = sub(" County","", as.character(nc$COUNTY))
air = st_transform(air, st_crs(hwy)$proj4string)
plot(nc, axes=TRUE)
plot(air, add=TRUE, pch=1, col="blue")
plot(hwy, add=TRUE, col="red")
counties = which(row.names(nc) %in% c("Durham","Wake","Orange"))
d = st_distance(nc[counties,],air)
str(d)
## num [1:3, 1:940] 846916 837771 836234 3122697 3146840 ...
nearest_airport = apply(d, 1, which.min) %>% unique()
air$AIRPT_NAME[nearest_airport] %>% as.character()
## [1] "RALEIGH-DURHAM INTERNATIONAL AIRPORT"
d[, nearest_airport, drop=FALSE]
## [,1]
## [1,] 20633.952
## [2,] 3334.507
## [3,] 0.000
(nc_cent = st_centroid(nc))
## Warning in CPL_proj4string_from_epsg(epsg): GDAL Error 6: EPSG PCS/GCS code -2147483648 not found in EPSG support files. Is this a valid
## EPSG coordinate system?
## Warning in `st_crs<-.sfc`(`*tmp*`, value = structure(list(epsg = NA_integer_, : st_crs: replacing
## crs does not reproject data; use st_transform for that
## Geometry set for 100 features
## geometry type: POINT
## dimension: XY
## bbox: xmin: 1315264 ymin: 3870422 xmax: 2061959 ymax: 4165292
## epsg (SRID): NA
## proj4string:
## First 5 geometries:
## POINT(1532770.11999766 4094136.71257601)
## POINT(1565623.45873979 4104658.72512808)
## POINT(1606393.82239651 4101068.89858318)
## POINT(1966497.95216947 4159294.55278797)
## POINT(2029641.55497004 4165291.61233346)
d = st_distance(nc_cent[counties],air)
nearest_airport = apply(d, 1, which.min) %>% unique()
air$AIRPT_NAME[nearest_airport] %>% as.character()
## [1] "RALEIGH-DURHAM INTERNATIONAL AIRPORT"
d[, nearest_airport, drop=FALSE]
## [,1]
## [1,] 37003.95
## [2,] 19685.62
## [3,] 16071.84
For more detail see the DE-9IM specification
nc_air = st_intersects(nc, air)
str(nc_air[20:30])
## List of 11
## $ : int(0)
## $ : int(0)
## $ : int(0)
## $ : int(0)
## $ : int(0)
## $ : int 268
## $ : int 717
## $ : int(0)
## $ : int(0)
## $ : int(0)
## $ : int(0)
has_air = sapply(nc_air, function(x) length(x) > 0)
nc$COUNTY[has_air]
## [1] Forsyth County Guilford County Dare County Wake County Pitt County
## [6] Catawba County Buncombe County Wayne County Mecklenburg County Moore County
## [11] Cabarrus County Lenoir County Craven County Cumberland County Onslow County
## [16] New Hanover County
## 100 Levels: Alamance County Alexander County Alleghany County Anson County ... Yancey County
plot(nc, axes=TRUE)
plot(nc[has_air,], add=TRUE, col="lightblue")
plot(air[unlist(nc_air) %>% unique(),], add=TRUE, pch=16, col="blue")
durham = which(row.names(nc) == "Durham")
adj = st_touches(nc, sparse=FALSE)
str(adj)
## logi [1:100, 1:100] FALSE TRUE FALSE FALSE FALSE FALSE ...
nc$COUNTY[adj[durham,]] %>% as.character()
## [1] "Person County" "Granville County" "Orange County" "Wake County" "Chatham County"
library(corrplot)
rownames(adj) = rownames(nc)
colnames(adj) = rownames(nc)
corrplot(adj[1:20,1:20],method="color",type="lower",tl.col="black",cl.pos = "n")
max(rowSums(adj))
## [1] 9
most_neighbors = rowSums(adj)==max(rowSums(adj))
nc$COUNTY[most_neighbors] %>% as.character()
## [1] "Iredell County" "Moore County"
plot(nc, axes=TRUE)
plot(nc[most_neighbors,],add=TRUE,col="lightblue")
min(rowSums(adj))
## [1] 2
least_neighbors = rowSums(adj)==min(rowSums(adj))
nc$COUNTY[least_neighbors] %>% as.character()
## [1] "Currituck County" "Chowan County" "Dare County" "Tyrrell County"
## [5] "Polk County" "Pamlico County" "Clay County" "New Hanover County"
plot(nc, axes=TRUE)
plot(nc[least_neighbors,],add=TRUE,col="lightblue")