--- title: Spatial data author: "Colin Rundel" date: "2019-03-09" output: xaringan::moon_reader: css: "slides.css" lib_dir: libs nature: highlightStyle: github highlightLines: true countIncrementalSlides: false --- exclude: true ```{r setup, echo=FALSE, message=FALSE, warning=FALSE, include=FALSE} options( htmltools.dir.version = FALSE, # for blogdown width = 100, tibble.width = 80 ) knitr::opts_chunk$set( fig.align = "center", dpi=150 ) htmltools::tagList(rmarkdown::html_dependency_font_awesome()) library(dplyr) library(sf) library(purrr) library(ggplot2) library(patchwork) ggplot2::theme_set(ggplot2::theme_bw()) cols = c("#7fc97f","#386cb0","#beaed4","#fdc086") ``` --- class: middle count: false # Geospatial stuff is hard --- ## Projections ```{r echo=FALSE} lat_lines = map(seq(9.999, 89.999, length.out = 9), ~ cbind(seq(-179.999, -9.999, length.out=100), .)) long_lines = map(seq(-179.999, -9.999, length.out = 17), ~ cbind(., seq(9.999, 89.999, length.out=100))) lat_long = c(lat_lines, long_lines) %>% st_multilinestring() %>% st_sfc() %>% st_set_crs("+proj=longlat +datum=WGS84 +no_defs") ``` ```{r echo=FALSE, fig.align="center", message=FALSE, out.width="100%", fig.height=5} data(wrld_simpl, package = "maptools") world = st_as_sf(wrld_simpl) NAm = world %>% filter(FIPS %in% c("CA","GL","MX","US")) NAm_google = st_transform(NAm, "+init=epsg:3857") par(mar=c(3,2,2,1),mfrow=c(2,3)) plot(lat_long, col=adjustcolor("grey",alpha.f = 0.5), axes=TRUE, main="Lat/Long (epsg:4326)") plot(st_geometry(NAm), col="black", add=TRUE) plot(st_transform(lat_long, "+init=epsg:3857"), col=adjustcolor("grey",alpha.f = 0.5), axes=TRUE, main="Google / Web Mercator (epsg:3857)", ylim=c(0, 2e7)) plot(st_transform(NAm, "+init=epsg:3857") %>% st_geometry(), col="black", add=TRUE) lcc = "+proj=lcc +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs" plot(st_transform(lat_long, lcc), col=adjustcolor("grey",alpha.f = 0.5), axes=TRUE, main="Lambert Conformal Conic:") plot(st_transform(NAm, lcc) %>% st_geometry(), col="black", add=TRUE) aea = "+proj=aea +lat_1=20 +lat_2=60 +lat_0=40 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs" plot(st_transform(lat_long, aea), col=adjustcolor("grey",alpha.f = 0.5), axes=TRUE, main="Alberts Equal Area") plot(st_transform(NAm, aea) %>% st_geometry(), col="black", add=TRUE) robinson = "+proj=robin +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" plot(st_transform(lat_long, robinson), col=adjustcolor("grey",alpha.f = 0.5), axes=TRUE, main="Robinson") plot(st_transform(NAm, robinson) %>% st_geometry(), col="black", add=TRUE) mollweide = "+proj=moll +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs" plot(st_transform(lat_long, mollweide), col=adjustcolor("grey",alpha.f = 0.5), axes=TRUE, main="Mollweide") plot(st_transform(NAm, mollweide) %>% st_geometry(), col="black", add=TRUE) ``` --- ## Dateline How long is the flight between the Western most and the Eastern most points in the US? -- ```{r echo=FALSE, fig.align="center", fig.height=4, fig.width=8, warning=FALSE, message=FALSE, out.width="70%", warning=FALSE} par(mar=c(3,3,1,1)) ak = st_read("data/ak/states.shp", quiet = TRUE, stringsAsFactors = FALSE) ak_geom = st_geometry(ak) west_hem = st_polygon(list(matrix(c(-180,90, -180,-90, 0,-90, 0,90, -180,90), ncol=2,byrow=TRUE))) %>% st_sfc() %>% st_set_crs("+proj=longlat +datum=WGS84") east_hem = st_polygon(list(matrix(c(180,90, 180,-90, 0,-90, 0,90, 180,90), ncol=2,byrow=TRUE))) %>% st_sfc() %>% st_set_crs("+proj=longlat +datum=WGS84") ak_west = st_intersection(ak_geom, west_hem) ak_east = st_intersection(ak_geom, east_hem) ak_east_shift = (ak_east - c(360,0)) %>% st_set_crs("+proj=longlat +datum=WGS84") ak_shift = st_union(ak_east_shift, ak_west) plot(ak_shift, axes=TRUE, col="black", border=NA, xlim=c(-190, -130)) points(c(-360+179.776,-179.146), c(51.952,51.273),col='red') abline(v=-180,col='blue',lty=2) ``` ```{r echo=FALSE, fig.align="center", fig.width=8, fig.height=3, out.width="70%"} plot(ak_shift, col="black", border=NA, xlim=c(-190, -170), ylim=c(50, 55)) points(c(-360+179.776,-179.146), c(51.952,51.273),col='red') abline(v=-180,col='blue',lty=2) box() ``` --- ## Great circle distance .small[ ```{r fig.align="center", fig.width=8, fig.height=4} par(mar=c(0,0,0,0)) ak1 = c(179.776,51.952) ak2 = c(-179.146,51.273) inter = geosphere::gcIntermediate(ak1, ak2, n=50, addStartEnd=TRUE) plot(st_geometry(world), col="black", ylim=c(-90,90), axes=TRUE) lines(inter,col='red',lwd=2,lty=3) ``` ] --- ## Even plotting is hard ```{r fig.align="center", fig.width=8, fig.height=4, out.width="100%"} plot(st_geometry(NAm), graticule=st_crs(NAm), col="black", border=NA, axes=TRUE) ``` --- ## Relationships --- class: middle count: false # Geospatial Data and R --- ## Simple Features ```{r echo=FALSE} pt = st_point(c(30, 10)) ls = st_linestring(matrix(c(30, 10, 10, 30, 40, 40), byrow=TRUE, ncol=2)) poly = st_polygon(list(matrix(c(30, 10, 40, 40, 20, 40, 10, 20, 30, 10), ncol=2, byrow=TRUE))) poly_hole = st_polygon( list( matrix(c(35, 10, 45, 45, 15, 40, 10, 20, 35, 10), ncol=2, byrow=TRUE), matrix(c(20, 30, 35, 35, 30, 20, 20, 30), ncol=2, byrow=TRUE) ) ) pts = st_multipoint(matrix(c(10, 40, 40, 30, 20, 20, 30, 10), ncol=2, byrow=TRUE)) lss = st_multilinestring(list( matrix(c(10, 10, 20, 20, 10, 40), ncol=2, byrow=TRUE), matrix(c(40, 40, 30, 30, 40, 20, 30, 10), ncol=2, byrow=TRUE) )) polys = st_multipolygon(list( list(matrix(c(30, 20, 45, 40, 10, 40, 30, 20), ncol=2, byrow=TRUE)), list(matrix(c(15, 5, 40, 10, 10, 20, 5, 10, 15, 5), ncol=2, byrow=TRUE)) )) polys_hole = st_multipolygon(list( list(matrix(c(40, 40, 20, 45, 45, 30, 40, 40), ncol=2, byrow=TRUE)), list(matrix(c(20, 35, 10, 30, 10, 10, 30, 5, 45, 20, 20, 35), ncol=2, byrow=TRUE), matrix(c(30, 20, 20, 15, 20, 25, 30, 20), ncol=2, byrow=TRUE)) )) ``` ```{r echo=FALSE, fig.width=8, fig.height=8, fig.align="center", out.width="100%"} par(mar=c(1,1,2,1), mfrow=c(4,4)) plot(pt, axes=FALSE, main="Point", pch=16); box() plot(ls, axes=FALSE, main="Linestring"); box() plot(poly, axes=FALSE, col="lightgrey", main="Polygon"); box() plot(poly_hole, axes=FALSE, col="lightgrey", main="Polygon w/ Hole(s)"); box() plot(pts, axes=FALSE, main="Multipoint", pch=16); box() plot(lss, axes=FALSE, main="Multilinestring"); box() plot(polys, axes=FALSE, col="lightgrey", main="Multipolygon"); box() plot(polys_hole, axes=FALSE, col="lightgrey", main="Multipolygon w/ Hole(s)"); box() ``` --- ## Analysis of geospatial data in R R has a rich package ecosystem for read/writing, manipulating, and analyzing geospatial data.
Some core packages: * ~~`sp` - core classes for handling spatial data, additional utility functions.~~ * ~~`rgdal` - R interface to `gdal` (Geospatial Data Abstraction Library) for reading and writing spatial data.~~ * ~~`rgeos` - R interface to `geos` (Geometry Engine Open Source) library for querying and manipulating spatial data. Reading and writing WKT.~~ * `sf` - Combines the functionality of `sp`, `rgdal`, and `rgeos` into a single package based on tidy simple features. * `raster` - classes and tools for handling spatial raster data. * `stars` - Reading, manipulating, writing and plotting spatiotemporal arrays (rasters) See more - [Spatial task view](http://cran.r-project.org/web/views/Spatial.html) --- ## Installing `sf` This is the hardest part of using the `sf` package, difficulty comes from is dependence on several external libraries (`geos`, `gdal`, and `proj`). * *Windows* - installing from source works when Rtools is installed (system requirements are downloaded from rwinlib) * *MacOS* - install dependencies via homebrew: `gdal`, `geos`, `proj`, `udunits`. * *Linux* - Install development pacakages for GDAL (>= 2.0.0), GEOS (>= 3.3.0), Proj4 (>= 4.8.0), udunits2 from your package manager of choice. More specific details are included in the repo [README](https://github.com/r-spatial/sf) on github. --- ## Reading, writing, and converting simple features - `sf` * `st_read` / `st_write` - Shapefile, GeoJSON, KML, ... * `read_sf` / `write_sf` - Same, suports tibbles ... * `st_as_sfc` / `st_as_wkt` - WKT * `st_as_sfc` / `st_as_binary` - WKB * `st_as_sfc` / `as(x, "Spatial")` - sp See [sf vignette #2](https://cran.r-project.org/web/packages/sf/vignettes/sf2.html). --- ## Shapefiles ```{r} fs::dir_info("data/gis/nc_counties/") %>% select(path:size) ``` --- ## NC Counties ```{r} nc = st_read("data/gis/nc_counties/", quiet=TRUE, stringsAsFactors=FALSE) nc ``` --- ## sf tibbles ```{r} nc = read_sf("data/gis/nc_counties/") nc ``` --- ## `sf` classes .small[ ```{r} str(nc, max.level=1) class(nc) class(nc$geometry) class(nc$geometry[[1]]) ``` ] --- ## Plotting ```{r} plot(nc) ``` --- ## More Plotting ```{r echo=TRUE, out.width="100%", fig.height=4} plot(nc["AREA"]) ``` --- ## Graticules ```{r echo=TRUE, out.width="100%", fig.height=4} par(oma=c(0,2,0,0)) plot(nc["AREA"], graticule=TRUE, axes=TRUE, las=1) ``` --- ## Geometries ```{r echo=TRUE, out.width="100%", fig.height=4} par(oma=c(0,2,0,0)) plot(st_geometry(nc), graticule=TRUE, axes=TRUE, las=1) ``` --- ## ggplot2 ```{r echo=TRUE, out.width="100%", fig.height=4} ggplot(nc, aes(fill=AREA)) + geom_sf() ``` --- ## ggplot2 + palettes ```{r echo=TRUE, out.width="100%", fig.height=4} ggplot(nc, aes(fill=AREA)) + geom_sf() + scale_fill_viridis_c() ``` --- ## Some other data .small[ ```{r} air = st_read("data/gis/airports/", quiet=TRUE, stringsAsFactors=FALSE) %>% tbl_df() %>% st_sf() hwy = st_read("data/gis/us_interstates/", quiet=TRUE, stringsAsFactors=FALSE) %>% tbl_df() %>% st_sf() ``` ```{r echo=FALSE, out.width="100%", fig.height=4} par(mar=c(3,3,3,0.1), mfrow=c(1,3)) plot(st_geometry(nc), axes=TRUE, main="nc") plot(st_geometry(air), axes=TRUE, pch=16, col="blue", main="air") plot(st_geometry(hwy), axes=TRUE, col="red", main="hwy") ``` ] --- ## Overlays? ```{r echo=FALSE, out.width="100%", fig.height=4} plot(st_geometry(nc), axes=TRUE) plot(st_geometry(air), axes=TRUE, pch=16, col="blue", main="air", add=TRUE) plot(st_geometry(hwy), axes=TRUE, col="red", add=TRUE) ``` --- ## Overlays? (ggplot) ```{r echo=FALSE, out.width="100%", fig.height=5} ggplot() + geom_sf(data=nc) + geom_sf(data=air, color="blue", alpha=0.5, size=1) + geom_sf(data=hwy, color="red") ``` --- ## Projections ```{r} st_crs(nc) st_crs(air) st_crs(hwy) ``` --- ## Aside - UTM Zones --- ## Lat/Long ```{r} hwy = st_transform(hwy, st_crs(nc)) st_crs(hwy) ``` ```{r echo=FALSE, out.width="100%", fig.height=4} plot(st_geometry(nc), axes=TRUE, main="Combined") plot(st_geometry(air), add=TRUE, pch=16, col="blue") plot(st_geometry(hwy), add=TRUE, col="red") ``` --- class: middle count: false # Airport Example --- ## Sparse vs Full Results ```{r} st_intersects(nc[20:30,], air) %>% str() ``` --- ```{r} st_intersects(nc, air, sparse=FALSE) %>% str() st_intersects(nc, air, sparse=FALSE) %>% .[20:30, 260:270] ``` --- ## Which counties have airports? ```{r} nc_air = st_intersects(nc, air) n_air = map_int(nc_air, length) nc %>% slice(which(n_air > 0)) %>% pull(COUNTY) ``` ```{r} air_in_nc = nc_air %>% unlist() %>% unique() air %>% slice(air_in_nc) %>% pull(AIRPT_NAME) ``` --- ## Results ```{r echo=FALSE, out.width="100%", fig.height=4} plot(st_geometry(nc)) plot(st_geometry(nc[n_air > 0,]), add=TRUE, col="lightblue") plot(st_geometry(air[air_in_nc,]), add=TRUE, pch=16, col="blue") ```