Below we demonstrate how to use the Geoconnex Reference Feature server API with R’s spatial packages, particularly sf. We’ll explore various use cases for working with hydrological and related spatial data. These use cases can be grouped into three broad categories: discovering reference features by their attributes, querying them spatially, and finding datasets associated with them.
This API conforms to the OGC-API Features specification, and its full API documentation is available here for those who wish to explore its capabilities beyond what is demonstrated below.
First, let’s load the necessary libraries and set up our API base URL.
library(sf)
library(dplyr)
library(httr)
library(mapview)
library(jsonlite)
library(knitr)
library(DT)
<- "https://reference.geoconnex.us" base_url
The Geoconnex Reference Feature server is a source for identifiers and geometries of real-world features that many organizations may collect and publish data about. The attributes of these features vary, but all include a “uri” that serves as a persistent identifier. First, let’s discover what kinds of features are available:
url <- paste0(base_url, "/collections?f=json")
collections <- jsonlite::fromJSON(url)

datatable(collections$collections)
We see a number of options available, including watershed boundaries like the Hydrologic Unit Codes; administrative boundaries like counties, states, and public water systems; hydrologic features like mainstems (rivers) and aquifers; and hydrometric features such as dams and gages. The reference feature server lets us find features according to attribute and spatial queries. In general, this is accomplished by passing queries of the form https://reference.geoconnex.us/collections/<collectionId>/items?filter=
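Since every items request follows this same pattern, it could be wrapped in a small helper function. A minimal sketch (gcx_items is a hypothetical convenience wrapper, not part of any package; the examples below spell the pattern out explicitly):

# Hypothetical helper: fetch items from a collection with a CQL attribute filter
gcx_items <- function(collection, filter) {
  url <- paste0(base_url, "/collections/", collection,
                "/items?f=json&filter=", URLencode(filter))
  sf::st_read(url, quiet = TRUE)
}

# e.g. gcx_items("mainstems", "name_at_outlet ILIKE '%animas%'")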
Let’s see how to use these collections!
Let’s say we’re interested in a specific river, one we think is called the “Animas River”. We can pass an attribute filter query to the mainstems collection, then use the R sf package to read the response into a spatial data frame and the mapview package to visualize it.
# construct a query for a river name that includes the string "animas"
<- URLencode("name_at_outlet ILIKE '%animas%'")
query <- paste0(base_url, "/collections/mainstems/items?f=json&filter=", query)
url
# Read the data into an sf object
<- st_read(url, quiet = TRUE) animas_rivers
# Display the results
animas_rivers |>
  select(uri, name_at_outlet, outlet_drainagearea_sqkm) |>
  datatable()
# Map the results
mapview(animas_rivers |>
select(uri, name_at_outlet), zcol = "name_at_outlet")
There are evidently 3 rivers whose names include the word “Animas”. Let’s say we were interested in the “Animas River”, shown on the map in green. We find that its Geoconnex URI is https://geoconnex.us/ref/mainstems/35394.
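Note that these URIs dereference: reading the identifier itself with sf returns the feature, which we will make use of later.

# Fetch the Animas River feature directly from its persistent identifier
animas <- st_read("https://geoconnex.us/ref/mainstems/35394", quiet = TRUE)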
We can also filter on logical and quantitative attribute conditions. Let’s say we wanted to find all rivers with drainage areas (in this reference dataset, the attribute is outlet_drainagearea_sqkm) greater than 500,000 square kilometers:
# construct a query for rivers with outlet_drainagearea_sqkm > 500,000
query <- URLencode("outlet_drainagearea_sqkm > 500000")
url <- paste0(base_url, "/collections/mainstems/items?f=json&filter=", query)

# Read the data into an sf object
large_mainstems <- st_read(url, quiet = TRUE)
# Display the results
large_mainstems |>
  select(uri, name_at_outlet, outlet_drainagearea_sqkm) |>
  datatable()
# Map the results
mapview(large_mainstems, zcol = "name_at_outlet")
Queries over multiple attributes can also be made, combining conditions with ‘AND’ or ‘OR’. For example, let’s find all dams whose name includes “Hoover”, and then filter to only those with a drainage area of more than 100,000 square kilometers:
# Step 1: Find all dams named "Hoover"
<- URLencode("name LIKE '%Hoover%'")
query_hoover <- paste0(base_url, "/collections/dams/items?f=json&filter=", query_hoover)
url_hoover <- st_read(url_hoover, quiet = TRUE)
hoover_dams
cat("Number of dams named 'Hoover':", nrow(hoover_dams), "\n")
Number of dams named 'Hoover': 39
# Create an interactive table of all Hoover dams
datatable(
  hoover_dams |>
    st_drop_geometry() |>
    select(uri, name, drainage_area_sqkm) |>
    arrange(desc(drainage_area_sqkm)),
  options = list(pageLength = 10, scrollX = TRUE),
  caption = "All Dams Named 'Hoover'",
  rownames = FALSE
)
# Step 2: Query for large Hoover dams using a combined filter
<- URLencode("name LIKE '%Hoover%' AND drainage_area_sqkm > 100000")
query_large_hoover <- paste0(base_url, "/collections/dams/items?f=json&filter=", query_large_hoover)
url_large_hoover <- st_read(url_large_hoover, quiet = TRUE)
large_hoover_dams
cat("\nNumber of large Hoover dams (Drainage Area > 100,000 sq km):", nrow(large_hoover_dams), "\n")
Number of large Hoover dams (Drainage Area > 100,000 sq km): 1
# Create an interactive table of large Hoover dams
datatable(
  large_hoover_dams |>
    st_drop_geometry() |>
    select(uri, name, drainage_area_sqkm) |>
    arrange(desc(drainage_area_sqkm)),
  options = list(pageLength = 10, scrollX = TRUE),
  caption = "Large Dams Named 'Hoover' (Drainage Area > 100,000 sq km)",
  rownames = FALSE
)
# Create a map view of all Hoover dams, highlighting the large ones
m <- mapview(hoover_dams |>
               select(uri, name, drainage_area_sqkm),
             layer.name = "All Hoover Dams", label = "name")
m + mapview(large_hoover_dams |>
              select(uri, name, drainage_area_sqkm),
            color = "red", col.regions = "red", layer.name = "Large Hoover Dams",
            lwd = 2, cex = 15, label = "Hoover")
We found 39 dams in the US named “Hoover”, but only 1 with a large drainage area: the famous one near Las Vegas, NV.
We can also do spatial queries, either with a bounding box (min lon, min lat, max lon, max lat) or by passing WKT-encoded geometry. Let’s say we want to find all public water systems within a bounding box around the Four Corners region.
# Define the bounding box for the Four Corners area
# Format: (min Longitude, min Latitude, max Longitude, max Latitude)
bbox <- c(-109.5, 36.5, -107.5, 37.5)

# Construct the URL with the bbox parameter
url <- paste0(base_url, "/collections/pws/items?f=json&bbox=", paste(bbox, collapse = ","))

# Read the data into an sf object
four_corners_pws <- st_read(url, quiet = TRUE)
# Display summary of the results
cat("Number of Public Water Systems found:", nrow(four_corners_pws), "\n")
Number of Public Water Systems found: 75
# Create an interactive table of the results
four_corners_pws |>
  st_drop_geometry() |>
  select(uri, pws_name, population_served_count) |>
  arrange(desc(population_served_count)) |>
  datatable(
    options = list(pageLength = 5, scrollX = TRUE),
    caption = "Public Water Systems in the Four Corners Area",
    rownames = FALSE
  )
# Create a map view of the results
m <- mapview(four_corners_pws, zcol = "population_served_count",
             layer.name = "Population Served", label = "pws_name")

# Add the bounding box to the map
bbox_poly <- st_as_sf(st_as_sfc(st_bbox(c(xmin = bbox[1], ymin = bbox[2],
                                          xmax = bbox[3], ymax = bbox[4]), crs = 4326)))
m + mapview(bbox_poly, col.region = "red", alpha.regions = 0, color = "red",
            lwd = 2, layer.name = "Query Bounding Box")
When it comes to spatial queries, we are not restricted to bounding boxes. We can pass any spatial predicate along with WKT geometries to a collection filter. Let’s say we have several field sites near Farmington, NM, and we want to identify which HUC10 watersheds they fall within. We’ll use the point-in-polygon capability of the INTERSECTS spatial predicate to find this information:
# Define our field site (example coordinate near Farmington, NM)
site_lon <- -108.2186
site_lat <- 36.7280

# Construct the query
query <- sprintf("INTERSECTS(geometry, POINT(%f %f))", site_lon, site_lat) |> URLencode()
url <- paste0(base_url, "/collections/hu10/items?f=json&filter=", query)
# Make the API call
huc10 <- st_read(url, quiet = TRUE) |>
  select(id, uri, name)
# Display the results table
datatable(huc10)
# Create a map
site_point <- st_point(c(site_lon, site_lat)) |>
  st_sfc(crs = 4326) |>
  st_sf()
mapview(huc10, zcol = "name", layer.name = "HUC10 Watershed") +
mapview(site_point, col.regions = "red", layer.name = "Field Site")
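Since any WKT geometry is accepted, the same pattern extends beyond points. As a sketch, here is a polygon variant of the query (the polygon coordinates are illustrative only):

# Illustrative polygon roughly covering the area around Farmington, NM
wkt <- "POLYGON((-108.4 36.6, -108.0 36.6, -108.0 36.9, -108.4 36.9, -108.4 36.6))"
query <- sprintf("INTERSECTS(geometry, %s)", wkt) |> URLencode()
url <- paste0(base_url, "/collections/hu10/items?f=json&filter=", query)
nearby_huc10s <- st_read(url, quiet = TRUE)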
Here we see that our field site is in HUC10 1408010505, which has the associated Geoconnex URI https://geoconnex.us/ref/hu10/1408010505. This identifier can be used if we were to publish data about our site, following Geoconnex guidance and best practices.
Complex features can have many coordinates, so requests via ?filter to the API can be too long to encode in a URL. To get around this, the API supports a special intersection process that involves passing the URL of any GeoJSON feature to a collection. Let’s say we want to know which counties intersect the Animas River (https://geoconnex.us/ref/mainstems/35394).
# Define the process endpoint
<- "https://reference.geoconnex.us/processes/intersector/execution"
process_url
# Define the input parameters
<- list(
input_params inputs = list(
url = "https://geoconnex.us/ref/mainstems/35394",
collection = "counties"
)
)
# Execute the process
<- POST(
response url = process_url,
body = toJSON(input_params, auto_unbox = TRUE),
add_headers("Content-Type" = "application/json"),
encode = "json"
)
# Convert the result to an sf object
<- st_read(response, quiet = TRUE)
intersecting_counties
# Create an interactive table of the results
intersecting_counties |>
  st_drop_geometry() |>
  select(name, uri) |>
  datatable(
    options = list(pageLength = 5, scrollX = TRUE),
    caption = "Counties Intersecting the Animas River"
  )
# Fetch the Animas River geometry
<- st_read("https://geoconnex.us/ref/mainstems/35394", quiet = TRUE)
animas_river
# Create a map view of the results
mapview(intersecting_counties, zcol = "name", layer.name = "Intersecting Counties") +
mapview(animas_river, color = "blue", layer.name = "Animas River")
Note that of the three counties intersecting the Animas River, two are named “San Juan”, https://geoconnex.us/ref/counties/08111 in Colorado, and https://geoconnex.us/ref/counties/35045 in New Mexico, highlighting the importance of unique identifiers and the usefulness of HTTP identifiers that direct to spatial/data representations of a given feature.
The most important use case of the Geoconnex system for data users is to discover datasets related to a given hydrologic feature. This functionality is the reason for implementing a Knowledge Graph approach. We currently offer two ways to discover datasets: via the datasets array embedded in each reference feature’s response (demonstrated below for mainstems), or via SPARQL queries against the knowledge graph directly.

Each mainstem’s GeoJSON response includes a nested array within the “datasets” attribute which can be extracted. In the example below, 58 datasets are available about the Animas River. These can be filtered downstream of the API call. For example, in the interactive data table we can use the search bar to search for mentions of “temperature”. Of the 58, 6 datasets have the variableMeasured “Temperature”, and these appear to be 3 pairs of USGS datasets from 3 distinct USGS monitoring locations, with each pair having 2 different download options.
# Animas River mainstem URI
<- "https://geoconnex.us/ref/mainstems/35394"
animas_uri
# Fetch the Animas River data
<- GET(animas_uri, query = list(f = "json"))
response
<- content(response, "text") |> fromJSON()
animas_data
# Extract datasets
<- animas_data$properties$datasets
datasets datatable(datasets,
options = list(pageLength = 5, scrollX = TRUE,
search = list(regex=TRUE, caseInsensitive = TRUE, search = 'temperature')
) )
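The same filtering can also be done in R rather than in the table widget, for example with dplyr (a sketch assuming the variableMeasured column shown in the table above):

# Keep only datasets whose measured variable mentions "temperature"
temperature_datasets <- datasets |>
  filter(grepl("temperature", variableMeasured, ignore.case = TRUE))
nrow(temperature_datasets)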
For those who wish to make SPARQL queries directly, the endpoint is https://graph.geoconnex.us/repositories/iow. For example, one can query the graph for all datasets that are schema:about monitoring locations located on (hyf:referencedPosition/hyf:HY_IndirectPosition/hyf:linearElement) the Animas River and that have a schema:variableMeasured whose description includes the string “temperature”.
# SPARQL endpoint
<- "https://graph.geoconnex.us/repositories/iow"
endpoint
# SPARQL query
query <- 'PREFIX schema: <https://schema.org/>
PREFIX gsp: <http://www.opengis.net/ont/geosparql#>
PREFIX hyf: <https://www.opengis.net/def/schema/hy_features/hyf/>
SELECT DISTINCT ?monitoringLocation ?siteName ?datasetDescription ?type ?url
?variableMeasured ?variableUnit ?measurementTechnique ?temporalCoverage
?distributionName ?distributionURL ?distributionFormat ?wkt
WHERE {
VALUES ?mainstem { <https://geoconnex.us/ref/mainstems/35394> }
?monitoringLocation hyf:referencedPosition/hyf:HY_IndirectPosition/hyf:linearElement ?mainstem ;
schema:subjectOf ?item ;
hyf:HydroLocationType ?type ;
gsp:hasGeometry/gsp:asWKT ?wkt .
?item schema:name ?siteName ;
schema:temporalCoverage ?temporalCoverage ;
schema:url ?url ;
schema:variableMeasured ?variableMeasured .
?variableMeasured schema:description ?datasetDescription ;
schema:name ?variableMeasuredName ;
schema:unitText ?variableUnit ;
schema:measurementTechnique ?measurementTechnique .
OPTIONAL {
?item schema:distribution ?distribution .
?distribution schema:name ?distributionName ;
schema:contentUrl ?distributionURL ;
schema:encodingFormat ?distributionFormat .
}
# Filter datasets by the desired variable description
FILTER(REGEX(?datasetDescription, "temperature", "i"))
}
ORDER BY ?siteName
'
# Execute the SPARQL query
response <- GET(
  url = endpoint,
query = list(query = query),
accept("application/sparql-results+json")
)
# Parse the JSON response
result <- content(response, "text", encoding = "UTF-8") %>% fromJSON()
# Extract the results
datasets <- as.data.frame(result$results$bindings) |>
  mutate(across(everything(), ~ .$value))
datatable(datasets)
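Because the query also returns each monitoring location’s geometry as WKT (?wkt), the results can be promoted to an sf object and mapped. A minimal sketch:

# Convert the WKT column to geometry and map the monitoring locations
locations_sf <- st_as_sf(datasets, wkt = "wkt", crs = 4326)
mapview(locations_sf, zcol = "siteName", layer.name = "Monitoring Locations")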