3 Download process

3.0.1 Catalogue existing data

The download process begins by using the {dataRetrieval} package to catalogue the data that are available in the WQP. To do this, the user selects parameters of interest and the characteristicName values that correspond to each one; these are used to retrieve data from the WQP. Below are the current parameters and their characteristicNames as defined in the configuration YAML, 1_inventory/cfg/wqp_codes.yml. Data for these parameters are requested from the WQP within a spatial grid that is mapped in the next section.

Code

# Names of the top-level entries in yaml_contents; each one is treated as a
# parameter whose values are its characteristicNames
names_yaml <- names(yaml_contents)

# For every parameter, build a two-column tibble (parameter,
# characteristicName), row-bind the pieces, and render the result as a table
# in which repeated parameter labels are collapsed into one top-aligned cell
map_df(
  .x = names_yaml,
  .f = function(param_name) {
    # Single-element subset keeps the entry's name, so as_tibble() yields a
    # one-column tibble that is then renamed by position
    codes <- as_tibble(yaml_contents[param_name])
    codes <- rename(codes, characteristicName = 1)
    codes <- mutate(codes, parameter = param_name)
    select(codes, parameter, characteristicName)
  }
) %>%
  kable() %>%
  kable_material() %>%
  collapse_rows(columns = 1, valign = "top")

parameter	characteristicName
chlorophyll	Chlorophyll a
	Chlorophyll a (probe relative fluorescence)
	Chlorophyll a, corrected for pheophytin
	Chlorophyll a (probe)
	Chlorophyll a, free of pheophytin
	Chlorophyll a, uncorrected for pheophytin
	Chlorophyll a - Phytoplankton (suspended)
doc	Organic carbon
	Total carbon
	Non-purgeable Organic Carbon (NPOC)
sdd	Depth, Secchi disk depth
	Depth, Secchi disk depth (choice list)
	Secchi Reading Condition (choice list)
	Secchi depth
	Water transparency, Secchi disc

3.0.2 Maps of data spread

The pipeline uses a spatial grid to download data from the WQP in batches. Maps are presented below with counts of records across this spatial grid. Note: The counts here are for raw data that are not yet filtered or harmonized.

Code

# Tally the rows of site_counts per grid_id, attach the tallies to the grid
# polygons (matching grid "id" to "grid_id"), and reproject to EPSG:9311.
# Being a left join, grid cells without a match are kept with n = NA.
grid_counts <- global_grid %>%
  left_join(count(site_counts, grid_id),
            by = c("id" = "grid_id")) %>%
  st_transform(crs = 9311)

# Outlines used to frame each map, reprojected to the same CRS as the grid
state_selection <- st_transform(states(progress_bar = FALSE), crs = 9311)

## Retrieving data for the year 2021

Code

# Conterminous US map: keep every outline except Alaska, Hawaii, and the
# island territories listed below
non_conterminous <- c("Alaska", "Hawaii", "American Samoa",
                      "Guam", "Puerto Rico",
                      "United States Virgin Islands",
                      "Commonwealth of the Northern Mariana Islands")
conterminous_us <- filter(state_selection, !(NAME %in% non_conterminous))

# Extent of the retained outlines, used to frame the plot
conus_coords <- st_coordinates(conterminous_us)
conus_x <- range(conus_coords[, "X"])
conus_y <- range(conus_coords[, "Y"])

# Grid cells filled by count (log10 scale, cells with no count drawn white),
# with the outlines drawn on top
ggplot() +
  geom_sf(data = grid_counts, mapping = aes(fill = n)) +
  geom_sf(data = conterminous_us, fill = NA, color = "black") +
  coord_sf(xlim = conus_x, ylim = conus_y) +
  scale_fill_viridis_c(
    "Record count",
    trans = "log10",
    labels = scales::label_number(),
    na.value = "white",
    breaks = c(1, 10, 100, 1000, 10000)
  ) +
  labs(x = NULL, y = NULL, title = "Conterminous US") +
  theme_bw()

Code

# Alaska map: isolate the Alaska outline
AK <- filter(state_selection, NAME == "Alaska")

# Extent of the Alaska outline, used to frame the plot
ak_coords <- st_coordinates(AK)
ak_x <- range(ak_coords[, "X"])
ak_y <- range(ak_coords[, "Y"])

# Grid cells filled by count (log10 scale, cells with no count drawn white),
# with the outline drawn on top
ggplot() +
  geom_sf(data = grid_counts, mapping = aes(fill = n)) +
  geom_sf(data = AK, fill = NA, color = "black") +
  coord_sf(xlim = ak_x, ylim = ak_y) +
  scale_fill_viridis_c(
    "Record count",
    trans = "log10",
    labels = scales::label_number(),
    na.value = "white",
    breaks = c(1, 10, 100, 1000, 10000)
  ) +
  labs(x = NULL, y = NULL, title = "Alaska") +
  theme_bw()

Code

# Hawaii map: isolate the Hawaii outline
HI <- filter(state_selection, NAME == "Hawaii")

# Extent of the Hawaii outline
hi_coords <- st_coordinates(HI)
hi_x <- range(hi_coords[, "X"])
hi_y <- range(hi_coords[, "Y"])

# Frame limits: the upper x bound and lower y bound are scaled by fixed
# factors. NOTE(review): the factors multiply projected coordinates, so the
# direction of the shift depends on each coordinate's sign -- presumably
# hand-tuned for this view; confirm before changing the CRS.
hi_xlim <- hi_x * c(1, 0.9)
hi_ylim <- hi_y * c(1.1, 1)

# Grid cells filled by count (log10 scale, cells with no count drawn white),
# with the outline drawn on top
ggplot() +
  geom_sf(data = grid_counts, mapping = aes(fill = n)) +
  geom_sf(data = HI, fill = NA, color = "black") +
  coord_sf(xlim = hi_xlim, ylim = hi_ylim) +
  scale_fill_viridis_c(
    "Record count",
    trans = "log10",
    labels = scales::label_number(),
    na.value = "white",
    breaks = c(1, 10, 100, 1000, 10000)
  ) +
  labs(x = NULL, y = NULL, title = "Hawaii") +
  theme_bw()

Code

# American Samoa map: isolate the American Samoa outline
AS <- filter(state_selection, NAME == "American Samoa")

# Extent of the American Samoa outline
as_coords <- st_coordinates(AS)
as_x <- range(as_coords[, "X"])
as_y <- range(as_coords[, "Y"])

# Frame limits: both x bounds and the lower y bound are scaled by fixed
# factors. NOTE(review): the factors multiply projected coordinates, so the
# direction of the shift depends on each coordinate's sign -- presumably
# hand-tuned for this view; confirm before changing the CRS.
as_xlim <- as_x * c(1.025, 0.975)
as_ylim <- as_y * c(1.05, 1)

# Grid cells filled by count (log10 scale, cells with no count drawn white),
# with the outline drawn on top
ggplot() +
  geom_sf(data = grid_counts, mapping = aes(fill = n)) +
  geom_sf(data = AS, fill = NA, color = "black") +
  coord_sf(xlim = as_xlim, ylim = as_ylim) +
  scale_fill_viridis_c(
    "Record count",
    trans = "log10",
    labels = scales::label_number(),
    na.value = "white",
    breaks = c(1, 10, 100, 1000, 10000)
  ) +
  labs(x = NULL, y = NULL, title = "American Samoa") +
  theme_bw()

Code

# Guam & Commonwealth of the Northern Mariana Islands map: isolate both
# outlines so they share one frame
guam_cnmi_names <- c("Guam", "Commonwealth of the Northern Mariana Islands")
GU_CNMI <- filter(state_selection, NAME %in% guam_cnmi_names)

# Combined extent of the two outlines
gu_cnmi_coords <- st_coordinates(GU_CNMI)
gu_cnmi_x <- range(gu_cnmi_coords[, "X"])
gu_cnmi_y <- range(gu_cnmi_coords[, "Y"])

# Frame limits: all four bounds are scaled by fixed factors.
# NOTE(review): the factors multiply projected coordinates, so the direction
# of the shift depends on each coordinate's sign -- presumably hand-tuned for
# this view; confirm before changing the CRS.
gu_cnmi_xlim <- gu_cnmi_x * c(1.025, 0.975)
gu_cnmi_ylim <- gu_cnmi_y * c(0.95, 1.05)

# Grid cells filled by count (log10 scale, cells with no count drawn white),
# with the outlines drawn on top
ggplot() +
  geom_sf(data = grid_counts, mapping = aes(fill = n)) +
  geom_sf(data = GU_CNMI, fill = NA, color = "black") +
  coord_sf(xlim = gu_cnmi_xlim, ylim = gu_cnmi_ylim) +
  scale_fill_viridis_c(
    "Record count",
    trans = "log10",
    labels = scales::label_number(),
    na.value = "white",
    breaks = c(1, 10, 100, 1000, 10000)
  ) +
  labs(x = NULL, y = NULL,
       title = "Guam & Commonwealth of the Northern Mariana Islands") +
  theme_bw()

Code

# Puerto Rico & United States Virgin Islands map: isolate both outlines so
# they share one frame
pr_vi_names <- c("Puerto Rico", "United States Virgin Islands")
PR_VI <- filter(state_selection, NAME %in% pr_vi_names)

# Combined extent of the two outlines
pr_vi_coords <- st_coordinates(PR_VI)
pr_vi_x <- range(pr_vi_coords[, "X"])
pr_vi_y <- range(pr_vi_coords[, "Y"])

# Frame limits: all four bounds are scaled by fixed factors.
# NOTE(review): the factors multiply projected coordinates, so the direction
# of the shift depends on each coordinate's sign -- presumably hand-tuned for
# this view; confirm before changing the CRS.
pr_vi_xlim <- pr_vi_x * c(0.95, 1.05)
pr_vi_ylim <- pr_vi_y * c(1.075, 0.925)

# Grid cells filled by count (log10 scale, cells with no count drawn white),
# with the outlines drawn on top
ggplot() +
  geom_sf(data = grid_counts, mapping = aes(fill = n)) +
  geom_sf(data = PR_VI, fill = NA, color = "black") +
  coord_sf(xlim = pr_vi_xlim, ylim = pr_vi_ylim) +
  scale_fill_viridis_c(
    "Record count",
    trans = "log10",
    labels = scales::label_number(),
    na.value = "white",
    breaks = c(1, 10, 100, 1000, 10000)
  ) +
  labs(x = NULL, y = NULL,
       title = "Puerto Rico & United States Virgin Islands") +
  theme_bw()