3 Download process

3.0.1 Catalogue existing data

The download process begins by using the {dataRetrieval} package to catalogue the data that are available in the WQP. To do this, the user selects parameters of interest and their corresponding characteristicName values for retrieving data from the WQP. Below are the current parameters and their characteristicNames as defined in the configuration YAML, 1_inventory/cfg/wqp_codes.yml. Data for these parameters are requested from the WQP within a spatial grid that is mapped in the next section.

# Parameter names are the top-level keys of the parsed YAML configuration
names_yaml <- names(yaml_contents)

# Build one long table with a row per (parameter, characteristicName) pair,
# then render it with repeated parameter labels merged into a single cell
map_df(
  .x = names_yaml,
  .f = function(param_name) {
    # Single-bracket indexing keeps the list wrapper, so as_tibble() yields a
    # one-column tibble; that column is renamed by position
    as_tibble(yaml_contents[param_name]) %>%
      rename(characteristicName = 1) %>%
      mutate(parameter = param_name) %>%
      select(parameter, characteristicName)
  }
) %>%
  kable() %>%
  kable_material() %>%
  collapse_rows(columns = 1, valign = "top")
parameter characteristicName
chlorophyll Chlorophyll a
Chlorophyll a (probe relative fluorescence)
Chlorophyll a, corrected for pheophytin
Chlorophyll a (probe)
Chlorophyll a, free of pheophytin
Chlorophyll a, uncorrected for pheophytin
Chlorophyll a - Phytoplankton (suspended)
doc Organic carbon
Total carbon

3.0.2 Maps of data spread

The pipeline uses a spatial grid to download data from the WQP in batches. Maps are presented below with counts of records across this spatial grid. Note: The counts here are for raw data that are not yet filtered or harmonized.

# Combine sample counts in each grid_id with the grid polygons.
# The left join keeps every grid polygon; polygons without a match get
# n = NA. The result is reprojected to CRS 9311 for mapping.
# NOTE(review): the original pipe after `site_counts` had no step before
# `by =`, which is a parse error. The maps below fill by `n`, so a per-grid
# tally via count(grid_id) is assumed here -- confirm against the pipeline
# (e.g. whether a weighted count of records is intended instead).
grid_counts <- left_join(x = global_grid,
                         y = site_counts %>%
                           count(grid_id),
                         by = c("id" = "grid_id")) %>%
  st_transform(crs = 9311)

# Boundary polygons returned by states(), reprojected to CRS 9311 so they
# share a coordinate system with grid_counts; subset by NAME for each map
state_selection <- st_transform(states(progress_bar = FALSE), crs = 9311)
## Retrieving data for the year 2021
# Conterminous US map:
# Exclude Alaska, Hawaii, and the territories so that the remaining polygons
# define the plotting window
conterminous_us <- state_selection %>%
  filter(!(NAME %in% c("Alaska", "Hawaii", "American Samoa",
                       "Guam", "Puerto Rico",
                       "United States Virgin Islands",
                       "Commonwealth of the Northern Mariana Islands")))

# Extract the vertex coordinates once instead of once per axis limit
conterminous_xy <- st_coordinates(conterminous_us)

ggplot() +
  geom_sf(data = grid_counts,
          aes(fill = n)) +
  geom_sf(data = conterminous_us,
          fill = NA, color = "black") +
  xlab(NULL) +
  ylab(NULL) +
  # Zoom to the full extent (min to max) of the selected polygons on both axes
  coord_sf(xlim = range(conterminous_xy[, "X"]),
           ylim = range(conterminous_xy[, "Y"])) +
  # Log-scaled fill; grid cells with NA counts are drawn white
  scale_fill_viridis_c("Record count",
                       trans = "log10",
                       labels = scales::label_number(),
                       na.value = "white",
                       breaks = c(1, 10, 100, 1000, 10000)) +
  ggtitle("Conterminous US")

# Alaska map:
AK <- state_selection %>%
  filter(NAME == "Alaska")

# Extract the vertex coordinates once instead of once per axis limit
ak_xy <- st_coordinates(AK)

ggplot() +
  geom_sf(data = grid_counts,
          aes(fill = n)) +
  geom_sf(data = AK,
          fill = NA, color = "black") +
  xlab(NULL) +
  ylab(NULL) +
  # Zoom to the full extent (min to max) of the Alaska polygon on both axes
  coord_sf(xlim = range(ak_xy[, "X"]),
           ylim = range(ak_xy[, "Y"])) +
  # Log-scaled fill; grid cells with NA counts are drawn white
  scale_fill_viridis_c("Record count",
                       trans = "log10",
                       labels = scales::label_number(),
                       na.value = "white",
                       breaks = c(1, 10, 100, 1000, 10000)) +
  ggtitle("Alaska")

# Hawaii map:
HI <- state_selection %>%
  filter(NAME == "Hawaii")

# Extract the vertex coordinates once instead of once per axis limit
hi_xy <- st_coordinates(HI)

ggplot() +
  geom_sf(data = grid_counts,
          aes(fill = n)) +
  geom_sf(data = HI,
          fill = NA, color = "black") +
  xlab(NULL) +
  ylab(NULL) +
  # The upper x bound and lower y bound are rescaled by fixed factors to
  # adjust the window; the other two bounds sit on the polygon extent
  coord_sf(xlim = c(min(hi_xy[, "X"]),
                    0.9 * max(hi_xy[, "X"])),
           ylim = c(1.1 * min(hi_xy[, "Y"]),
                    max(hi_xy[, "Y"]))) +
  # Log-scaled fill; grid cells with NA counts are drawn white
  scale_fill_viridis_c("Record count",
                       trans = "log10",
                       labels = scales::label_number(),
                       na.value = "white",
                       breaks = c(1, 10, 100, 1000, 10000)) +
  ggtitle("Hawaii")

# American Samoa map:
AS <- state_selection %>%
  filter(NAME %in% c("American Samoa"))

# Extract the vertex coordinates once instead of once per axis limit
as_xy <- st_coordinates(AS)

ggplot() +
  geom_sf(data = grid_counts,
          aes(fill = n)) +
  geom_sf(data = AS,
          fill = NA, color = "black") +
  xlab(NULL) +
  ylab(NULL) +
  # Both x bounds and the lower y bound are rescaled by fixed factors to
  # adjust the window; the upper y bound sits on the polygon extent
  coord_sf(xlim = c(1.025 * min(as_xy[, "X"]),
                    0.975 * max(as_xy[, "X"])),
           ylim = c(1.05 * min(as_xy[, "Y"]),
                    max(as_xy[, "Y"]))) +
  # Log-scaled fill; grid cells with NA counts are drawn white
  scale_fill_viridis_c("Record count",
                       trans = "log10",
                       labels = scales::label_number(),
                       na.value = "white",
                       breaks = c(1, 10, 100, 1000, 10000)) +
  ggtitle("American Samoa")

# Guam & Commonwealth of the Northern Mariana Islands map
GU_CNMI <- state_selection %>%
  filter(NAME %in% c("Guam", "Commonwealth of the Northern Mariana Islands"))

# Extract the vertex coordinates once instead of once per axis limit
gu_cnmi_xy <- st_coordinates(GU_CNMI)

ggplot() +
  geom_sf(data = grid_counts,
          aes(fill = n)) +
  geom_sf(data = GU_CNMI,
          fill = NA, color = "black") +
  xlab(NULL) +
  ylab(NULL) +
  # All four bounds are rescaled by fixed factors to adjust the window
  # around the two territories
  coord_sf(xlim = c(1.025 * min(gu_cnmi_xy[, "X"]),
                    0.975 * max(gu_cnmi_xy[, "X"])),
           ylim = c(0.95 * min(gu_cnmi_xy[, "Y"]),
                    1.05 * max(gu_cnmi_xy[, "Y"]))) +
  # Log-scaled fill; grid cells with NA counts are drawn white
  scale_fill_viridis_c("Record count",
                       trans = "log10",
                       labels = scales::label_number(),
                       na.value = "white",
                       breaks = c(1, 10, 100, 1000, 10000)) +
  ggtitle("Guam & Commonwealth of the Northern Mariana Islands")

# Puerto Rico & United States Virgin Islands map:
# Keep only the two Caribbean territories; their polygons set the map window
PR_VI <- state_selection %>%
  filter(NAME %in% c("Puerto Rico", "United States Virgin Islands"))

# Draw the gridded counts with the territory outlines on top.
# NOTE(review): this plot expression ends with a trailing `+`, so it is
# incomplete as shown -- confirm what follows (e.g. a theme layer) or drop
# the final `+`.
ggplot() +
  geom_sf(data = grid_counts,
          aes(fill = n)) +
  geom_sf(data = PR_VI,
          fill = NA, color = "black") +
  xlab(NULL) +
  ylab(NULL) +
  # All four bounds are rescaled by fixed factors to adjust the window
  coord_sf(xlim = c(0.95 * min(st_coordinates(PR_VI)[,"X"]),
                    1.05 * max(st_coordinates(PR_VI)[,"X"])),
           ylim = c(1.075 * min(st_coordinates(PR_VI)[,"Y"]),
                    0.925 * max(st_coordinates(PR_VI)[,"Y"]))) +
  # Log-scaled fill; grid cells with NA counts are drawn white
  scale_fill_viridis_c("Record count",
                       trans = "log10",
                       labels = scales::label_number(),
                       na.value = "white",
                       breaks = c(1, 10, 100, 1000, 10000)) +
  ggtitle("Puerto Rico & United States Virgin Islands") +