Run aquaculture model for Atlantic salmon

1 Introduction

This document runs the aquaculture model for Atlantic salmon farms. It processes farm location data, species parameters, and conducts sensitivity analyses to understand the impact of various parameters on fish and farm growth measures.

library(arrow)
library(sf)
library(dplyr)
library(tidyr)
library(terra)
library(magrittr)
library(purrr)
library(furrr)
library(future)
library(tictoc)
library(ggplot2)
library(fs)
library(conflicted)
library(stringr)
library(readxl)
library(units)
library(qs)
library(here)
library(targets)
conflicted::conflicts_prefer(dplyr::filter(), dplyr::select(), .quiet = T)

here("src") %>% 
  list.files(pattern = "\\.R$", full.names = TRUE) %>% 
  str_subset("map", negate = T) %>% 
  walk(source)
# Set up parallel processing
# plan(multisession, workers = parallelly::availableCores()-1)

# Filenames
species_params_excel <- c(file = file.path(input_species_param_path, "Species.xlsx"), sheet = "Atlantic salmon")
pop_params_excel <- c(file = file.path(input_species_param_path, "Population.xlsx"))
farm_locations_parquet <- file.path(input_farm_coords_path, "farm_coords.parquet")
farm_coords_file <- file.path(output_farm_data_path, "farm_coords.qs")
farm_geometry_file <- file.path(output_farm_data_path, "farm_geometry.qs")
farm_ts_data_file <- file.path(output_farm_data_path, "farm_ts_data.qs")
species_params_file <- file.path(output_species_data_path, "species_params.qs")
sens_params_file <- file.path(output_species_data_path, "sens_params.qs")
pop_params_file <- file.path(output_species_data_path, "pop_params.qs")
feed_params_file <- file.path(output_species_data_path, "feed_params.qs")
farm_harvest_file <- file.path(output_farm_data_path, "farm_harvest_size.qs")

2 Data for targets pipelines

Much of the actual analysis is run through targets pipelines. Therefore, we need to make sure that the files going into those pipelines are correct and up to date.

Load and process farm coordinate data with appropriate timing parameters for Northern and Southern hemisphere farms.

times_N <- c("t_start" = 121, "t_end" = 121+547, "dt" = 1)
times_S <- c("t_start" = 274, "t_end" = 274+547, "dt" = 1)

farm_coords <- farm_locations_parquet %>% 
  read_parquet() %>% 
  mutate(t_start = case_when(lat > 0 ~ times_N['t_start'], TRUE ~ times_S['t_start']), 
          t_end = case_when(lat > 0 ~ times_N['t_end'], TRUE ~ times_S['t_end']),
          t_start = unname(t_start),
          t_end = unname(t_end))

qsave(farm_coords, farm_coords_file)

# Also save geometry for later
file.path(input_farm_coords_path, "atlantic_salmon_locations_w_temps.qs") %>% 
  qread() %>% 
  dplyr::filter(day == "day_1") %>% 
  dplyr::select(farm_id, geometry, country) %>% 
  qsave(farm_geometry_file)

2.1 Farm Time Series Data

Process Sea Surface Temperature (SST) data for each farm location.

farms_to_omit <- qread(sprintf(file.path(input_farm_coords_path, "%s_farms_to_omit.qs"), this_species))
farm_SST_data <- read_parquet(file.path(input_farm_sst_path, "farm_SST_extracted.parquet"))
farm_IDs <- farm_SST_data %>%
  filter(!farm_id %in% farms_to_omit) %>%
  distinct(farm_id) %>%
  pull(farm_id)

farm_ts_data <- farm_SST_data %>%
  rename(farm_ID = farm_id) %>% 
  select(c(farm_ID, day, temp_c)) %>%
  mutate(day = str_split_i(day, "day_", 2) %>% as.integer())

qsave(farm_ts_data, farm_ts_data_file)

3 Species and population parameters

Load species-specific parameters from Excel file.

species_params <- readxl::read_excel(
  path = species_params_excel["file"], 
  sheet = species_params_excel["sheet"]
)
vals <- species_params$Value
names(vals) <- species_params$Quantity
species_params <- vals[!is.na(vals)]
qsave(species_params, species_params_file)

Load population-specific parameters.

pop_params <- readxl::read_excel(path = pop_params_excel["file"])
vals <- pop_params$Value
names(vals) <- pop_params$Quantity
pop_params <- vals[!is.na(vals)]
qsave(pop_params, pop_params_file)

4 Farm Harvest Size Calculations

Calculations of expected harvest sizes for each farm (using default feed and no Monte-Carlo variation) are done in the targets pipeline.

Sys.setenv(TAR_PROJECT = "project_farmruns")

# Check that the feed will actually work
targets::tar_make(test_reference_feed)
targets::tar_load(test_reference_feed)
test_reference_feed # if weight = 0 then the feeds aren't processing properly

# Get harvest sizes for all farms
targets::tar_make(names = contains(c("harvest_size", "feed_params")))
targets::tar_load(harvest_size_chunked)
harvest_size <- harvest_size_chunked %>% bind_rows()

ggplot(harvest_size, aes(x = weight)) +
  geom_histogram(fill = "salmon", alpha = 0.75, colour = "black")

qsave(harvest_size, farm_harvest_file)

5 Sensitivity analysis

Run the sensitivity runs using the targets pipeline. Note that this can take a long time. The species parameters (19) are run for a single fish in each farm (2721) and factor (3), which takes ~30 minutes. The population parameters (6) are run for a smaller number of farms (271) and each factor (3) with a simulated population of 500 fish, which takes ~30 hours.

Sys.setenv(TAR_PROJECT = "project_sensitivities")

targets::tar_validate()
# targets::tar_visnetwork(targets_only = T)
# targets::tar_outdated()
targets::tar_make(
  seconds_meta_append = 120,
  callr_function = NULL
  )

5.1 Process sensitivity results

Combine and visualise sensitivity analysis results.

targets::tar_load(sens_results_spec)
targets::tar_load(sens_results_pop)

sens_results <- rbind(
  sens_results_pop,
  sens_results_spec
)

qs::qsave(sens_results, file.path(output_sens_data_path, paste0("sens_results_all.qs")))

sens_measures <- levels(sens_results$measure)
sens_results_figfiles <- file.path(output_sens_data_path, paste0("sens_plot_", sens_measures, ".qs"))
sens_results_figfiles2 <- file.path(output_sens_data_path, paste0("sens_plot_", sens_measures, ".png"))

for (sm in seq_along(sens_measures)) {
  p <- sens_results %>% 
    filter(measure == sens_measures[sm]) %>% 
    ggplot(aes(x = adj_param, y = mean_sens, ymin = mean_sens-sd_sens, ymax = mean_sens+sd_sens)) +
    geom_col(fill = "salmon", alpha = 0.65, colour = "black") +
    geom_errorbar(width = 0.5) +
    coord_flip() +
    theme_classic()
  qsave(p, sens_results_figfiles[sm])
  ggsave(sens_results_figfiles2[sm])
}

6 Run farms

Note that this takes a long time - each branch of farmrun takes ~5.3 minutes, and a maximum of ~12 workers can run at a time. The total therefore takes ~17 hours.

Sys.setenv(TAR_PROJECT = "project_farmruns")

targets::tar_validate()
targets::tar_outdated()
targets::tar_prune()

# Pre-run to reduce load
targets::tar_make(
  names = c("farm_IDs_chunked", "farm_ts_data_chunked", "farm_IDs", "test_reference_feed", "farm_temp_data_chunked", "harvest_size_chunked", "farm_static_data_chunked"), 
  seconds_meta_append = 90#, callr_function = NULL
)

# Run results
targets::tar_make(
  names = c("farm_run_chunked", "stat_names", "farm_results_chunked", "biomass_produced_chunked", "cohort_results_chunked" ), 
  seconds_meta_append = 90,
  shortcut = T
)

meta <- targets::tar_meta() %>% 
  filter(grepl("farm_run", name))
meta$seconds %>% mean()/60 # average minutes per target

Load objects from the targets pipeline and save them as .qs files, which are much quicker to load down the line. This takes a few minutes to gather all the targets.

targets::tar_load(stat_names)
targets::tar_load(farm_IDs_chunked)
farm_IDs_chunked <- farm_IDs_chunked %>% unlist()
targets::tar_load(feed_names)

print("Loading farm results...")
targets::tar_load(farm_results_chunked)
farm_results_chunked <- farm_results_chunked %>% 
  bind_rows()
for (stat_name in stat_names) {
  farm_results_chunked %>% 
      filter(measure == stat_name) %>% 
      qsave(file.path(output_model_farm_path, str_c(stat_name, "_all_farms.qs")))
}
rm(farm_results_chunked)

print("Loading cohort results...")
targets::tar_load(cohort_results_chunked)
cohort_results_chunked <- cohort_results_chunked %>% 
  bind_rows()
for (stat_name in stat_names) {
  cohort_results_chunked %>% 
      filter(measure == stat_name) %>% 
      qsave(file.path(output_model_cohort_path, str_c(stat_name, "_all_farms.qs")))
}
rm(cohort_results_chunked)

targets::tar_load(biomass_produced_chunked)
qsave(biomass_produced_chunked, file.path(output_model_farm_path, str_c("biomass_produced_all_farms.qs")))
rm(biomass_produced_chunked)