library(arrow)
library(sf)
library(dplyr)
library(tidyr)
library(terra)
library(magrittr)
library(purrr)
library(furrr)
library(future)
library(tictoc)
library(ggplot2)
library(fs)
library(conflicted)
library(stringr)
library(readxl)
library(units)
library(qs)
library(here)
library(targets)
::conflicts_prefer(dplyr::filter(), dplyr::select(), .quiet = T)
conflicted
here("src") %>%
list.files(pattern = "\\.R$", full.names = TRUE) %>%
str_subset("map", negate = T) %>%
walk(source)
Run aquaculture model for Atlantic salmon
1 Introduction
This document runs the aquaculture model for Atlantic salmon farms. It processes farm location data, species parameters, and conducts sensitivity analyses to understand the impact of various parameters on fish and farm growth measures.
# Set up parallel processing
# plan(multisession, workers = parallelly::availableCores()-1)
# Filenames
<- c(file = file.path(input_species_param_path, "Species.xlsx"), sheet = "Atlantic salmon")
species_params_excel <- c(file = file.path(input_species_param_path, "Population.xlsx"))
pop_params_excel <- file.path(input_farm_coords_path, "farm_coords.parquet")
farm_locations_parquet <- file.path(output_farm_data_path, "farm_coords.qs")
farm_coords_file <- file.path(output_farm_data_path, "farm_geometry.qs")
farm_geometry_file <- file.path(output_farm_data_path, "farm_ts_data.qs")
farm_ts_data_file <- file.path(output_species_data_path, "species_params.qs")
species_params_file <- file.path(output_species_data_path, "sens_params.qs")
sens_params_file <- file.path(output_species_data_path, "pop_params.qs")
pop_params_file <- file.path(output_species_data_path, "feed_params.qs")
feed_params_file <- file.path(output_farm_data_path, "farm_harvest_size.qs") farm_harvest_file
2 Data for targets pipelines
Much of the actual analysis is run through targets pipelines. Therefore, we need to make sure that the files going into those pipelines are correct and up to date.
Load and process farm coordinate data with appropriate timing parameters for Northern and Southern hemisphere farms.
<- c("t_start" = 121, "t_end" = 121+547, "dt" = 1)
times_N <- c("t_start" = 274, "t_end" = 274+547, "dt" = 1)
times_S
<- farm_locations_parquet %>%
farm_coords read_parquet() %>%
mutate(t_start = case_when(lat > 0 ~ times_N['t_start'], TRUE ~ times_S['t_start']),
t_end = case_when(lat > 0 ~ times_N['t_end'], TRUE ~ times_S['t_end']),
t_start = unname(t_start),
t_end = unname(t_end))
qsave(farm_coords, farm_coords_file)
# Also save geometry for later
file.path(input_farm_coords_path, "atlantic_salmon_locations_w_temps.qs") %>%
qread() %>%
::filter(day == "day_1") %>%
dplyr::select(farm_id, geometry, country) %>%
dplyrqsave(farm_geometry_file)
2.1 Farm Time Series Data
Process Sea Surface Temperature (SST) data for each farm location.
<- qread(sprintf(file.path(input_farm_coords_path, "%s_farms_to_omit.qs"), this_species))
farms_to_omit <- read_parquet(file.path(input_farm_sst_path, "farm_SST_extracted.parquet"))
farm_SST_data <- farm_SST_data %>%
farm_IDs filter(!farm_id %in% farms_to_omit) %>%
distinct(farm_id) %>%
pull(farm_id)
<- farm_SST_data %>%
farm_ts_data rename(farm_ID = farm_id) %>%
select(c(farm_ID, day, temp_c)) %>%
mutate(day = str_split_i(day, "day_", 2) %>% as.integer())
qsave(farm_ts_data, farm_ts_data_file)
3 Species and population parameters
Load species-specific parameters from Excel file.
<- readxl::read_excel(
species_params path = species_params_excel["file"],
sheet = species_params_excel["sheet"]
)<- species_params$Value
vals names(vals) <- species_params$Quantity
<- vals[!is.na(vals)]
species_params qsave(species_params, species_params_file)
Load population-specific parameters.
<- readxl::read_excel(path = pop_params_excel["file"])
pop_params <- pop_params$Value
vals names(vals) <- pop_params$Quantity
<- vals[!is.na(vals)]
pop_params qsave(pop_params, pop_params_file)
4 Farm Harvest Size Calculations
Calculations of expected harvest sizes for each farm (using default feed and no Monte-Carlo variation) are done in the targets pipeline.
Sys.setenv(TAR_PROJECT = "project_farmruns")
# Check that the feed will actually work
::tar_make(test_reference_feed)
targets::tar_load(test_reference_feed)
targets# if weight = 0 then the feeds aren't processing properly
test_reference_feed
# Get harvest sizes for all farms
::tar_make(names = contains(c("harvest_size", "feed_params")))
targets::tar_load(harvest_size_chunked)
targets<- harvest_size_chunked %>% bind_rows()
harvest_size
ggplot(harvest_size, aes(x = weight)) +
geom_histogram(fill = "salmon", alpha = 0.75, colour = "black")
qsave(harvest_size, farm_harvest_file)
5 Sensitivity analysis
Run the sensitivity runs using the targets pipeline. Note that this can take a long time. The species parameters (19) are run for a single fish in each farm (2721) and factor (3), which takes ~30 minutes. The population parameters (6) are run for a smaller number of farms (271) and each factor (3) with a simulated population of 500 fish, which takes ~30 hours.
Sys.setenv(TAR_PROJECT = "project_sensitivities")
::tar_validate()
targets# targets::tar_visnetwork(targets_only = T)
# targets::tar_outdated()
::tar_make(
targetsseconds_meta_append = 120,
callr_function = NULL
)
5.1 Process sensitivity results
Combine and visualise sensitivity analysis results.
::tar_load(sens_results_spec)
targets::tar_load(sens_results_pop)
targets
<- rbind(
sens_results
sens_results_pop,
sens_results_spec
)
::qsave(sens_results, file.path(output_sens_data_path, paste0("sens_results_all.qs")))
qs
<- levels(sens_results$measure)
sens_measures <- file.path(output_sens_data_path, paste0("sens_plot_", sens_measures, ".qs"))
sens_results_figfiles <- file.path(output_sens_data_path, paste0("sens_plot_", sens_measures, ".png"))
sens_results_figfiles2
for (sm in seq_along(sens_measures)) {
<- sens_results %>%
p filter(measure == sens_measures[sm]) %>%
ggplot(aes(x = adj_param, y = mean_sens, ymin = mean_sens-sd_sens, ymax = mean_sens+sd_sens)) +
geom_col(fill = "salmon", alpha = 0.65, colour = "black") +
geom_errorbar(width = 0.5) +
coord_flip() +
theme_classic()
qsave(p, sens_results_figfiles[sm])
ggsave(sens_results_figfiles2[sm])
}
6 Run farms
Note that this takes a long time - each branch of farmrun takes ~5.3 minutes, and a maximum of ~12 workers can run at a time. The total therefore takes ~17 hours.
Sys.setenv(TAR_PROJECT = "project_farmruns")
::tar_validate()
targets::tar_outdated()
targets::tar_prune()
targets
# Pre-run to reduce load
::tar_make(
targetsnames = c("farm_IDs_chunked", "farm_ts_data_chunked", "farm_IDs", "test_reference_feed", "farm_temp_data_chunked", "harvest_size_chunked", "farm_static_data_chunked"),
seconds_meta_append = 90#, callr_function = NULL
)
# Run results
::tar_make(
targetsnames = c("farm_run_chunked", "stat_names", "farm_results_chunked", "biomass_produced_chunked", "cohort_results_chunked" ),
seconds_meta_append = 90,
shortcut = T
)
<- targets::tar_meta() %>%
meta filter(grepl("farm_run", name))
$seconds %>% mean()/60 # average minutes per target meta
Load objects from the targets pipeline and save them as .qs files, which are much quicker to load down the line. This takes a few minutes to gather all the targets.
::tar_load(stat_names)
targets::tar_load(farm_IDs_chunked)
targets<- farm_IDs_chunked %>% unlist()
farm_IDs_chunked ::tar_load(feed_names)
targets
print("Loading farm results...")
::tar_load(farm_results_chunked)
targets<- farm_results_chunked %>%
farm_results_chunked bind_rows()
for (stat_name in stat_names) {
%>%
farm_results_chunked filter(measure == stat_name) %>%
qsave(file.path(output_model_farm_path, str_c(stat_name, "_all_farms.qs")))
}rm(farm_results_chunked)
print("Loading cohort results...")
::tar_load(cohort_results_chunked)
targets<- cohort_results_chunked %>%
cohort_results_chunked bind_rows()
for (stat_name in stat_names) {
%>%
cohort_results_chunked filter(measure == stat_name) %>%
qsave(file.path(output_model_cohort_path, str_c(stat_name, "_all_farms.qs")))
}rm(cohort_results_chunked)
::tar_load(biomass_produced_chunked)
targetsqsave(biomass_produced_chunked, file.path(output_model_farm_path, str_c("biomass_produced_all_farms.qs")))
rm(biomass_produced_chunked)