library(magrittr)
library(dplyr)
library(tidyr)
library(stringr)
library(ggplot2)
library(terra)
library(qs)
library(here)
library(sf)
library(purrr)
library(furrr)
library(targets)
library(future)
library(arrow)
library(readxl)
library(units)
library(tictoc)
library(conflicted)
conflicts_prefer(dplyr::select(), dplyr::filter(), .quiet = T)
here("src") %>% list.files(pattern = "\\.R$", full.names = TRUE) %>% walk(source)
Extracting salmon farm temperature data
1 Introduction
The purpose of this markdown is to extract daily temperature timeseries from SST data to drive fish growth in the correct places. The steps in this markdown are:
- Pull farm locations data (previously determined from FAO stocking locations)
- Record whether the farm is in the southern or north hemisphere (determines cohort stocking date)
- Pull SST data from .tif file and construct “typical” temperature year for each farm location
- Gapfill missing temperature data using closest farm in the same FAO stocking area
- Save temperature data both with and without geometry information
- Determine which farms have a mean temperature <=6 C (will be omitted)
2 Farm locations
Get global farm locations and determine whether they’re in the southern or northern hemisphere.
<- file.path(input_farm_coords_path, "locations_w_species_fao_area_stocking.qs") %>%
farms qread() %>%
filter(model_name == this_species) %>%
select(-row_num) %>%
mutate(farm_id = row_number())
<- cbind(farms$farm_id, sf::st_coordinates(farms$geometry)) %>%
hemi as.data.frame() %>% rename(farm_ID = V1, lon = X, lat = Y) %>%
write_parquet(file.path(input_farm_coords_path, "farm_coords.parquet"))
Get daily temperatures for a period of 1100 days (to cover 3 cohorts of 548 days each). Whether a farm is in the southern or northern hemisphere determines the cohorts’ start date.
<- seq(1:1100)
day_number <- purrr::map_dfc(.x = day_number, .f = function(day_number){
temp_data <- if_else(day_number <= 365, true = day_number, false = day_number-365)
rast_day_number <- if_else(rast_day_number <= 365, true = rast_day_number, false = rast_day_number-365)
rast_day_number <- if_else(rast_day_number <= 365, true = rast_day_number, false = rast_day_number-365)
rast_day_number message("Getting temperature data for all sites for ", this_species, " - day ", day_number)
<- file.path(input_farm_sst_path, "SST_gf_rasters",
sst_test sprintf("sst_nasa_mur_L4_0.25_mean2010-2019_day_%s.tif", rast_day_number)) %>%
::rast()
terra
::extract(sst_test, farms) %>%
terramutate(day = paste0("day_", day_number)) %>%
pivot_wider(names_from = "day", values_from = "focal_mean") %>%
select(-ID)
%>%
}) mutate(farm_id = row_number())
# If you want the sf object it's here!
<- farms %>%
farms_w_temp_df left_join(temp_data, by = c("farm_id" = "farm_id")) %>%
pivot_longer(names_to = "day", values_to = "temp_c", cols = starts_with("day_"))
3 Missing data
(<- farms_w_temp_df %>%
missing_temp_farms filter(temp_c %>% is.na()) %>%
group_by(farm_id) %>%
reframe(num_missing = n())
)
# How far apart in the sequence are the farms? If the previous is complete we should be able to use the one before in the same country
diff(missing_temp_farms$farm_id)
# Make the farm list
<- farms_w_temp_df %>%
farm_list group_by(farm_id) %>%
group_split()
for(i in 1:length(farm_list)){
message("Checking temp data for ", unique(farm_list[[i]]$farm_id))
if(unique(is.na(farm_list[[i]]$temp_c))){ #if temp data is NA see below
cat("Is the previous farm index the same country?")
if(unique(farm_list[[i-1]]$country) == unique(farm_list[[i]]$country)){
if(!unique(is.na(farm_list[[i-1]]$temp_c))){ # if the farm index before is NOT NA, use that.
$temp_c <- farm_list[[i-1]]$temp_c
farm_list[[i]]else {
} $temp_c <- farm_list[[i-2]]$temp_c. #else use the farm index 2 before (the missing_farm_
farm_list[[i]]
}else {stop("Previous country index not the same")} #if the previous country is not the same country stop the loop
}
}
}
# Check again - looks good - no values.
bind_rows(farm_list) %>% filter(temp_c %>% is.na()) %>% pull(farm_id) %>% unique()
# Save the new locations data
<- bind_rows(farm_list) farms_w_temp_df
# With geometry, for plotting
qsave(x = farms_w_temp_df,
file = file.path(input_farm_coords_path, sprintf("%s_locations_w_temps.qs", this_species)))
# Without geometry, for targets
::st_drop_geometry(farms_w_temp_df) %>%
sfwrite_parquet(file.path(input_farm_sst_path, "farm_SST_extracted.parquet"))
4 Omit farms
Farms are omitted if the mean farm temp <= 6\(^\circ\)C.
<- farm_list %>%
mean_farm_temp map_df(.f = function(x){
data.frame(farm_id = unique(x$farm_id),
mean_temp = mean(x$temp_c),
country = unique(x$country),
volume = unique(x$tonnes_per_farm))
})
<- mean_farm_temp %>%
farms_to_omit filter(mean_temp <= 6) %>%
pull(farm_id)
qsave(x = farms_to_omit,
file = file.path(input_farm_coords_path, sprintf("%s_farms_to_omit.qs", this_species)))