Tidying data

Author

Richard Cottrell

Published

4 August 2025

library(tidyverse)
library(here)
library(terra)
library(qs)
library(magrittr)

here("src") %>% list.files(pattern = "\\.R$", full.names = TRUE) %>% walk(source)

# Specify species of interest
species <- c("atlantic_salmon", "general_salmonid")

here("src/spatial_templates.R") %>% source()

1 Mariculture locations

Pull in mariculture locations data from Clawson et al (2022) and save the species groups of interest.

farm_locations <- file.path(rdsi_dir, "raw_data/aquaculture-locations/all_marine_aquaculture_farms_sources_final.csv") %>% 
  read.csv() %>% 
  filter(species_group %in% c("Salmonidae fish", "General marine fish"))

qsave(farm_locations, "data/_general_data/farm_locations/locations.qs")

Daily SST data from NASA - GHRSST Level 4 MUR 0.25deg Global Foundation Sea Surface Temperature Analysis

nasa_dir <- "/mnt/rdsi/raw_data/NASA/GHRSST_L4_MUR_0.25_SST/"

#rename the NASA files 
#SST_files <- list.files(file.path(rdsi_dir, "raw_data/NASA/GHRSST_L4_MUR_0.25_SST"), full.names = FALSE, pattern = ".nc")
# map(SST_files, \(this_file){
#   file.rename(from = file.path(nasa_dir, this_file), to = file.path(nasa_dir, sub('^(.{4})(.*)$', '\\1_\\2',  this_file)))
# })

SST_files <- list.files(file.path(rdsi_dir, "raw_data/NASA/GHRSST_L4_MUR_0.25_SST"), full.names = TRUE, pattern = ".nc")

# this_file <- SST_files[[1]]
# this_year=2013

#initialise lists for each year
years <- c(2010:2019)
year_lists <- vector(mode ="list", length = length(years))

# Add annual files to a list of files for each year
for(y in 1:length(years)){
  this_year <- years[y]
  this_years_files <- SST_files[grep(paste0(this_year, "_"), SST_files)]
  this_years_files <- this_years_files[!grepl(paste0(this_year, "_0229"), this_years_files)]
  year_lists[[y]] <- this_years_files
}
  
# Extract the daily files for each year and rasterize in a stack
days <- c(1:365)
this_day <- days[1]
rast_list = vector(mode = "list", length = length(days))

map(days, \(this_day){
  this_days_file_list <- sapply(year_lists, "[[", this_day)
  message("SST layer for Day ", this_day)
  this_day_stack <- rast(this_days_file_list, subds = "analysed_sst")
  this_day_mean_rast <- app(this_day_stack, "mean", na.rm=TRUE)-273.15
  writeRaster(this_day_mean_rast, filename = sprintf(here("data/_general_data/SST/SST_rasters/sst_nasa_mur_L4_0.25_mean2010-2019_day_%s.tif"), this_day))
})
  
# now gapfill the saved rasters
raw_SST_files <- list.files("data/_general_data/SST/SST_rasters/", full.names = TRUE)

#test function
#this_file <- raw_SST_files[[1]]

map(.x = raw_SST_files, .f = \(this_file){
  saveName <-  basename(this_file)
  message("Gapfilling ", saveName)
  this_rast <- rast(this_file)
  this_gf_rast <- focal(this_rast, w=7, fun = "mean", na.policy = "only")
  writeRaster(this_gf_rast, filename = sprintf("data/_general_data/SST/SST_gf_rasters/%s", saveName))
})

Pull in spp vulnerability to eutrophication data - save to salmon folder

vuln_scores <- read.csv(file.path(rdsi_data_dir, "marine_spp_vulnerabilities/butt_spp_vuln_framework-publication/_output/vuln_gapfilled_score.csv")) %>% 
  select(vuln_gf_id, eutrophication_nutrient_pollution)

vuln_sd <- read.csv(file.path(rdsi_data_dir, "marine_spp_vulnerabilities/butt_spp_vuln_framework-publication/_output/vuln_gapfilled_sd.csv")) %>% 
  select(vuln_gf_id, eutrophication_nutrient_pollution) %>% rename(sd=eutrophication_nutrient_pollution)

vuln_tx <- read.csv(file.path(rdsi_data_dir, "marine_spp_vulnerabilities/butt_spp_vuln_framework-publication/_output/vuln_gapfilled_tx.csv"))

full_vuln_df <- 
  vuln_scores %>% 
  left_join(vuln_sd) %>% 
  left_join(vuln_tx)

qsave(x = full_vuln_df, file = here("data/_general_data/species_layers/vulnerabilities/marine_spp_vulnerabilities_eutrophication.qs"))

Pull in spp distribution data from Aquamaps

aquamaps_0.5d <- data.table::fread(file.path(rdsi_data_dir, "aquamaps/aquamaps_0.6_depth_prepped.csv")) 

#all cells have an id. These id's can be used to isolate only the cells of relevance for the salmon farming locations.
filter(aquamaps_0.5d, is.na(cell_id))

#save to project
qsave(x = aquamaps_0.5d, file = here("data/_general_data/species_layers/distribution/aquamaps_0.5d.qs"))