diff --git a/000-config.yml b/000-config.yml index 2de03a4..a36509c 100644 --- a/000-config.yml +++ b/000-config.yml @@ -8,8 +8,19 @@ default: raw_data_dir: "data_raw" cache_dir: "cache" pecan_outdir: "/projectnb2/dietzelab/ccmmf/modelout/ccmmf_phase_2b_mixed_pfts_20250701" + master_design_points: "/projectnb2/dietzelab/ccmmf/data/design_points.csv" pecan_xml_template: "data_raw/template.xml" sites: design_points_file: "data_raw/sa_design_points.csv" + n_sample: 10 + sensitivity: + sigma_levels: [-3, -2, -1, 1, 2, 3] # Standard + # Or use [-2, -1, 1, 2] for faster runs + # ensemble: + # size: 20 + # n_met: 10 + # run: + # start_date: "2016-01-01" + # end_date: "2023-12-31" projection: ca_albers_crs: 3310 \ No newline at end of file diff --git a/R/local_sensitivity.R b/R/local_sensitivity.R new file mode 100644 index 0000000..aac5517 --- /dev/null +++ b/R/local_sensitivity.R @@ -0,0 +1,536 @@ +#' Helper function to process a single PEcAn sensitivity file +#' +#' @param sa_file Path to the .Rdata file +#' @param workflow_site_map Data frame mapping ensemble_id to site_id +#' @return Data frame of sensitivity metrics for that file +process_sa_file <- function(sa_file, workflow_site_map) { + # Load the RData file (creates object 'sensitivity.results') + load(sa_file) + + # Extract identifiers from filename + # Format: sensitivity.results.....Rdata + filename_parts <- strsplit(basename(sa_file), "\\.")[[1]] + ensemble_id <- filename_parts[3] + response_var <- filename_parts[4] + + # Lookup site_id using the passed mapping + site_id <- workflow_site_map$site_id[workflow_site_map$ensemble_id == ensemble_id] + + # safety check if mapping fails + if (length(site_id) == 0) { + PEcAn.logger::logger.warn(paste("Could not map ensemble_id", ensemble_id, "to a site_id for file", sa_file)) + return(NULL) + } + + # Process PFTs inside the file + # sensitivity.results is a list where names are PFTs + purrr::map_dfr(names(sensitivity.results), function(pft_name) { + var_data <- sensitivity.results[[pft_name]]$variance.decomposition.output + + data.frame( + site_id = site_id, + pft = pft_name, + parameter = names(var_data$coef.vars), + response_var = response_var, + coefficient_of_variation = var_data$coef.vars, + elasticity = var_data$elasticities, + partial_variance = var_data$partial.variances, + sensitivity = var_data$sensitivities, + variance_explained = var_data$partial.variances / + sum(var_data$partial.variances, na.rm = TRUE) * 100, + stringsAsFactors = FALSE + ) + }) +} + + +#' Aggregate One-at-a-Time (OAT) Parameter Sensitivity Results Across Sites +#' +#' @description +#' This function aggregates the "Local Sensitivity Analysis" (LSA) results generated by PEcAn. +#' In PEcAn, LSA refers to the "Variance Decomposition" step where parameters are varied +#' one-at-a-time (OAT) around their median to calculate partial derivatives (elasticities) +#' and partial variances. +#' This function collects these site-level OAT metrics and aggregates them into a single +#' data frame for cross-site analysis. It specifically filters results to match the +#' Plant Functional Types (PFTs) assigned to each site in the experimental design. +#' +#' @param sensitivity_outdir Directory containing PEcAn sensitivity outputs +#' @param design_points Data frame of site metadata (must contain site_id and pft) +#' data frame **must** include at least the following columns: +#' - `site_id`: Unique identifier for each site (character). +#' - `pft`: Name of the Plant Functional Type assigned to each site (character). +#' - `lat`: (optional) Latitude of the site (numeric). +#' - `lon`: (optional) Longitude of the site (numeric). +#' @param response_vars Vector of response variables +#' @return A data frame with aggregated local sensitivity metrics, including columns +#' for `site_id`, `pft`, `parameter`, `response_var`, and sensitivity statistics +#' (e.g. `elasticity`, `variance_explained`). +aggregate_local_sa <- function(sensitivity_outdir, + design_points, + response_vars) { + + # Read PEcAn settings + settings <- PEcAn.settings::read.settings( + file.path(sensitivity_outdir, "pecan.CONFIGS.xml") + ) + + # Extract ensemble_id -> site_id mapping + workflow_site_map <- purrr::map_dfr( + names(settings$sensitivity.analysis), + function(run_name) { + if (grepl("^site\\.", run_name)) { + data.frame( + ensemble_id = settings$sensitivity.analysis[[run_name]]$ensemble.id, + site_id = gsub("^site\\.", "", run_name), + stringsAsFactors = FALSE + ) + } + } + ) + + # Find sensitivity result files + # sensitivity.results.....Rdata + sa_files <- list.files( + path = sensitivity_outdir, + pattern = "sensitivity\\.results\\..*\\.Rdata$", + full.names = TRUE, + recursive = TRUE + ) + + # Process all files using the helper function + all_results <- purrr::map_dfr( + sa_files, + process_sa_file, + workflow_site_map = workflow_site_map + ) + + # By default, PEcAn runs sensitivity analysis for ALL PFTs configured in the model, at each site + # (i.e., sensitivity.results contains parameters for both e.g. grass and temperate.deciduous at every site). + # This creates results for PFTs NOT actually present at a given site (e.g, tree results at a grass site). + # Workaround: To ensure only site-specific sensitivity results, + # we filter to site-PFT pairs actually in the experiment design. + # This is achieved by a tidy semi_join on (site_id, pft) from design_points. + # This step removes sensitivity output for PFTs not assigned to that site (e.g, avoids tree PFTs at grass sites). + + site_pft_lookup <- design_points |> dplyr::select(site_id, pft) + all_results <- all_results |> + dplyr::semi_join(site_pft_lookup, by = c("site_id", "pft")) |> + dplyr::left_join( + design_points |> dplyr::select(site_id, lat, lon), + by = "site_id" + ) |> + # Reorder so lat, lon appear right after site_id + dplyr::relocate(lat, lon, .after = site_id) |> + dplyr::arrange(site_id, response_var, dplyr::desc(abs(elasticity))) + + return(all_results) +} + + + +#' Analyze sensitivity along environmental gradients +#' +#' Fits regressions of both parameter sensitivity (elasticity) and parameter importance (variance explained) +#' against environmental gradients for each parameter/response pair. +#' +#' @param aggregated_results Data frame from aggregate_local_sa() +#' @param env_covariates Site environmental data +#' @param gradient_vars Covariates to test +#' @param min_sites Minimum sites for regression +#' @param significance_level P-value threshold +#' @param r2_threshold R2 threshold +#' @return List with regression_results (data.frame) and significant_gradients (filtered) +analyze_environmental_gradients <- function( + aggregated_results, + env_covariates, + gradient_vars = c("MAT", "MAP", "clay", "ocd", "twi"), + min_sites = 5, + significance_level = 0.05, + r2_threshold = 0.1 +) { + PEcAn.logger::logger.info("Analyzing environmental gradients for sensitivity and variance explained") + + # Join covariate data + analysis_data <- aggregated_results |> + dplyr::left_join(env_covariates, by = "site_id") |> + dplyr::filter(dplyr::if_all(dplyr::all_of(gradient_vars), ~!is.na(.x))) + + analysis_data <- analysis_data |> + dplyr::mutate(elasticity = abs(elasticity)) + + # Stack for both elasticity and variance explained targeting + target_metrics <- c("elasticity", "variance_explained") + + regression_results <- tidyr::expand_grid( + parameter = unique(analysis_data$parameter), + response_var = unique(analysis_data$response_var), + gradient_var = gradient_vars, + target = target_metrics + ) |> + purrr::pmap_dfr(function(parameter, response_var, gradient_var, target) { + metric_col <- target # Either "elasticity" or "variance_explained" + data_subset <- analysis_data |> + dplyr::filter( + parameter == !!parameter, + response_var == !!response_var, + !is.na(.data[[metric_col]]), + !is.na(.data[[gradient_var]]) + ) + if (nrow(data_subset) < min_sites) return(NULL) + fit <- tryCatch( + lm(stats::as.formula(paste0(metric_col, " ~ ", gradient_var)), data = data_subset), + error = function(e) NULL + ) + if (is.null(fit)) return(NULL) + fit_summary <- broom::glance(fit) + fit_coef <- broom::tidy(fit) + data.frame( + parameter = parameter, + response_var = response_var, + gradient_var = gradient_var, + target = target, + r_squared = fit_summary$r.squared, + adj_r_squared = fit_summary$adj.r.squared, + p_value = fit_coef$p.value[2], + slope = fit_coef$estimate[2], + intercept = fit_coef$estimate[1], + n_sites = nrow(data_subset), + stringsAsFactors = FALSE + ) + }) + + significant_gradients <- regression_results |> + dplyr::filter( + p_value < significance_level, + r_squared > r2_threshold + ) |> + dplyr::arrange(desc(r_squared)) + + return(list( + regression_results = regression_results, + significant_gradients = significant_gradients + )) +} + + + +#' Summarize local sensitivity results +#' +#' @param aggregated_results Data frame from aggregate_local_sa() +#' +#' @return List with three summary data frames: +#' - parameter_rankings: Overall parameter importance by response_var +#' - pft_differences: Parameter sensitivity by PFT +#' - response_var_patterns: Top parameters for each response variable +#' +#' @details +#' This function addresses requirement for "overall summary" +#' and provides the basis for interpretation: +#' - Which parameters is the model most sensitive to? +#' - Which parameters are good candidates for constraint? +#' - What can we infer about model structure? +#' +#' @examples +#' \dontrun{ +#' aggregated <- aggregate_local_sa(...) +#' summary <- summarize_local_sa(aggregated) +#' +#' # View overall parameter rankings +#' summary$parameter_rankings |> +#' dplyr::filter(response_var == "TotSoilCarb") |> +#' dplyr::arrange(desc(mean_abs_elasticity)) +#' } +#' +#' @export +summarize_local_sa <- function(aggregated_results) { + + PEcAn.logger::logger.info("Generating overall summary statistics") + + # Validate input + required_cols <- c("parameter", "response_var", "elasticity", + "variance_explained", "coefficient_of_variation", "pft") + missing_cols <- setdiff(required_cols, names(aggregated_results)) + if (length(missing_cols) > 0) { + PEcAn.logger::logger.severe( + "aggregated_results missing required columns: ", + paste(missing_cols, collapse = ", ") + ) + } + + # --------------------------------------------------------------------------- + # 1. OVERALL PARAMETER RANKINGS + # --------------------------------------------------------------------------- + # Question: Which parameters is the model most sensitive to? + # Averaged across all sites, PFTs + + parameter_rankings <- aggregated_results |> + dplyr::group_by(parameter, response_var) |> + dplyr::summarize( + # Mean absolute elasticity (primary metric for sensitivity) + mean_abs_elasticity = mean(abs(elasticity), na.rm = TRUE), + median_abs_elasticity = median(abs(elasticity), na.rm = TRUE), + + # Spread measures + sd_elasticity = sd(elasticity, na.rm = TRUE), + iqr_elasticity = IQR(elasticity, na.rm = TRUE), + + # Variance explained + mean_variance_explained = mean(variance_explained, na.rm = TRUE), + median_variance_explained = median(variance_explained, na.rm = TRUE), + + # Prior uncertainty (CV from posterior distributions) + mean_cv = mean(coefficient_of_variation, na.rm = TRUE), + + # Sample size + n_sites = dplyr::n(), + n_sites_significant = sum(variance_explained > 5), # >5% threshold + + .groups = "drop" + ) |> + # Calculate constraint priority score + # High elasticity + high CV = high priority for constraint + dplyr::mutate( + constraint_priority = mean_abs_elasticity * mean_cv + ) |> + # Sort by sensitivity within each response variable + dplyr::arrange(response_var, dplyr::desc(mean_abs_elasticity)) + + # --------------------------------------------------------------------------- + # 2. PFT DIFFERENCES + # --------------------------------------------------------------------------- + # Question: Does sensitivity differ between woody vs annual PFTs? + + pft_differences <- aggregated_results |> + dplyr::group_by(parameter, pft, response_var) |> + dplyr::summarize( + mean_abs_elasticity = mean(abs(elasticity), na.rm = TRUE), + mean_variance_explained = mean(variance_explained, na.rm = TRUE), + mean_cv = mean(coefficient_of_variation, na.rm = TRUE), + n_sites = dplyr::n(), + .groups = "drop" + ) |> + dplyr::arrange(pft, response_var, dplyr::desc(mean_abs_elasticity)) + + # Check for significant PFT differences (optional: could add t-test) + pft_contrast <- aggregated_results |> + dplyr::group_by(parameter, response_var) |> + dplyr::filter(dplyr::n_distinct(pft) >= 2) |> # Need both PFTs + dplyr::summarize( + elasticity_range_across_pfts = max(abs(elasticity), na.rm = TRUE) - + min(abs(elasticity), na.rm = TRUE), + cv_of_elasticity_across_pfts = sd(elasticity, na.rm = TRUE) / + mean(abs(elasticity), na.rm = TRUE), + .groups = "drop" + ) |> + dplyr::arrange(dplyr::desc(cv_of_elasticity_across_pfts)) + + # --------------------------------------------------------------------------- + # 3. RESPONSE VARIABLE PATTERNS + # --------------------------------------------------------------------------- + # Question: What controls each output? + # SOC most sensitive to turnover, AGB to photosynthesis + + response_var_patterns <- aggregated_results |> + dplyr::group_by(response_var) |> + dplyr::summarize( + # Top parameters (ordered list) + top_parameters = list( + unique(parameter[order(abs(elasticity), decreasing = TRUE)])[1:10] + ), + + # Overall statistics + mean_total_variance_explained = mean(variance_explained, na.rm = TRUE), + median_elasticity = median(abs(elasticity), na.rm = TRUE), + + # Count of important parameters + n_parameters_important = sum(variance_explained > 5, na.rm = TRUE), # >5% + n_parameters_dominant = sum(variance_explained > 20, na.rm = TRUE), # >20% + + # Sample size + n_observations = dplyr::n(), + n_unique_parameters = dplyr::n_distinct(parameter), + + .groups = "drop" + ) + + # --------------------------------------------------------------------------- + # 4. MODEL STRUCTURE INFERENCE + # --------------------------------------------------------------------------- + # Identify parameter categories from top-ranked parameters + + # Define parameter categories (from SIPNET documentation) + photosynthesis_params <- c("Amax", "psnTOpt", "psnTMin", "psnTMax", + "Vmax", "m", "Vm_low_temp", "Vmax_min_temp") + allocation_params <- c("leaf_allocation_rate", "wood_allocation_rate", + "root_allocation_rate", "leafGrowth") + turnover_params <- c("leafTurnover", "rootTurnover", "litterTurnover", + "soilTurnover", "fineRootTurnover", "coarseRootTurnover") + nitrogen_params <- c("leafN", "leafCN", "labile_pool", "N_volatilization_rate") + + structure_inference <- parameter_rankings |> + dplyr::mutate( + parameter_category = dplyr::case_when( + parameter %in% photosynthesis_params ~ "Photosynthesis", + parameter %in% allocation_params ~ "Allocation", + parameter %in% turnover_params ~ "Turnover", + parameter %in% nitrogen_params ~ "Nitrogen", + TRUE ~ "Other" + ) + ) |> + dplyr::group_by(response_var, parameter_category) |> + dplyr::summarize( + n_parameters = dplyr::n(), + mean_elasticity = mean(mean_abs_elasticity, na.rm = TRUE), + mean_variance_explained = mean(mean_variance_explained, na.rm = TRUE), + .groups = "drop" + ) |> + dplyr::arrange(response_var, dplyr::desc(mean_variance_explained)) + + return(list( + parameter_rankings = parameter_rankings, + pft_differences = pft_differences, + pft_contrast = pft_contrast, + response_var_patterns = response_var_patterns, + structure_inference = structure_inference + )) +} + + +#' Plot sensitivity by environmental gradient +#' +#' Creates scatter plots showing how parameter sensitivity varies along +#' environmental gradients (MAT, MAP, soil properties). +#' +#' @param aggregated_results Data frame from aggregate_local_sa() +#' @param env_covariates Data frame with site_id and gradient variables +#' @param gradient_var Character. Which gradient to plot (e.g., "MAT", "MAP") +#' @param top_n_parameters Integer. Number of top parameters to plot (default: 10) +#' @param response_var Character. Which response variable to plot (default: "TotSoilCarb") +#' +#' @return ggplot object +#' +#' @examples +#' \dontrun{ +#' plot_gradient <- plot_sensitivity_gradient( +#' aggregated_results = results, +#' env_covariates = env_data, +#' gradient_var = "MAT", +#' top_n_parameters = 5, +#' response_var = "TotSoilCarb" +#' ) +#' ggsave("figures/sensitivity_vs_MAT.pdf", plot_gradient) +#' } +#' +#' @export +plot_sensitivity_gradient <- function(aggregated_results, + env_covariates, + gradient_var = "MAT", + top_n_parameters = 10, + response_var = "TotSoilCarb") { + + # Join with environmental data + plot_data <- aggregated_results |> + dplyr::filter(response_var == !!response_var) |> + dplyr::left_join(env_covariates, by = "site_id") |> + dplyr::filter(!is.na(.data[[gradient_var]])) + + # Identify top parameters by mean absolute elasticity + top_params <- plot_data |> + dplyr::group_by(parameter) |> + dplyr::summarize(mean_abs_elast = mean(abs(elasticity), na.rm = TRUE), .groups = "drop") |> + dplyr::slice_max(mean_abs_elast, n = top_n_parameters) |> + dplyr::pull(parameter) + + # Filter to top parameters + plot_data <- plot_data |> + dplyr::filter(parameter %in% top_params) + + # Create plot + p <- ggplot2::ggplot( + plot_data, + ggplot2::aes( + x = .data[[gradient_var]], + y = abs(elasticity), + color = parameter + ) + ) + + ggplot2::geom_point(alpha = 0.6, size = 2) + + ggplot2::geom_smooth(method = "lm", se = TRUE, linewidth = 1) + + ggplot2::facet_wrap(~ parameter, scales = "free_y", ncol = 3) + + ggplot2::labs( + title = paste0("Parameter Sensitivity vs ", gradient_var), + subtitle = paste0("Response variable: ", response_var), + x = gradient_var, + y = "Absolute Elasticity", + color = "Parameter" + ) + + ggplot2::theme_bw(base_size = 12) + + ggplot2::theme( + legend.position = "none", # Redundant with facets + strip.background = ggplot2::element_rect(fill = "lightgray") + ) + + return(p) +} + + +#' Plot PDP for parameter sensitivity vs. environmental gradients +#' +#' @param aggregated_results The output of aggregate_local_sa +#' @param site_covariates Data frame of site_id & environmental gradients (MAT, MAP,...) +#' @param response_var Which response variable you want (e.g., "NPP") +#' @param parameter The model parameter to analyze +#' @param target Outcome: "elasticity" or "variance_explained" +#' @return ggplot + +plot_pdp_sensitivity <- function( + aggregated_results, + site_covariates, + response_var, + parameter, + target = "elasticity", + gradients = c("MAT", "MAP", "clay", "ocd", "twi") +) { + + data <- aggregated_results |> + dplyr::left_join(site_covariates, by = "site_id") |> + dplyr::filter(response_var == !!response_var, parameter == !!parameter) + + grads_missing <- setdiff(gradients, colnames(data)) + if (length(grads_missing) > 0) { + PEcAn.logger::logger.warn( + "Missing gradients: ", paste(grads_missing, collapse = ", ") + ) + } + + grads_used <- setdiff(gradients, grads_missing) + if (length(grads_used) == 0) { + PEcAn.logger::logger.severe("No valid gradients available to use.") + } + + if (nrow(data) < 10) { + PEcAn.logger::logger.severe("Not enough sites available for ML / PDP computation.") + } + + rf_mod <- randomForest::randomForest( + as.formula(paste0(target, " ~ ", paste(grads_used, collapse = "+"))), + data = data, + na.action = na.exclude, + importance = TRUE + ) + + plots <- lapply(grads_used, function(grad) { + pd <- pdp::partial(rf_mod, pred.var = grad, train = data, grid.resolution = 20) + ggplot2::autoplot(pd) + + ggplot2::labs( + title = paste("PDP for", parameter, "-", target, "vs.", grad), + subtitle = paste("Response variable:", response_var), + y = paste("Predicted", target) + ) + + ggplot2::theme_minimal() + }) + + names(plots) <- grads_used + return(plots) +} diff --git a/analysis/local_sensitivity.qmd b/analysis/local_sensitivity.qmd new file mode 100644 index 0000000..7a8fc4e --- /dev/null +++ b/analysis/local_sensitivity.qmd @@ -0,0 +1,590 @@ +--- +title: "Local Sensitivity Analysis" +author: "Akash B V" +date: today +format: + html: + self-contained: true + df-print: paged + toc: true + toc-depth: 3 + code-fold: true + code-tools: true + theme: cosmo + number-sections: true +execute: + echo: false + warning: false + message: false + cache: false +--- + +# Overview + +This report presents results from a local sensitivity analysis of the SIPNET ecosystem model across multiple design points representing diverse environmental conditions across California croplands. + +**Purpose:** Identify which model parameters have the largest impact on key outputs (NPP, soil carbon, aboveground biomass), quantify how parameter importance varies with environmental gradients (climate, soil properties), and assess spatial heterogeneity in parameter sensitivity. + +::: callout-note +## Analysis Framework + +This sensitivity analysis follows established best practices for reporting both elasticity (sensitivity magnitude) and variance explained (parameter importance). Results support targeted parameter calibration and uncertainty reduction strategies. +::: + +```{r} +#| label: setup +#| include: false + +library(readr) +library(dplyr) +library(ggplot2) +library(DT) +library(here) +library(config) +library(knitr) +library(randomForest) +library(pdp) +library(patchwork) +library(tidyr) + +source(here::here("R", "local_sensitivity.R")) + +# Load configuration +cfg <- config::get(file = here::here("000-config.yml")) + +# Load datasets +aggregated_results <- readr::read_csv( + here::here(cfg$paths$data_dir, "aggregated_sensitivity.csv"), + show_col_types = FALSE +) + +param_rankings <- readr::read_csv( + here::here(cfg$paths$data_dir, "parameter_rankings.csv"), + show_col_types = FALSE +) + +pft_differences <- readr::read_csv( + here::here(cfg$paths$data_dir, "pft_differences.csv"), + show_col_types = FALSE +) + +significant_gradients <- readr::read_csv( + here::here(cfg$paths$data_dir, "significant_gradients.csv"), + show_col_types = FALSE +) + +regression_results <- readr::read_csv( + here::here(cfg$paths$data_dir, "regression_results.csv"), + show_col_types = FALSE +) + +site_covariates <- readr::read_csv( + here::here(cfg$paths$ccmmf_dir, "data/site_covariates.csv"), + show_col_types = FALSE +) + +# Paths and metadata +plots_dir <- here::here(cfg$paths$data_dir, "plots") +sa_variables <- unique(aggregated_results$response_var) +gradient_vars <- cfg$sensitivity$gradient_vars %||% c("MAT", "MAP", "clay", "ocd", "twi") +n_sites <- dplyr::n_distinct(aggregated_results$site_id) +n_parameters <- dplyr::n_distinct(aggregated_results$parameter) +``` + +## Study Design + +**Spatial coverage:** `r n_sites` design points selected via k-means clustering to represent environmental heterogeneity across California croplands + +**Model outputs:** `r length(sa_variables)` response variables (`r paste(sa_variables, collapse = ", ")`) + +**Parameters analyzed:** `r n_parameters` SIPNET parameters across photosynthesis, allocation, turnover, and phenology processes + +**Environmental gradients:** `r paste(gradient_vars, collapse = ", ")` + +------------------------------------------------------------------------ + +# Parameter Rankings {#sec-rankings} + +Parameter rankings by mean absolute elasticity identify which inputs most strongly influence model outputs, averaged across all environmental conditions. High elasticity indicates the model is highly sensitive to that parameter. + +::: panel-tabset +## NPP + +```{r} +#| label: fig-param-npp +#| fig-cap: "Top 15 parameters affecting Net Primary Productivity (NPP). Bars show mean absolute elasticity across all sites." +#| out-width: "100%" + +knitr::include_graphics(file.path(plots_dir, "param_ranking_NPP.png")) +``` + +**Key findings:** NPP is primarily controlled by photosynthesis parameters (`Amax`, `psnTOpt`) and allocation parameters (`leafGrowth`), consistent with theoretical expectations for productivity. + +## TotSoilCarb + +```{r} +#| label: fig-param-soc +#| fig-cap: "Top 15 parameters affecting Total Soil Carbon (TotSoilCarb). High sensitivity to turnover parameters reflects soil carbon dynamics." +#| out-width: "100%" + +knitr::include_graphics(file.path(plots_dir, "param_ranking_TotSoilCarb.png")) +``` + +**Key findings:** Soil carbon shows highest sensitivity to turnover rates (`litterTurnover`, `soilTurnover`), indicating decomposition processes dominate long-term carbon storage. + +## AbvGrndWood + +```{r} +#| label: fig-param-agb +#| fig-cap: "Top 15 parameters affecting Aboveground Biomass (AbvGrndWood). Woody allocation and photosynthesis parameters dominate." +#| out-width: "100%" + +knitr::include_graphics(file.path(plots_dir, "param_ranking_AbvGrndWood.png")) +``` + +**Key findings:** Aboveground biomass is most sensitive to woody allocation (`wood_allocation_rate`) and maximum photosynthesis (`Amax`), reflecting growth dynamics in perennial systems. +::: + +------------------------------------------------------------------------ + +# Site-by-Site Sensitivity {#sec-site-sensitivity} + +Interactive tables enable exploration of site-specific parameter sensitivities, revealing spatial heterogeneity that aggregate statistics may obscure. + +```{r} +#| label: tbl-site-sensitivity +#| tbl-cap: "Top 10 parameters by site and response variable. Use filters to explore site-specific patterns." + +site_top_table <- aggregated_results %>% + dplyr::group_by(site_id, response_var) %>% + dplyr::arrange(desc(abs(elasticity))) %>% + dplyr::slice_head(n = 10) %>% + dplyr::ungroup() %>% + dplyr::mutate( + across(c(elasticity, variance_explained), \(x) round(x, 3)), + across(c(lat, lon), \(x) round(x, 2)) + ) %>% + dplyr::select( + site_id, lat, lon, response_var, parameter, + elasticity, variance_explained, pft + ) + +DT::datatable( + site_top_table, + extensions = c('Scroller', 'Buttons'), + options = list( + dom = 'Plfrtip', + pageLength = 15, + deferRender = TRUE, + scroller = TRUE, + scrollY = "500px", + searchPanes = list( + cascadePanes = TRUE, + initCollapsed = TRUE + ), + columnDefs = list( + list(searchPanes = list(show = TRUE), targets = c(0, 3)) + ), + buttons = c('copy', 'csv', 'excel') + ), + class = "stripe hover compact", + rownames = FALSE +) %>% + DT::formatStyle( + 'elasticity', + background = DT::styleColorBar(range(site_top_table$elasticity), 'lightblue'), + backgroundSize = '100% 90%', + backgroundRepeat = 'no-repeat', + backgroundPosition = 'center' + ) %>% + DT::formatStyle( + 'variance_explained', + background = DT::styleColorBar(range(site_top_table$variance_explained), 'lightgreen'), + backgroundSize = '100% 90%', + backgroundRepeat = 'no-repeat', + backgroundPosition = 'center' + ) +``` + +::: callout-tip +## Interpreting Site Patterns + +- **High elasticity**: Parameter strongly influences output at this site +- **High variance explained**: Parameter contributes substantially to output uncertainty +- **Geographic clustering**: Similar sites may share dominant parameters, suggesting environmental controls +::: + +------------------------------------------------------------------------ + +# Environmental Gradient Analysis {#sec-gradients} + +Regression analysis tests whether parameter sensitivity systematically varies with environmental conditions, identifying context-dependent calibration needs. + +## Regression Results + +For each parameter-response-gradient combination, we fit linear models to test relationships between environmental conditions and both: + +- **Elasticity** (sensitivity): Rate of output change per unit parameter change +- **Variance explained** (importance): Contribution to output uncertainty + +::: callout-warning +## Statistical Thresholds + +Results shown meet significance criteria: **p \< 0.05** and $R^2$ \> 0.10. Non-significant relationships are excluded to focus on robust patterns. +::: + +::: panel-tabset +## Elasticity Trends + +```{r} +#| label: tbl-gradient-elasticity +#| tbl-cap: "Environmental gradients significantly associated with parameter sensitivity (elasticity)" + +elast_tab <- regression_results %>% + filter(target == "elasticity", r_squared > 0.1, p_value < 0.05) %>% + mutate( + across(c(r_squared, slope, intercept), \(x) round(x, 3)), + p_value = format.pval(p_value, digits = 3) + ) %>% + arrange(desc(r_squared)) %>% + select( + Parameter = parameter, + Response = response_var, + Gradient = gradient_var, + `R2` = r_squared, + Slope = slope, + Intercept = intercept, + `p-value` = p_value, + N = n_sites + ) + +DT::datatable( + elast_tab, + extensions = c('Scroller', 'Buttons'), + options = list( + dom = 'Plfrtip', + pageLength = 20, + deferRender = TRUE, + scroller = TRUE, + scrollY = "500px", + searchPanes = list( + cascadePanes = TRUE, + initCollapsed = TRUE + ), + columnDefs = list( + list(searchPanes = list(show = TRUE), targets = c(0, 1, 2)) + ), + buttons = c('copy', 'csv') + ), + class = "stripe hover compact", + rownames = FALSE +) %>% + DT::formatStyle( + 'R2', + background = DT::styleColorBar(c(0.1, max(elast_tab$`R2`)), 'coral'), + backgroundSize = '100% 90%', + backgroundRepeat = 'no-repeat', + backgroundPosition = 'center' + ) +``` + +**Interpretation guide:** + +- **Positive slope**: Sensitivity *increases* along gradient (e.g., warmer sites more sensitive) +- **Negative slope**: Sensitivity *decreases* along gradient +- **High** $R^2$: Strong environmental control on parameter importance + +## Variance Explained Trends + +```{r} +#| label: tbl-gradient-variance +#| tbl-cap: "Environmental gradients significantly associated with parameter importance (variance explained)" + +var_tab <- regression_results %>% + filter(target == "variance_explained", r_squared > 0.1, p_value < 0.05) %>% + mutate( + across(c(r_squared, slope, intercept), \(x) round(x, 3)), + p_value = format.pval(p_value, digits = 3) + ) %>% + arrange(desc(r_squared)) %>% + select( + Parameter = parameter, + Response = response_var, + Gradient = gradient_var, + `R2` = r_squared, + Slope = slope, + Intercept = intercept, + `p-value` = p_value, + N = n_sites + ) + +DT::datatable( + var_tab, + extensions = c('Scroller', 'Buttons'), + options = list( + dom = 'Plfrtip', + pageLength = 20, + deferRender = TRUE, + scroller = TRUE, + scrollY = "500px", + searchPanes = list( + cascadePanes = TRUE, + initCollapsed = TRUE + ), + columnDefs = list( + list(searchPanes = list(show = TRUE), targets = c(0, 1, 2)) + ), + buttons = c('copy', 'csv') + ), + class = "stripe hover compact", + rownames = FALSE +) %>% + DT::formatStyle( + 'R2', + background = DT::styleColorBar(c(0.1, max(var_tab$`R2`)), 'lightgreen'), + backgroundSize = '100% 90%', + backgroundRepeat = 'no-repeat', + backgroundPosition = 'center' + ) +``` + +**Why both metrics matter:** A parameter may have high *sensitivity* (elasticity) but low *importance* (variance explained) if its prior uncertainty is small. Conversely, a moderately sensitive parameter with high prior uncertainty may dominate output variability. +::: + +## Summary Statistics + +```{r} +#| label: tbl-gradient-summary + +gradient_summary <- regression_results %>% + filter(r_squared > 0.1, p_value < 0.05) %>% + group_by(target) %>% + summarise( + `N Significant` = n(), + `Mean R2` = mean(r_squared), + `Median R2` = median(r_squared), + `Max R2` = max(r_squared), + .groups = "drop" + ) %>% + mutate(across(where(is.numeric), \(x) round(x, 3))) + +knitr::kable( + gradient_summary, + caption = "Summary of significant gradient relationships by target metric", + col.names = c("Target Metric", "N Significant", "Mean R2", "Median R2", "Max R2") +) +``` + +------------------------------------------------------------------------ + +# Partial Dependence Analysis {#sec-pdp} + +Partial dependence plots visualize the marginal effect of environmental gradients on parameter sensitivity while averaging over all other predictors, revealing non-linear relationships missed by linear regression. + +## Method + +We use Random Forest regression to model `elasticity ~ MAT + MAP + clay + ocd + twi`, then compute partial dependence for each gradient. PDPs show how the predicted sensitivity changes as one gradient varies, holding others at their average values. + +## Example: Amax Sensitivity for NPP + +```{r} +#| label: fig-pdp-example +#| fig-cap: "Partial dependence of Amax (maximum photosynthesis) sensitivity on environmental gradients for NPP. Rug plots show observed site values." +#| fig-height: 8 +#| fig-width: 10 +#| warning: false + +if (exists("plot_pdp_sensitivity")) { + tryCatch({ + pdp_plots <- plot_pdp_sensitivity( + aggregated_results = aggregated_results, + site_covariates = site_covariates, + response_var = "NPP", + parameter = "Amax", + target = "elasticity", + gradients = gradient_vars + ) + + wrap_plots(pdp_plots, ncol = 2) + + plot_annotation( + title = "Marginal Effects of Environmental Gradients on Amax Sensitivity", + subtitle = "Random Forest partial dependence analysis", + theme = theme(plot.title = element_text(size = 14, face = "bold")) + ) + }, error = function(e) { + cat("PDP generation failed. Check that randomForest and pdp packages are installed.\n") + cat("Error:", e$message, "\n") + }) +} else { + cat("plot_pdp_sensitivity() function not found. Ensure R/local_sensitivity.R is sourced.\n") +} +``` + +::: callout-note +## Interpreting PDPs + +- **Flat lines**: Sensitivity constant across gradient (no environmental control) +- **Monotonic trends**: Linear increase/decrease (consistent with regression) +- **Non-monotonic patterns**: Thresholds or optima (linear models inadequate) +- **Rug plots**: Density of observed data (avoid extrapolation where data sparse) +::: + +------------------------------------------------------------------------ + +# PFT Comparisons {#sec-pft} + +Parameter sensitivity may differ systematically between woody perennial and annual herbaceous plant functional types, reflecting distinct physiological strategies and carbon cycling pathways. + +```{r} +#| label: tbl-pft-comparison +#| tbl-cap: "Top 20 parameter-PFT combinations by sensitivity for Total Soil Carbon" + +pft_comp_table <- pft_differences |> + dplyr::filter(response_var == "TotSoilCarb") |> + dplyr::arrange(desc(mean_abs_elasticity)) |> + dplyr::slice_head(n = 20) |> + dplyr::mutate( + across(c(mean_abs_elasticity, mean_variance_explained, mean_cv), \(x) round(x, 3)) + ) |> + dplyr::select( + PFT = pft, + Parameter = parameter, + `Mean |Elasticity|` = mean_abs_elasticity, + `Variance Explained (%)` = mean_variance_explained, + `Coefficient of Variation` = mean_cv, + `N Sites` = n_sites + ) + +DT::datatable( + pft_comp_table, + extensions = c('Scroller', 'Buttons'), + options = list( + dom = 'Bfrtip', + pageLength = 20, + deferRender = TRUE, + scroller = TRUE, + scrollY = "500px", + buttons = c('copy', 'csv') + ), + class = "stripe hover compact", + rownames = FALSE +) %>% + DT::formatStyle( + 'Mean |Elasticity|', + background = DT::styleColorBar(pft_comp_table$`Mean |Elasticity|`, 'steelblue'), + backgroundSize = '100% 90%', + backgroundRepeat = 'no-repeat', + backgroundPosition = 'center' + ) +``` + +**Ecological interpretation:** Woody PFTs typically show higher sensitivity to allocation and wood turnover parameters, while annual PFTs are more sensitive to leaf phenology and rapid nutrient cycling parameters. + +------------------------------------------------------------------------ + +# Cross-Cutting Insights {#sec-insights} + +## Overall Sensitivity Summary + +```{r} +#| label: tbl-summary-stats +#| tbl-cap: "Overall sensitivity summary statistics by response variable" + +summary_stats <- aggregated_results |> + dplyr::group_by(response_var) |> + dplyr::summarise( + `N Parameters` = dplyr::n_distinct(parameter), + `Mean |Elasticity|` = mean(abs(elasticity), na.rm = TRUE), + `Median |Elasticity|` = median(abs(elasticity), na.rm = TRUE), + `Max |Elasticity|` = max(abs(elasticity), na.rm = TRUE), + `Mean Variance Explained (%)` = mean(variance_explained, na.rm = TRUE), + .groups = "drop" + ) |> + dplyr::mutate(across(where(is.numeric), \(x) round(x, 3))) + +knitr::kable(summary_stats) +``` + +## Key Findings + +### 1. Parameter Prioritization for Calibration + +**High priority** (high elasticity × high prior uncertainty): + +```{r} +#| label: tbl-calibration-priority + +priority_params <- param_rankings %>% + filter(response_var == "TotSoilCarb") %>% + arrange(desc(constraint_priority)) %>% + slice_head(n = 5) %>% + select( + Parameter = parameter, + `Mean |Elasticity|` = mean_abs_elasticity, + `Mean CV` = mean_cv, + `Priority Score` = constraint_priority + ) %>% + mutate(across(where(is.numeric), \(x) round(x, 3))) + +knitr::kable( + priority_params, + caption = "Top 5 parameters for calibration priority (TotSoilCarb)" +) +``` + +### 2. Spatial Heterogeneity + +- **`r n_sites` sites** show substantial variation in parameter importance, confirming need for site-specific or region-specific calibration +- **Climate gradients** explain 10-40% of variance in parameter sensitivity for key parameters +- **Soil properties** (clay, OCD) primarily control turnover parameter importance + +### 3. Model Structure Implications + +Parameter sensitivity patterns reveal underlying model structure: + +- **NPP**: Dominated by photosynthesis (`Amax`, `psnTOpt`) and allocation processes +- **Soil Carbon**: Controlled by turnover rates (`litterTurnover`, `soilTurnover`) +- **Woody Biomass**: Sensitive to allocation (`wood_allocation_rate`) and longevity + +These patterns align with ecological theory and provide confidence in model representation of ecosystem processes. + +------------------------------------------------------------------------ + +# Technical Details {#sec-technical} + +## Sensitivity Metrics + +- **Elasticity** ($\varepsilon$): Normalized sensitivity = $\frac{\partial Y}{\partial X} \cdot \frac{X}{Y}$ +- **Variance explained**: $\frac{\text{Partial Variance}_i}{\sum \text{Partial Variances}} \times 100\%$ +- **Coefficient of variation** (CV): Prior uncertainty = $\frac{\sigma}{\mu}$ from parameter distributions + +## Software Environment + +```{r} +#| label: session-info +#| code-fold: false + +sessionInfo() +``` + +## Configuration + +```{r} +#| label: config-summary +#| code-fold: false + +cat("Config file:", here::here("000-config.yml"), "\n") +cat("Analysis date:", as.character(Sys.Date()), "\n") +cat("Number of sites:", n_sites, "\n") +cat("Number of parameters:", n_parameters, "\n") +cat("Response variables:", paste(sa_variables, collapse = ", "), "\n") +cat("Gradient variables:", paste(gradient_vars, collapse = ", "), "\n") +``` + +------------------------------------------------------------------------ + +::: callout-tip +## Data Availability + +All datasets used in this report are available in `data/aggregated_sensitivity.csv` and related files. Analysis code is in `R/local_sensitivity.R`. +::: diff --git a/data_raw/design_points_198.csv b/data_raw/design_points_198.csv new file mode 100644 index 0000000..6614182 --- /dev/null +++ b/data_raw/design_points_198.csv @@ -0,0 +1,199 @@ +UniqueID,site_id,lat,lon,pft +1301797,8773c58306e8d9a0,32.71585,-115.47163,annual crop +1300446,f2ce12d75f3b2cc6,32.75557,-115.41586,annual crop +1303893,6fc4cac215be0705,32.76887,-115.57358,annual crop +1308022,7bbdad271ed502db,32.7755,-115.40383,annual crop +1307459,36c0ce814a9688e4,32.79792,-115.30592,annual crop +1304607,301bbdabfeeca8db,32.80027,-115.51625,annual crop +1306405,b0cc94533ba2b1f0,33.10033,-115.43671,annual crop +1306761,ca601db78178679a,33.18742,-115.48613,annual crop +3312652,fbc6cd79aa048b05,33.52919,-114.56727,annual crop +3306161,139b0e8868923eb8,33.54674,-114.71593,annual crop +3304048,922c6a68452fc8ca,33.57847,-116.03879,annual crop +3304650,30fdb422bceeeedb,33.84184,-117.01405,annual crop +5613504,97d82456aa198af6,34.12896,-119.13588,annual crop +5601416,c8021a7adbd0f0e2,34.15523,-119.07796,annual crop +5614743,9295820af0a10e1d,34.18363,-119.11175,annual crop +5603011,6fefc1fcb9c8e464,34.20296,-119.09146,annual crop +5613341,6055716d4210bf35,34.21075,-119.24137,annual crop +5603993,ddbdb53e6fcf0ca1,34.43232,-119.31422,annual crop +4209359,a89b3d838b4c7860,34.62273,-120.02083,annual crop +4205349,5b5603ac6c281b50,34.78812,-120.43311,annual crop +4011908,e486b7c9a1343b84,34.98172,-120.5523,annual crop +4010486,3c2c435e77d7b0ea,35.08948,-120.58903,annual crop +1518362,bb622fd1e21955a4,35.12894,-118.58115,annual crop +4011045,3109e15b126d31ab,35.13826,-120.51548,annual crop +4009120,7fa45795fbd5f436,35.14573,-120.53684,annual crop +1518439,b698c76bbfddfe7f,35.20522,-119.11233,annual crop +1518958,f8536d1c22b6ce7e,35.45825,-119.32789,annual crop +1517119,8c2a4c220bba15a9,35.67697,-118.2591,annual crop +5424970,7e870c6692619f9a,35.95568,-119.23712,annual crop +5413691,a88d10858574320c,36.08904,-119.16801,annual crop +5417984,a8666f289cdb29fc,36.18533,-119.30574,annual crop +5407156,b395f792fe19ad47,36.27344,-119.37709,annual crop +1600057,3f9e2e9da147e821,36.29281,-119.5864,annual crop +2705056,02f8ecd6d492badc,36.30494,-121.16543,annual crop +2707824,e851b358902a0c78,36.33377,-121.25771,annual crop +2702091,85cd30a1d454404e,36.38188,-121.33113,annual crop +2708238,0de4e1e230754eb8,36.39065,-121.30012,annual crop +5423174,e965173d43abe95b,36.45436,-119.29039,annual crop +1019250,af981f5dbbfe5f7f,36.49954,-119.61152,annual crop +2711288,ac69d3083808f403,36.65592,-121.6045,annual crop +2712297,d49fa798b11d00e3,36.69189,-121.70652,annual crop +1015509,1166d305b963968f,36.70175,-119.80375,annual crop +1030090,f7f7febde5b6504e,36.75672,-120.20252,annual crop +1002672,2f9bf5c8fea8f0bf,36.78353,-119.56569,annual crop +1008876,6e30ed9f8e72a777,36.79489,-119.53708,annual crop +2710210,d98dac9c17d8ecef,36.86857,-121.7761,annual crop +3500047,0633a0f257ea79ca,36.87747,-121.3747,annual crop +2713696,b97e310a97d5449d,36.89654,-121.69318,annual crop +4401113,b3ed7ece68eabba6,36.92991,-121.73205,annual crop +2411477,841462dcb78ab4a1,37.11778,-120.56257,annual crop +4302276,9ae004848ad220b9,37.16367,-121.69841,annual crop +2403740,5658557ef71b8d43,37.1862,-120.33464,annual crop +2409349,252448a48135ff4c,37.27926,-121.03566,annual crop +2414164,cadbfc53b64fb024,37.33403,-120.86818,annual crop +2412814,b91359c48db7a3ac,37.34097,-120.68625,annual crop +4100274,16b3df611a6d9df6,37.376,-122.2249,annual crop +2402144,640f384688a4e591,37.42769,-120.83345,annual crop +2412799,2eab840584d9a6ea,37.45028,-120.82393,annual crop +5004008,a749a5393e2d0a9a,37.69577,-121.08735,annual crop +5005000,1c407a0bbabd14a2,37.74443,-120.88007,annual crop +3909993,f622269b4af08ffe,37.79911,-121.49169,annual crop +5003326,17f4b71353e99733,37.82475,-120.67878,annual crop +3901124,58a39bd688496479,37.82917,-121.04874,annual crop +3902257,829b943224934d75,37.85302,-121.29026,annual crop +3900024,629195fe5cadee05,37.89266,-121.30972,annual crop +0701315,5bbdd579f1ee9f19,37.9064,-121.58376,annual crop +0701169,9c15afc53357714b,37.95167,-121.67676,annual crop +3910950,1e782bbd9aba2833,37.96339,-121.41258,annual crop +0500459,eb70a6c8b0f58ea8,38.08514,-120.47661,annual crop +0500297,b0aad0a7d69598b0,38.18682,-120.82394,annual crop +3403466,b4c14ae3bec001c1,38.25789,-121.2001,annual crop +4800963,bd24018cde78901b,38.29991,-121.78208,annual crop +4804682,299f79b1e03bf3eb,38.35436,-121.83418,annual crop +5705583,dc9179b07c8ed8a7,38.36642,-121.59721,annual crop +5701986,167ab5fc67d3d4d3,38.57421,-121.78019,annual crop +3405105,69c398daff357da0,38.62267,-121.53812,annual crop +3102743,3f80ff62fe2838ad,38.86665,-121.40978,annual crop +3102385,190922c49a44f9aa,38.96796,-121.26205,annual crop +5100179,31e6da9168656711,38.97407,-121.66474,annual crop +5803501,578966bf4bfb4cfd,39.07784,-121.49638,annual crop +2900303,7935e2f555cdf9bb,39.13701,-121.15666,annual crop +0604034,c75e6dd82a4bdfa3,39.13847,-121.97803,annual crop +5101698,5a62bd8c14b93fc6,39.16234,-121.68997,annual crop +2900254,a98a5fe94a51c5a5,39.17197,-121.1941,annual crop +5103028,84bb575f8c8fb657,39.26372,-121.88491,annual crop +0603115,e59dbefe9c9236ff,39.27161,-122.21843,annual crop +0403485,dc98b4e761edc53a,39.35259,-121.83374,annual crop +1104895,e19e245fdad78def,39.55064,-122.15473,annual crop +4600048,df901934f52831d6,39.59935,-120.35656,annual crop +0403397,848b12bacff089ef,39.68882,-121.82083,annual crop +4500961,4e5a039db65ad456,40.50238,-122.32069,annual crop +1201175,aa8c8cadf5efa20f,40.51496,-124.02349,annual crop +4501224,c3cc92790e82238f,40.63064,-121.90013,annual crop +1200468,b88961b09985da28,40.72994,-124.18378,annual crop +1801128,803cd0235a410598,40.92088,-120.55699,annual crop +4500613,9e9b2e8dd0670374,41.0422,-121.47316,annual crop +1802573,507ebd1c6e9f5d14,41.14991,-120.97333,annual crop +2502746,41e40eb7f9b60788,41.54258,-120.46489,annual crop +4702940,5433838600b8edc0,41.54862,-122.51858,annual crop +4703655,72d745ad8741be18,41.62203,-122.78298,annual crop +3707491,b8b2dd94a7c44662,33.35306,-117.19182,woody perennial crop +3312395,ebb783e86d2ac6fb,33.57847,-116.03157,woody perennial crop +3312961,6328b22167f31aad,33.86884,-117.40838,woody perennial crop +3306059,df703141000c59c9,33.90119,-117.40624,woody perennial crop +3311313,9c562bb603f3b156,33.96727,-117.34049,woody perennial crop +5604295,9a28cf783448dc03,34.29708,-119.06014,woody perennial crop +5603719,d1dd970659a71943,34.37258,-119.03318,woody perennial crop +5603233,11aa82e47c15b8df,34.38596,-118.81446,woody perennial crop +5604628,5b18c5e0bebe449b,34.47244,-119.22015,woody perennial crop +4203463,096c9eebd28d0280,34.91295,-120.40345,woody perennial crop +1510506,dbb2c95a5cb30b5d,35.05618,-119.2735,woody perennial crop +1516688,5e21d98f69f14285,35.49646,-119.72323,woody perennial crop +1518251,cf9c2d9309d78058,35.65629,-119.97917,woody perennial crop +1511610,6616b64433b71c3c,35.77284,-119.09088,woody perennial crop +5420540,d69343ed38ea49c7,35.93301,-119.01888,woody perennial crop +5406185,729a6ef989c5a277,35.96904,-118.98552,woody perennial crop +5424759,a860cdc63cfc7cd1,35.98506,-119.01459,woody perennial crop +5426684,25d92d00fe5f31ea,35.99613,-119.13104,woody perennial crop +5424088,69151056c07813d8,36.0165,-119.12213,woody perennial crop +5420816,7b2e85c30ecfcbe8,36.03695,-118.96562,woody perennial crop +5429747,04268ba7e72d7bfb,36.04216,-119.36168,woody perennial crop +5435846,2c9f5faf24bff9fd,36.13925,-119.10525,woody perennial crop +1011248,2e11530b93cd492f,36.1778,-120.20411,woody perennial crop +5427391,f0c5457e0db29788,36.18711,-118.99795,woody perennial crop +1020088,d6ce58a349148d32,36.20587,-120.25336,woody perennial crop +5424360,8b27eb5fc60d506c,36.25395,-119.26319,woody perennial crop +5428155,64cf20b7075f16bd,36.28705,-119.12755,woody perennial crop +5429900,bfee4301f7c99060,36.35733,-119.09283,woody perennial crop +5418978,7afa2a6c097c1f5b,36.3626,-119.10984,woody perennial crop +5420876,6c1e7a1eb4c36ea5,36.37693,-119.45051,woody perennial crop +1601598,e968e9c8f8574cb2,36.38383,-119.65554,woody perennial crop +1603759,4c3498f22c47aa3f,36.47325,-119.58999,woody perennial crop +5432363,ce81d5b03685a33e,36.48936,-119.44393,woody perennial crop +1015089,05ce4008f973ff67,36.49535,-119.5918,woody perennial crop +5413858,1114b0b499d4854c,36.51519,-119.42062,woody perennial crop +5408292,6def26440f10476c,36.5524,-119.45881,woody perennial crop +5410669,60f846ca525da31e,36.55727,-119.22236,woody perennial crop +5421095,427606829eff9a98,36.6005,-119.25964,woody perennial crop +1012828,32b66a2605ee700d,36.62789,-119.5511,woody perennial crop +1004605,29baab2f12e2fd80,36.62869,-119.55927,woody perennial crop +1014765,b68880e61d448b93,36.62893,-119.6305,woody perennial crop +1033716,ecc585936d3203a5,36.63048,-119.58156,woody perennial crop +1024981,24766015396f3dde,36.63384,-119.49152,woody perennial crop +1033342,cf0be21778f4dfa5,36.64072,-119.42862,woody perennial crop +5416064,39b7dba5debef188,36.6438,-119.28748,woody perennial crop +1008233,9c19b1bcbd391e00,36.6657,-120.07238,woody perennial crop +1024636,268a52869b01fe54,36.703,-120.23483,woody perennial crop +1009725,17b056351d1484f8,36.73876,-120.03574,woody perennial crop +6000264,a05ea26a797f4bad,36.74201,-119.66029,woody perennial crop +1022803,4032fbd64a0b5147,36.76356,-119.50957,woody perennial crop +1027402,bd841534f8883a00,36.79087,-120.42722,woody perennial crop +1001260,7a8855e8f27579dd,36.822,-120.01067,woody perennial crop +2007540,e07417e56c78efbc,36.82779,-120.13964,woody perennial crop +2003975,462a123c3840df4e,36.88373,-119.96643,woody perennial crop +1027797,3bde8bc179b8fcae,36.90691,-119.67872,woody perennial crop +4401700,57ea98e608333b76,36.90697,-121.68079,woody perennial crop +2005008,3747151a269036b7,36.94728,-120.10533,woody perennial crop +4401091,8c37c59148a2b480,36.95087,-121.79638,woody perennial crop +4302696,132df615713a9102,36.96033,-121.37833,woody perennial crop +2004376,cf1c223d4e408116,37.07745,-120.44392,woody perennial crop +2407743,8c23aa17d5b211ed,37.23624,-120.32542,woody perennial crop +5018890,000f186bcf4b2be5,37.29108,-121.04882,woody perennial crop +2402405,984a04e7d14a80a3,37.37849,-120.70412,woody perennial crop +2404916,c8ca707e38904560,37.4307,-120.82563,woody perennial crop +5010990,54d2ebadd8e4e8e2,37.4912,-121.00855,woody perennial crop +5000084,2bd084fb5a916420,37.52908,-121.13639,woody perennial crop +5003882,ae81f35c7ed7c3f1,37.5845,-121.28798,woody perennial crop +5003737,765ad4a2cb597b04,37.61728,-120.85985,woody perennial crop +5004110,1cb62b1edd188368,37.62449,-120.5485,woody perennial crop +0100516,02ed5f046523985e,37.66868,-121.71246,woody perennial crop +5013043,5aabe8a4b61ac4b1,37.7022,-120.98628,woody perennial crop +3902833,b609ae97085420fb,37.87292,-121.30337,woody perennial crop +3905745,b33f7db23b64a377,37.95409,-121.21348,woody perennial crop +3914976,9e3b1749179a4a68,37.99925,-121.20417,woody perennial crop +3906534,1e113c95a1054df5,38.0175,-121.19637,woody perennial crop +3909214,08da82da7c956979,38.10502,-121.21071,woody perennial crop +4805224,01609970faa3c5b9,38.24749,-122.11328,woody perennial crop +3402961,7f1efab0660af0a9,38.29195,-121.54985,woody perennial crop +4801570,4c49ef413147e623,38.50693,-121.9747,woody perennial crop +5706106,30d7e9a739591579,38.69489,-122.04115,woody perennial crop +4900643,e5d633da3758c491,38.69824,-122.82084,woody perennial crop +1701490,6fe9d9756f962d11,38.99993,-122.85197,woody perennial crop +5802741,bf27f77fb0ee40e0,39.14373,-121.57068,woody perennial crop +5101572,889d6cb7440babd8,39.14836,-121.78505,woody perennial crop +2900379,84389254458b3a58,39.15317,-120.95863,woody perennial crop +5106273,e3daa31507eccfd0,39.16473,-121.76551,woody perennial crop +1105518,ba5921d796f688e5,39.54047,-122.22839,woody perennial crop +0401407,f5cde95be18bb83d,39.61337,-121.84505,woody perennial crop +1104914,95af7876982dad47,39.66783,-122.25811,woody perennial crop +1107030,92e5e100e1a3a472,39.68505,-122.3378,woody perennial crop +0404422,f87888e9df3fab14,39.7019,-121.79205,woody perennial crop +1100552,8c469dfef3eac688,39.73426,-122.139,woody perennial crop +0404306,058d96dea56318e8,39.84545,-121.99766,woody perennial crop +5201739,254f091dee798a22,39.86793,-122.16831,woody perennial crop +5203095,148e91e1a4352adb,39.88393,-122.1723,woody perennial crop +5203110,d4aa2f4b173e52d9,39.89466,-122.21828,woody perennial crop +5200592,e664ce5ac2fb2b6f,39.91337,-122.11623,woody perennial crop +5201262,59d94ec22335aba5,40.11295,-122.12625,woody perennial crop \ No newline at end of file diff --git a/data_raw/template.xml b/data_raw/template.xml index b4c4091..defd670 100644 --- a/data_raw/template.xml +++ b/data_raw/template.xml @@ -11,11 +11,13 @@ temperate.deciduous - pfts/temperate.deciduous/post.distns.Rdata + + grass - pfts/grass/post.distns.Rdata + + @@ -40,13 +42,23 @@ + + + + + NPP + TotSoilCarb + AbvGrndWood + + + 99000000003 SIPNET git FALSE - sipnet.git - cp data/events.in @RUNDIR@ + ../sipnet.git + cp /projectnb2/dietzelab/ccmmf/ensemble/events.in @RUNDIR@ @@ -69,13 +81,8 @@ localhost output/out output/run - sbatch -J @NAME@ -o @STDOUT@ -e @STDERR@ - .*job ([0-9]+).* - squeue -j @JOBID@ || echo DONE - - slurm_array_submit.sh - -a 1-@NJOBS@ - 1000 - + qsub -V -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash + Your job ([0-9]+) .* + qstat -j @JOBID@ || echo DONE diff --git a/scripts/001_setup_design_points.R b/scripts/001_setup_design_points.R new file mode 100644 index 0000000..aac3ffb --- /dev/null +++ b/scripts/001_setup_design_points.R @@ -0,0 +1,58 @@ +#!/usr/bin/env Rscript + +# ======================================================================= +# 001_setup_design_points.R +# Prepares design points for Uncertainty Analysis. +# +# Currently, this script consumes the 'design_points.csv' from the shared +# directory. At this stage of the project, these are MANUALLY SELECTED +# points (198 sites), not yet the output of the automated clustering +# workflow. +# +# TODO: Once the integration architecture between the Downscaling and +# Uncertainty repos is finalized, refactor this script, continue to consume +# the artifact generated by that pipeline. +# ======================================================================= + +library(config) +library(dplyr) +library(readr) +library(PEcAn.logger) + +PEcAn.logger::logger.info("*** Starting 001_setup_design_points.R ***") + +cfg <- config::get(file = "000-config.yml") + +# Define paths based on config +master_file <- cfg$paths$master_design_points +out_file <- file.path(cfg$paths$raw_data_dir, basename(cfg$sites$design_points_file)) + +if (!file.exists(master_file)) { + PEcAn.logger::logger.severe( + paste0("Could not find master design points at: ", master_file, + "\nEnsure the shared data is available in the CCMMF data directory.") + ) +} + +master_data <- readr::read_csv(master_file, show_col_types = FALSE) + +n_sample <- if(!is.null(cfg$sites$n_sample)) cfg$sites$n_sample else 10 +set.seed(42) # Ensure reproducibility of the random subset +final_data <- master_data |> + # Map LandIQ PFT names to PEcAn PFT names (currently used two pft in original design_points.csv) + # This mapping ensures compatibility whether the input is manual or clustered + dplyr::mutate( + pft = dplyr::case_when( + pft == "annual crop" ~ "grass", + pft == "woody perennial crop" ~ "temperate.deciduous", + TRUE ~ NA_character_ + ) + ) |> + dplyr::filter(!is.na(pft)) |> + dplyr::slice_sample(n = n_sample) |> + dplyr::select(site_id, lat, lon, pft) + +readr::write_csv(final_data, out_file) + +PEcAn.logger::logger.info(paste("Saved processed SA design points to:", out_file)) +PEcAn.logger::logger.info("*** Finished 001_setup_design_points.R ***") \ No newline at end of file diff --git a/scripts/002_build_xml.R b/scripts/002_build_xml.R new file mode 100644 index 0000000..5be120c --- /dev/null +++ b/scripts/002_build_xml.R @@ -0,0 +1,119 @@ +#!/usr/bin/env Rscript + +library(config) +library(dplyr) +library(lubridate) +library(PEcAn.settings) +library(PEcAn.logger) + +PEcAn.logger::logger.info("*** Starting 002_build_xml.R ***") + +cfg <- config::get(file = "000-config.yml") + +ccmmf_dir <- cfg$paths$ccmmf_dir +raw_data_dir <- cfg$paths$raw_data_dir + +# SA configuration (configurable paths) +options <- list( + n_ens = 20, + n_met = 10, + start_date = "2016-01-01", + end_date = "2023-12-31", + sigma_levels = cfg$sensitivity$sigma_levels %||% c(-3, -2, -1, 1, 2, 3), + ic_dir = file.path(ccmmf_dir, "ensemble/IC_files"), + met_dir = file.path(ccmmf_dir, "ensemble/ERA5_SIPNET"), + pft_dir = file.path(ccmmf_dir, "ensemble/pfts"), + site_file = file.path(raw_data_dir, "sa_design_points.csv"), + template_file = file.path(raw_data_dir, "template.xml"), + output_file = file.path(raw_data_dir, "settings_sa.xml") +) + +# Load SA design points (expect at least one row) +site_info <- read.csv(options$site_file, stringsAsFactors = FALSE) +stopifnot( + all(c("site_id", "lat", "lon") %in% names(site_info)), + nrow(site_info) > 0 +) +site_info <- site_info |> dplyr::rename(id = site_id) + +# Read template settings +settings <- PEcAn.settings::read.settings(options$template_file) + +# Set run dates +settings$run$start.date <- options$start_date +settings$run$end.date <- options$end_date + +# Ensemble meta +settings$ensemble$size <- options$n_ens +settings$ensemble$start.year <- lubridate::year(options$start_date) +settings$ensemble$end.year <- lubridate::year(options$end_date) +settings$run$inputs$poolinitcond$ensemble <- options$n_ens + +# Sensitivity windows and sigma levels +settings$sensitivity.analysis$start.year <- lubridate::year(options$start_date) +settings$sensitivity.analysis$end.year <- lubridate::year(options$end_date) + +# Make multisite settings and set ensemble input paths (met + IC) +settings <- settings |> + PEcAn.settings::createMultiSiteSettings(site_info) |> + PEcAn.settings::setEnsemblePaths( + n_reps = options$n_met, + input_type = "met", + path = options$met_dir, + d1 = options$start_date, + d2 = options$end_date, + path_template = "{path}/{id}/ERA5.{n}.{d1}.{d2}.clim" + ) |> + PEcAn.settings::setEnsemblePaths( + n_reps = options$n_ens, + input_type = "poolinitcond", + path = options$ic_dir, + path_template = "{path}/{id}/IC_site_{id}_{n}.nc" + ) + +quantiles_list <- list() +for (sigma_val in options$sigma_levels) { + quantiles_list <- c(quantiles_list, list(sigma = as.character(sigma_val))) +} +settings$sensitivity.analysis$quantiles <- quantiles_list + +# ----------------------------------------------------------------------- +# Set PFT posterior files and outdirs +# - options$pft_dir is the base folder containing per-pft directories +# - each pft should contain 'post.distns.Rdata' +# ----------------------------------------------------------------------- +if (!is.null(options$pft_dir) && nzchar(options$pft_dir)) { + if (!dir.exists(options$pft_dir)) { + PEcAn.logger::logger.warn("Configured pft_dir does not exist: ", options$pft_dir) + } + + settings$pfts <- settings$pfts |> + lapply(function(pft) { + # Construct absolute posterior file path + candidate <- file.path(options$pft_dir, pft$name, "post.distns.Rdata") + + if (!file.exists(candidate)) { + # If absent, raise a severe error - posterior files are required for SA/ensemble + PEcAn.logger::logger.severe( + "Posterior file for PFT '", pft$name, "' not found at: ", candidate, + "\nPlease place post.distns.Rdata there or adjust cfg$paths$ccmmf_dir." + ) + } + + pft$posterior.files <- candidate + pft$outdir <- file.path(settings$outdir, "pfts", pft$name) + pft + }) +} else { + PEcAn.logger::logger.warn("options$pft_dir is empty; skipping posterior file configuration.") +} + +# Write settings XML (multisite) +PEcAn.settings::write.settings( + settings, + outputfile = basename(options$output_file), + outputdir = dirname(options$output_file) +) + +PEcAn.logger::logger.info("Wrote multisite SA settings to ", options$output_file) +PEcAn.logger::logger.info("*** Finished 002_build_xml.R ***") diff --git a/scripts/011_run_local_sensitivity.R b/scripts/011_run_local_sensitivity.R new file mode 100644 index 0000000..31ecfa1 --- /dev/null +++ b/scripts/011_run_local_sensitivity.R @@ -0,0 +1,127 @@ +#!/usr/bin/env Rscript + +# ======================================================================= +# 011_run_local_sensitivity.R +# Run PEcAn sensitivity analysis workflow for 10 SA design points +# ======================================================================= + +library(PEcAn.all) +library(PEcAn.logger) + +PEcAn.logger::logger.info("*** Starting 011_run_local_sensitivity.R ***") + +# ----------------------------------------------------------------------- +# Runtime parameters +# ----------------------------------------------------------------------- +options <- list( + optparse::make_option(c("-s", "--settings"), + default = "data_raw/settings_sa.xml", + help = "Path to multisite SA settings XML [default: %default]" + ), + optparse::make_option(c("-c", "--continue"), + default = FALSE, + help = "Resume interrupted workflow? [default: %default]" + ) +) |> + purrr::modify(\(x) { + x@help <- paste(x@help, "[default: %default]") + x + }) + +args <- optparse::OptionParser(option_list = options) |> + optparse::parse_args() + +# ----------------------------------------------------------------------- +# Error handling +# ----------------------------------------------------------------------- +options(warn = 1) +options(error = quote({ + try(PEcAn.utils::status.end("ERROR")) + try(PEcAn.remote::kill.tunnel(settings)) + if (!interactive()) { + q(status = 1) + } +})) + +# ----------------------------------------------------------------------- +# PEcAn Workflow +# ----------------------------------------------------------------------- + +PEcAn.all::pecan_version() + +# Read and prepare settings +settings <- PEcAn.settings::read.settings(args$settings) +# settings <- PEcAn.settings::prepare.settings(settings) # still db free settings not supported + +if (!dir.exists(settings$outdir)) { + dir.create(settings$outdir, recursive = TRUE) +} + +# Handle workflow resumption +status_file <- file.path(settings$outdir, "STATUS") +if (!args$continue && file.exists(status_file)) { + file.remove(status_file) +} + +# ----------------------------------------------------------------------- +# Step 1: Write model configs +# ----------------------------------------------------------------------- +if (PEcAn.utils::status.check("CONFIG") == 0) { + PEcAn.utils::status.start("CONFIG") + settings <- runModule.run.write.configs(settings) + PEcAn.settings::write.settings(settings, outputfile = "pecan.CONFIGS.xml") + PEcAn.utils::status.end() +} else if (file.exists(file.path(settings$outdir, "pecan.CONFIGS.xml"))) { + settings <- PEcAn.settings::read.settings( + file.path(settings$outdir, "pecan.CONFIGS.xml") + ) +} + +# ----------------------------------------------------------------------- +# Step 2: Run SIPNET models +# ----------------------------------------------------------------------- +if (PEcAn.utils::status.check("MODEL") == 0) { + PEcAn.utils::status.start("MODEL") + + # Determine stop_on_error behavior + stop_on_error <- as.logical(settings[[c("run", "stop_on_error")]]) + if (length(stop_on_error) == 0) { + # For SA runs, don't stop on single failures + stop_on_error <- FALSE + } + + PEcAn.workflow::runModule_start_model_runs(settings, + stop.on.error = stop_on_error) + PEcAn.utils::status.end() +} + +# ----------------------------------------------------------------------- +# Step 3: Extract model outputs +# ----------------------------------------------------------------------- +if (PEcAn.utils::status.check("OUTPUT") == 0) { + PEcAn.utils::status.start("OUTPUT") + runModule.get.results(settings) + PEcAn.utils::status.end() +} + +# ----------------------------------------------------------------------- +# Step 4: Sensitivity analysis +# ----------------------------------------------------------------------- +if ("sensitivity.analysis" %in% names(settings) && + PEcAn.utils::status.check("SENSITIVITY") == 0) { + PEcAn.utils::status.start("SENSITIVITY") + runModule.run.sensitivity.analysis(settings) + PEcAn.utils::status.end() +} + +# ----------------------------------------------------------------------- +# Workflow complete +# ----------------------------------------------------------------------- +if (PEcAn.utils::status.check("FINISHED") == 0) { + PEcAn.utils::status.start("FINISHED") + PEcAn.remote::kill.tunnel(settings) + PEcAn.utils::status.end() +} + +PEcAn.logger::logger.info("*** Finished 011_run_local_sensitivity.R ***") +print("---------- PEcAn Sensitivity Analysis Complete ----------") \ No newline at end of file diff --git a/scripts/012_aggregate_sensitivity.R b/scripts/012_aggregate_sensitivity.R new file mode 100644 index 0000000..d9cc191 --- /dev/null +++ b/scripts/012_aggregate_sensitivity.R @@ -0,0 +1,164 @@ +#!/usr/bin/env Rscript + +# ======================================================================= +# 012_aggregate_sensitivity.R +# Aggregate sensitivity analysis results across design points +# ======================================================================= + +library(config) +library(dplyr) +library(readr) +library(tidyr) +library(ggplot2) +library(PEcAn.logger) +library(PEcAn.settings) + +PEcAn.logger::logger.info("*** Starting 012_aggregate_sensitivity.R ***") + +# Source helper functions +source("R/local_sensitivity.R") + +# ----------------------------------------------------------------------- +# Load config and settings +# ----------------------------------------------------------------------- +cfg <- config::get(file = "000-config.yml") + +if (!dir.exists(cfg$paths$data_dir)) { + dir.create(cfg$paths$data_dir, recursive = TRUE) +} + +# Read PEcAn settings (use pecan.CONFIGS.xml from 011 run) +settings <- PEcAn.settings::read.settings( + file.path("output", "pecan.CONFIGS.xml") +) + +# Handles cases where multiple tags exist +sa_variables <- settings$sensitivity.analysis[ + grepl("^site\\.", names(settings$sensitivity.analysis)) +] |> + purrr::map(~.x[names(.x) == "variable"]) |> + unlist() |> + unique() + +# ----------------------------------------------------------------------- +# Load design points +# ----------------------------------------------------------------------- +design_points <- readr::read_csv( + file.path(cfg$paths$raw_data_dir, "sa_design_points.csv") +) + +site_covariates <- readr::read_csv( + file.path(cfg$paths$ccmmf_dir, "data/site_covariates.csv") +) |> + dplyr::filter(site_id %in% design_points$site_id) |> + dplyr::select(site_id, temp, precip, clay, ocd, twi) |> + dplyr::rename(MAT = temp, MAP = precip) + +gradient_vars <- c("MAT", "MAP", "clay", "ocd", "twi") +# ----------------------------------------------------------------------- +# Aggregate sensitivity results +# ----------------------------------------------------------------------- +PEcAn.logger::logger.info("Aggregating sensitivity results...") + +aggregated_results <- aggregate_local_sa( + sensitivity_outdir = settings$outdir, + design_points = design_points, + response_vars = sa_variables +) + +readr::write_csv(aggregated_results, file.path(cfg$paths$data_dir, "aggregated_sensitivity.csv")) + +# ----------------------------------------------------------------------- +# Summarize +# ----------------------------------------------------------------------- +PEcAn.logger::logger.info("Generating summary...") + +summary_results <- summarize_local_sa(aggregated_results) + +readr::write_csv( + summary_results$parameter_rankings, + file.path(cfg$paths$data_dir, "parameter_rankings.csv") +) + +readr::write_csv( + summary_results$pft_differences, + file.path(cfg$paths$data_dir, "pft_differences.csv") +) + +# ----------------------------------------------------------------------- +# Environmental gradients +# ----------------------------------------------------------------------- +PEcAn.logger::logger.info("Analyzing environmental gradients...") + +gradient_analysis <- analyze_environmental_gradients( + aggregated_results = aggregated_results, + env_covariates = site_covariates, + gradient_vars = gradient_vars, + min_sites = 5, + significance_level = 0.05, + r2_threshold = 0.1 +) + +# Save FULL regression results (all combinations, both targets) +readr::write_csv( + gradient_analysis$regression_results, + file.path(cfg$paths$data_dir, "regression_results.csv") +) + +# Save filtered significant gradients (for quick reference) +readr::write_csv( + gradient_analysis$significant_gradients, + file.path(cfg$paths$data_dir, "significant_gradients.csv") +) + +PEcAn.logger::logger.info( + "Found ", nrow(gradient_analysis$significant_gradients), " significant gradients" +) + +# ----------------------------------------------------------------------- +# Plots +# ----------------------------------------------------------------------- +PEcAn.logger::logger.info("Generating plots...") + +plots_dir <- file.path(cfg$paths$data_dir, "plots") +if (!dir.exists(plots_dir)) dir.create(plots_dir, recursive = TRUE) + +# Parameter rankings +for (var in sa_variables) { + p <- summary_results$parameter_rankings |> + dplyr::filter(response_var == var) |> + dplyr::slice_head(n = 15) |> + ggplot(aes(x = reorder(parameter, mean_abs_elasticity), y = mean_abs_elasticity)) + + geom_col(fill = "steelblue") + + coord_flip() + + labs( + title = paste("Top 15 Parameters for", var), + x = "Parameter", + y = "Mean Absolute Elasticity" + ) + + theme_minimal() + + ggsave( + file.path(plots_dir, paste0("param_ranking_", var, ".png")), + p, width = 8, height = 6, dpi = 300 + ) +} + +for (var in sa_variables) { + for (grad in gradient_vars) { + p_gradient <- plot_sensitivity_gradient( + aggregated_results = aggregated_results, + env_covariates = site_covariates, + gradient_var = grad, + top_n_parameters = 9, + response_var = var + ) + ggsave( + file.path(plots_dir, paste0("sensitivity_vs_", grad, "_", var, ".png")), + p_gradient, width = 12, height = 8, dpi = 300 + ) + } +} + + +PEcAn.logger::logger.info("*** Finished 012_aggregate_sensitivity.R ***")