Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
74f602e
Thresholding using Inf values for ignoring
ErikKusch Mar 9, 2026
c422568
feat: Metrics calculation for ETCCDI switched to CFVariable handling.…
ErikKusch Mar 9, 2026
f4b0fb9
fix: Helper_Threshold now works properly
ErikKusch Mar 9, 2026
c85f833
feat: monthly ETCCDI indices, custom temporal aggregation, BUT: daily…
ErikKusch Mar 9, 2026
ca18540
feat: custom Rnnmm threshold
ErikKusch Mar 9, 2026
4a87537
feat: CWD and CDD implemented in ETCCDI calculation
ErikKusch Mar 10, 2026
1729fe0
feat: Rx5day claulcation implemented in ETCCDI
ErikKusch Mar 10, 2026
3ff8903
feat: Quantile computation for ETCCDI Baselines
ErikKusch Mar 11, 2026
28d11e4
fix: file naming for KiN ETCCDI example data for easier identificatio…
ErikKusch Mar 11, 2026
7b6f420
fix: Helper_Threshold now thresholding by array actually placing NAs
ErikKusch Mar 12, 2026
b014bbc
feat: Tresholding now also for ETCCDI QUantiles and arrays
ErikKusch Mar 13, 2026
a5f1165
fix: dataset commit now ignoring large files
ErikKusch Mar 13, 2026
6d98c3c
feat: quantile-based metrics implemented but still missing GSL
ErikKusch Mar 13, 2026
9581abf
feat: GSL implementation to ETCCDI
ErikKusch Mar 16, 2026
19dfc44
fix: now importing create_ncdf function
ErikKusch Mar 16, 2026
63467ae
fix: comment on CDD and CWD needing cumilative implementation
ErikKusch Mar 20, 2026
44cd381
docs: new figures for GitHub
ErikKusch Mar 27, 2026
ed5cff5
fix: simplified discovery library script
ErikKusch Mar 27, 2026
8f2ab07
feat: can now select individual ETCCDI
ErikKusch Mar 27, 2026
0778ace
docs: dataset update as new example handling does not make use of ins…
ErikKusch Mar 27, 2026
5e41a34
feat: ETCCDI calculation now comes with default temporal resolution
ErikKusch Mar 27, 2026
c31099c
fix: now loading metadata from disk instead of GitHub
ErikKusch Apr 7, 2026
5b40b94
fix: now outputting a single file for ETCCDI calculations (even at di…
ErikKusch Apr 7, 2026
cb8052e
fix: Consecutive ETCCDI now implemented correctly
ErikKusch Apr 17, 2026
4591080
argument usage update to avoid merge conflict
ErikKusch May 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .github/ArgumentUsage.xlsx
Binary file not shown.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,7 @@
.httr-oauth
.DS_Store
.quarto
.vscode
.vscode

# ignore KiN nc files for now as most are too big to ship the package with
ExampleData/*
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export(Discovery_Library)
export(Discovery_QuickFacts)
export(Discovery_Read)
export(Discovery_Variables)
export(Metrics_BootstrapQuantiles)
export(Metrics_ETCCDI)
export(NC_Read)
export(NC_Write)
Expand Down Expand Up @@ -51,6 +52,8 @@ importFrom(ncdf4,ncatt_put)
importFrom(ncdf4,ncdim_def)
importFrom(ncdf4,ncvar_def)
importFrom(ncdf4,ncvar_put)
importFrom(ncdfCF,as_CF)
importFrom(ncdfCF,create_ncdf)
importFrom(parallel,detectCores)
importFrom(parallel,makeCluster)
importFrom(progress,progress_bar)
Expand Down
55 changes: 27 additions & 28 deletions R/Discovery_Functionality.r
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,24 @@ Discovery_DataSet.Check <- function(dataSet) {
#'
#' @export
Discovery_Read <- function(dataSet = "NULL") {
owner <- "Clim-Hub"
repo <- "ClimHub"
branch <- "master"
path <- "product-metadata"
metadata_dir <- system.file("product-metadata", package = "ClimHub")
if (metadata_dir == "") {
stop("Could not locate 'product-metadata' directory in package 'ClimHub'.")
}

Discovery_DataSet.Check(dataSet)

# URL of the raw JSON file
URL <- paste0("https://raw.githubusercontent.com/", owner, "/", repo, "/", branch, "/", path, "/", paste0(dataSet, ".json"))
json_file <- file.path(metadata_dir, paste0(dataSet, ".json"))

# URL-based fallback kept for reference:
# owner <- "Clim-Hub"
# repo <- "ClimHub"
# branch <- "master"
# path <- "product-metadata"
# URL <- paste0("https://raw.githubusercontent.com/", owner, "/", repo, "/", branch, "/", path, "/", paste0(dataSet, ".json"))

# Read JSON content
json_data <- jsonlite::fromJSON(URL)
# Read JSON content from package files.
json_data <- jsonlite::fromJSON(json_file)

# View the data
json_data
Expand Down Expand Up @@ -129,28 +135,21 @@ Discovery_Citation <- function(dataSet = "NULL") {
#'
#' @export
Discovery_Library <- function() {
owner <- "Clim-Hub"
repo <- "ClimHub"
path <- "product-metadata"

# Construct the URL
URL <- paste0("https://github.com/", owner, "/", repo, "/tree/master/", path)

# Read the page
page <- rvest::read_html(URL)

# Extract the embedded JSON metadata
raw_metadata <- page %>%
rvest::html_nodes("script") %>% # Extract all <script> tags
rvest::html_text() %>% # Get the text content
grep("payload", ., value = TRUE) %>% # Locate the script containing "payload"
jsonlite::fromJSON(simplifyVector = FALSE) # Parse JSON metadata
metadata_dir <- system.file("product-metadata", package = "ClimHub")
if (metadata_dir == "") {
stop("Could not locate 'product-metadata' directory in package 'ClimHub'.")
}

# Navigate to the relevant "tree" section
tree_items <- raw_metadata$payload$codeViewFileTreeLayoutRoute$fileTree$`product-metadata`$items
tree_items <- list.files(path = metadata_dir, pattern = "\\.json$", full.names = FALSE)

# make into vector of names of files
tree_items <- unlist(lapply(tree_items, "[[", "name"))
# GitHub Contents API approach kept for reference:
# owner <- "Clim-Hub"
# repo <- "ClimHub"
# branch <- "master"
# path <- "product-metadata"
# URL <- paste0("https://api.github.com/repos/", owner, "/", repo, "/contents/", path, "?ref=", branch)
# tree_items <- jsonlite::fromJSON(URL, simplifyVector = TRUE)
# tree_items <- tree_items$name

# subset for .json files
tree_items <- tree_items[grep(pattern = "\\.json$", tree_items)]
Expand Down
49 changes: 49 additions & 0 deletions R/Helper_ETCCDIDailyBootstraps.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#' @title Prepare Bootstrap Intervals of Daily Time Series across Years
#'
#' @description Create bootstrap intervals of daily time series across years for a given time sequence. This function is needed for baseperiod quantile calculations for `Metrics_ETCCDI`.
#'
#' @param dates A character vector of dates in the format "YYYY-MM-DDTHH:MM:SS" (e.g., "1961-01-01T00:00:00") representing the time series for which bootstrap intervals should be created. Same as is obtained with the `dimnames(ds[["time"]])` of a `CFDataset` object called `ds`.
#' @param bootstrapWindow Numeric, uneven integer. Length of bootstrapping interval around each target date.
#'
#' @return A list of character vectors, each containing the dates that fall within the bootstrap window around each target date. The names of the list correspond to the day of year (in "MM-DD" format) for which the bootstrap intervals are created. Interval creation assumes a leap year (i.e., 366 days) to ensure that the same day of year is always included in the bootstrap intervals, even for February 29th.
#'
#' @author Erik Kusch
#'
#' @examples
#' time_series <- seq(
#' as.POSIXct("1961-01-01 00:00:00", tz = "UTC"),
#' as.POSIXct("1999-12-31 23:00:00", tz = "UTC"),
#' by = "1 day"
#' )
#' Time_seq <- format(time_series, "%Y-%m-%dT%H:%M:%S")
#' Helper_ETCCDIDailyBootstraps(dates = Time_seq)
#'
Helper_ETCCDIDailyBootstraps <- function(dates, bootstrapWindow = 5) {
## input checking
if (bootstrapWindow %% 2 != 1) {
stop("Bootstrap window must be an odd number.")
}

## transforming time sequence into dates
Date_seq <- format(as.POSIXct(dates, tryFormats = c("%Y-%m-%dT%H:%M:%OS"), tz = "UTC"), "%Y-%m-%d")
dates <- as.Date(Date_seq) # Convert Date_seq to Date objects
years <- sort(unique(as.integer(format(dates, "%Y")))) # Get unique years

## actual grouping creation
half_window <- (bootstrapWindow - 1) / 2
offsets <- c(-half_window:-1, 0, 1:half_window)
leap_year_dates <- as.Date("2000-01-01") + (0:365) # Compute MM-DD names for each day of year using a leap year

# For each day of year
grouping_ls <- lapply(1:366, function(doy) {
centre_dates <- as.Date(paste0(years, "-01-01")) + (doy - 1)
target_dates <- do.call(c, lapply(centre_dates, function(centre_date) {
neigh_dates <- centre_date + offsets
neigh_str <- format(neigh_dates, "%Y-%m-%d")
neigh_str
}))
target_dates[target_dates %in% Date_seq] # Keep only dates that exist in the data
})
names(grouping_ls) <- format(leap_year_dates, "%m-%d")
grouping_ls
}
187 changes: 187 additions & 0 deletions R/Helper_ETCCDIGSL.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
#' @title Calculate Growing Season Length (GSL)
#'
#' @description Calculates Growing Season Length as defined by ETCCDI with different treatment of northern and southern hemisphere. For the northern hemisphere, GSL is defined as the number of days between the first occurrence of 6 consecutive days with mean temperature above 5°C after Jan 1st and the first occurrence of 6 consecutive days with mean temperature below 5°C after Jul 1st. For the southern hemisphere, GSL is defined as the number of days between the first occurrence of 6 consecutive days with mean temperature above 5°C after Jul 1st and the first occurrence of 6 consecutive days with mean temperature below 5°C after Jan 1st of the following year. If no valid start or end date is found, GSL is set to NA for that year and grid cell.
#'
#' @param TM A CFVariable containing daily mean temperatures (or similar) to be used for GSL calculation.
#'
#' @importFrom ncdfCF as_CF
#'
#' @return A CFVariable.
#'
#' @author Erik Kusch
#'
#' @examples
#' \dontrun{
#' TX <- NC_Read("inst/extdata/KiN_tasmax_2050.nc")[["tasmax"]]
#' TN <- NC_Read("inst/extdata/KiN_tasmin_2050.nc")[["tasmin"]]
#' TM <- (TN + TX) / 2
#' GSL <- Helper_ETCCDIGSL(TM)
#' }
Helper_ETCCDIGSL <- function(TM) {
threshold <- 273.15 + 5
TM_array <- TM$raw()
dn <- dimnames(TM_array)
if (is.null(dn) || is.null(dn$lat)) {
stop("Array must have dimnames with a 'lat' entry.")
}
lat <- as.numeric(dn$lat)
if (anyNA(lat)) {
stop("Latitude dimnames could not be converted to numeric.")
}

north_idx <- which(lat >= 0)
south_idx <- which(lat < 0)
GSL_ls <- list(
north = TM_array[, north_idx, , drop = FALSE],
south = TM_array[, south_idx, , drop = FALSE]
)

first_run_start <- function(x, idx, conditionFun, runLength = 6) {
if (length(idx) < runLength) {
return(NA_integer_)
}
v <- conditionFun(x[idx])
v[is.na(v)] <- FALSE
r <- rle(v)

hit <- which(r$values & r$lengths >= runLength)
if (length(hit) == 0) {
return(NA_integer_)
}

relStart <- if (hit[1] == 1) 1L else (sum(r$lengths[seq_len(hit[1] - 1)]) + 1L)
idx[relStart]
}


GSL_calcs <- lapply(names(GSL_ls), function(hemi) {
arr <- GSL_ls[[hemi]]
# Skip if no latitude cells
if (is.null(arr) || length(dim(arr)) != 3 || dim(arr)[2] == 0) {
return(NULL)
}

## figure out dates and years
dn <- dimnames(arr)
if (is.null(dn) || is.null(dn$time)) stop("Time dimnames are required.")
dates <- as.Date(dn$time)
yrs <- sort(unique(as.integer(format(dates, "%Y"))))

## makke arrays for filling with information about start/end dates and GSL days
nlon <- dim(arr)[which(names(dn) == "lon")]
nlat <- dim(arr)[which(names(dn) == "lat")]
ny <- length(yrs)
startDate <- array(as.Date(NA),
dim = c(nlon, nlat, ny),
dimnames = list(lon = dn$lon, lat = dn$lat, time = as.character(yrs))
)
endDate <- array(as.Date(NA),
dim = c(nlon, nlat, ny),
dimnames = list(lon = dn$lon, lat = dn$lat, time = as.character(yrs))
)
gslDays <- array(NA_real_,
dim = c(nlon, nlat, ny),
dimnames = list(lon = dn$lon, lat = dn$lat, time = as.character(yrs))
)

## iterate over years and grid cells to find start/end dates and GSL days
for (yIdx in seq_along(yrs)) {
y <- yrs[yIdx]

## define windows in which to look for runs
if (hemi == "north") {
# Start window (north): Jan 1 -> Dec 31 of year y
startWin <- which(dates >= as.Date(sprintf("%d-01-01", y)) &
dates <= as.Date(sprintf("%d-12-31", y)))

# End window (north): Jul 1 -> Dec 31 of year y
endWin <- which(dates >= as.Date(sprintf("%d-07-01", y)) &
dates <= as.Date(sprintf("%d-12-31", y)))

# Fallback if no 6-day <= threshold run
endFallback <- as.Date(sprintf("%d-12-31", y))
} else {
# Start window (south): Jul 1 -> Dec 31 of year y
startWin <- which(dates >= as.Date(sprintf("%d-07-01", y)) &
dates <= as.Date(sprintf("%d-12-31", y)))

# End window (south): Jan 1 -> Jun 30 of year y+1
# (June 31 is not a valid date, so Jun 30 is used)
yNext <- y + 1L
hasNextYear <- any(as.integer(format(dates, "%Y")) == yNext)

if (hasNextYear) {
endWin <- which(dates >= as.Date(sprintf("%d-01-01", yNext)) &
dates <= as.Date(sprintf("%d-06-30", yNext)))
endFallback <- as.Date(sprintf("%d-06-30", yNext))
} else {
endWin <- integer(0)
endFallback <- as.Date(NA)
}
}

## iterate over grid cells to find start/end dates and GSL days
for (i in seq_len(nlon)) {
for (j in seq_len(nlat)) {
ts <- arr[i, j, ]

sIdx <- first_run_start(ts, startWin, function(v) v > threshold)

if (is.na(sIdx)) {
# no valid season start -> leave NA
next
}

# Find end run; if not found, use fallback date (except missing next year case in south)
eIdx <- first_run_start(ts, endWin, function(v) v < threshold)

sDate <- dates[sIdx]
if (is.na(eIdx)) {
eDate <- endFallback
} else {
eDate <- dates[eIdx]
}

startDate[i, j, yIdx] <- sDate
endDate[i, j, yIdx] <- eDate

if (!is.na(eDate)) {
gslDays[i, j, yIdx] <- as.numeric(eDate - sDate)
# If you want inclusive count, use: as.numeric(eDate - sDate) + 1
}
}
}
}

# list(
# startDate = startDate,
# endDate = endDate,
# gslDays =
gslDays
# ,
# years = yrs
# )
})
names(GSL_calcs) <- names(GSL_ls)

## fusing north and south-hemisphere arrays
gsl_array <- abind::abind(GSL_calcs$south, GSL_calcs$north, along = 2)

## transforming back to CFVariable
names(dimnames(gsl_array)) <- names(dimnames(TM_array))
dimnames(gsl_array)[1:2] <- dimnames(TM_array)[1:2]
dimnames(gsl_array)$time <- paste0(dimnames(gsl_array)$time, "-07-02T12:00:00")
gsl_array_tll <- aperm(gsl_array, c(3, 1, 2))

# optional: fix dimname order explicitly
dn <- dimnames(gsl_array)
dimnames(gsl_array_tll) <- list(
time = dn$time,
lon = dn$lon,
lat = dn$lat
)
GSL <- as_CF("GSL", gsl_array_tll)

## report back
return(GSL)
}
Loading
Loading