Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
d718c19
feat: implement multi-GPU support and related optimizations in Sup3r …
bnb32 Apr 25, 2026
10523d7
refactor: streamline loss calculation and update loss details in Sup3…
bnb32 Apr 26, 2026
8214e7c
Refactor Sup3rX class for improved caching and feature handling
bnb32 Apr 27, 2026
a170c16
test: add assertions for queue state in eager and lazy batchers
bnb32 Apr 27, 2026
e581de2
feat: implement exception handling in TrainingSession with dedicated …
bnb32 Apr 28, 2026
05f25b0
refactor: remove unnecessary tf.function decorator from calc_loss met…
bnb32 Apr 29, 2026
36987ff
feat: enhance loss metrics with TensorFlow rank assertions and optimi…
bnb32 Apr 30, 2026
52c6458
fix: ensure proper data type casting for high-resolution features in …
bnb32 Apr 30, 2026
563ddff
fix: remove unnecessary type casting for exo in AbstractSingleModel
bnb32 Apr 30, 2026
5e3b1ab
fix: update error handling in loss functions to raise ValueError inst…
bnb32 Apr 30, 2026
f5f318d
refactor: change logger info to debug level for training loss and bat…
bnb32 May 5, 2026
3cc0326
fix: update TensorFlow function decorators to reduce retracing and im…
bnb32 May 6, 2026
dbcc297
Enhance logging across multiple modules
bnb32 May 9, 2026
17ddf08
Refactor logging levels and improve model loading in the pipeline
bnb32 May 10, 2026
5a7d88a
feat: implement resolve_feature method for feature alias handling and…
bnb32 May 10, 2026
471c8ed
fix: include lr_features in exo_features filtering for ForwardPassStr…
bnb32 May 10, 2026
4b40e2e
fix: update hr_exo_features to return topography for LinearInterp model
bnb32 May 10, 2026
cb06b9e
fix: enhance exo_features filtering to support multiple submodels in …
bnb32 May 10, 2026
780bcd4
fix: update exo_features filtering to include hr_exo_features in Forw…
bnb32 May 10, 2026
8f8a98d
fix: update lr_features comment for clarity in ForwardPassStrategy an…
bnb32 May 10, 2026
48be408
fix: update feature resolution method in test_netcdf_uv_invert for co…
bnb32 May 10, 2026
de27efd
fix: rename resolve_feature to derive for consistency and update rela…
bnb32 May 10, 2026
e634dce
fix: update mask handling in ForwardPassStrategy to improve logging a…
bnb32 May 11, 2026
be71f38
f-string to % formatting for logging to save time when log_level = INFO
bnb32 May 11, 2026
7de4f58
fix: update logging format in AbstractSingleModel to use % formatting…
bnb32 May 11, 2026
16abe41
fix: simplify loss_details dictionary comprehension to improve readab…
bnb32 May 12, 2026
405ecb8
fix: update batch size calculation in AbstractSingleModel for compati…
bnb32 May 13, 2026
25a5d4a
fix: clarify hr_exo_features docstring in SurfaceSpatialMetModel for …
bnb32 May 13, 2026
baeaa8d
fix: update batch size calculation in AbstractSingleModel to use low_…
bnb32 May 13, 2026
bcf10f3
fix: wrap apply_fn call in strategy.scope for multi-GPU compatibility
bnb32 May 14, 2026
54d5591
fix: streamline sample_batch docstring for clarity
bnb32 May 14, 2026
cda5917
fix: correct reference to optimizer in update_optimizer_gen method
bnb32 May 14, 2026
82348b3
fix: correct optimizer reference in Sup3rGan class for consistency
bnb32 May 14, 2026
b665ce1
fix: remove unnecessary device context check in _training_scope method
bnb32 May 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,4 @@ tags

# test dirs
exo_cache
.timings
138 changes: 90 additions & 48 deletions pixi.lock

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions sup3r/batch/batch_cli.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
# pylint: disable=all
"""Batch Job CLI entry points."""
import logging

import click
from gaps.batch import BatchJob

from sup3r import __version__

logger = logging.getLogger(__name__)


@click.group()
@click.version_option(version=__version__)
Expand Down Expand Up @@ -41,6 +45,15 @@ def from_config(ctx, config_file, dry_run, cancel, delete, monitor_background,
"""Run Sup3r batch from a config file."""
ctx.ensure_object(dict)
ctx.obj['VERBOSE'] = verbose or ctx.obj.get('VERBOSE', False)
logger.info(
'Starting batch job from %s (dry_run=%s, cancel=%s, delete=%s, '
'monitor_background=%s).',
config_file,
dry_run,
cancel,
delete,
monitor_background,
)
batch = BatchJob(config_file)

if cancel:
Expand All @@ -50,6 +63,8 @@ def from_config(ctx, config_file, dry_run, cancel, delete, monitor_background,
else:
batch.run(dry_run=dry_run, monitor_background=monitor_background)

logger.info('Finished batch CLI invocation for %s.', config_file)


if __name__ == '__main__':
main(obj={})
23 changes: 11 additions & 12 deletions sup3r/bias/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,10 @@ class is used, all data will be loaded in this class'
"""

logger.info(
'Initializing DataRetrievalBase for base dset "{}" '
'correcting biased dataset(s): {}'.format(base_dset, bias_feature)
'Initializing DataRetrievalBase for base dset "%s" correcting '
'biased dataset(s): %s',
base_dset,
bias_feature,
)
self.base_fps = base_fps
self.bias_fps = bias_fps
Expand Down Expand Up @@ -235,9 +237,8 @@ def distance_upper_bound(self):
diff = np.max(np.median(diff, axis=0))
self._distance_upper_bound = diff
logger.info(
'Set distance upper bound to {:.4f}'.format(
self._distance_upper_bound
)
'Set distance upper bound to %.4f',
self._distance_upper_bound,
)
return self._distance_upper_bound

Expand Down Expand Up @@ -587,13 +588,11 @@ def _match_zero_rate(bias_data, base_data):
q_zero_bias_out = np.nanmean(bias_data == 0)

logger.debug(
'Input bias/base zero rate is {:.3e}/{:.3e}, '
'output is {:.3e}/{:.3e}'.format(
q_zero_bias_in,
q_zero_base_in,
q_zero_bias_out,
q_zero_base_out,
)
'Input bias/base zero rate is %.3e/%.3e, output is %.3e/%.3e',
q_zero_bias_in,
q_zero_base_in,
q_zero_bias_out,
q_zero_base_out,
)

return bias_data
Expand Down
9 changes: 3 additions & 6 deletions sup3r/bias/bias_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,7 @@ def write_outputs(self, fp_out, out):
for k, v in self.meta.items():
f.attrs[k] = json.dumps(v)

logger.info(
'Wrote scalar adder factors to file: {}'.format(fp_out)
)
logger.info('Wrote scalar adder factors to file: %s', fp_out)

def _get_run_kwargs(self, **kwargs_extras):
"""Get dictionary of kwarg dictionaries to use for calls to
Expand Down Expand Up @@ -236,9 +234,8 @@ def run(
logger.debug('Starting linear correction calculation...')

logger.info(
'Initialized scalar / adder with shape: {}'.format(
self.bias_gid_raster.shape
)
'Initialized scalar / adder with shape: %s',
self.bias_gid_raster.shape,
)
self.out = self._run(
out=self.out,
Expand Down
89 changes: 69 additions & 20 deletions sup3r/bias/bias_calc_cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""sup3r bias correction calculation CLI entry points."""

import copy
import logging
import os
Expand All @@ -15,8 +16,12 @@

@click.group()
@click.version_option(version=__version__)
@click.option('-v', '--verbose', is_flag=True,
help='Flag to turn on debug logging. Default is not verbose.')
@click.option(
'-v',
'--verbose',
is_flag=True,
help='Flag to turn on debug logging. Default is not verbose.',
)
@click.pass_context
def main(ctx, verbose):
"""Sup3r bias calc Command Line Interface"""
Expand All @@ -25,16 +30,25 @@ def main(ctx, verbose):


@main.command()
@click.option('--config_file', '-c', required=True,
type=click.Path(exists=True),
help='sup3r bias correction calculation config .json file.')
@click.option('-v', '--verbose', is_flag=True,
help='Flag to turn on debug logging. Default is not verbose.')
@click.option(
'--config_file',
'-c',
required=True,
type=click.Path(exists=True),
help='sup3r bias correction calculation config .json file.',
)
@click.option(
'-v',
'--verbose',
is_flag=True,
help='Flag to turn on debug logging. Default is not verbose.',
)
@click.pass_context
def from_config(ctx, config_file, verbose=False, pipeline_step=None):
"""Run sup3r bias correction calculation from a config file."""
config = BaseCLI.from_config_preflight(ModuleName.BIAS_CALC, ctx,
config_file, verbose)
config = BaseCLI.from_config_preflight(
ModuleName.BIAS_CALC, ctx, config_file, verbose
)

exec_kwargs = config.get('execution_control', {})
hardware_option = exec_kwargs.pop('option', 'local')
Expand All @@ -44,31 +58,57 @@ def from_config(ctx, config_file, verbose=False, pipeline_step=None):
log_pattern = config.get('log_pattern', None)

jobs = config['jobs']
logger.info(
'Preparing bias calculation from %s with hardware=%s across %s jobs '
'using %s.',
config_file,
hardware_option,
len(jobs),
calc_class_name,
)
for i_node, job in enumerate(jobs):
node_config = copy.deepcopy(job)
node_config['status_dir'] = config['status_dir']
node_config['log_file'] = (
log_pattern if log_pattern is None
else os.path.normpath(log_pattern.format(node_index=i_node)))
name = ('{}_{}'.format(basename, str(i_node).zfill(6)))
log_pattern
if log_pattern is None
else os.path.normpath(log_pattern.format(node_index=i_node))
)
name = '{}_{}'.format(basename, str(i_node).zfill(6))
ctx.obj['NAME'] = name
node_config['job_name'] = name
node_config["pipeline_step"] = pipeline_step
node_config['pipeline_step'] = pipeline_step

cmd = BiasCalcClass.get_node_cmd(node_config)

cmd_log = '\n\t'.join(cmd.split('\n'))
logger.debug(f'Running command:\n\t{cmd_log}')
logger.debug('Running command:\n\t%s', cmd_log)
logger.info(
'Queueing bias calculation node %s as job "%s".',
i_node,
name,
)

if hardware_option.lower() in AVAILABLE_HARDWARE_OPTIONS:
kickoff_slurm_job(ctx, cmd, pipeline_step, **exec_kwargs)
else:
kickoff_local_job(ctx, cmd, pipeline_step)


def kickoff_slurm_job(ctx, cmd, pipeline_step=None, alloc='sup3r',
memory=None, walltime=4, feature=None,
stdout_path='./stdout/'):
logger.info(
'Finished queueing bias calculation work for %s jobs.', len(jobs)
)


def kickoff_slurm_job(
ctx,
cmd,
pipeline_step=None,
alloc='sup3r',
memory=None,
walltime=4,
feature=None,
stdout_path='./stdout/',
):
"""Run sup3r on HPC via SLURM job submission.

Parameters
Expand All @@ -94,8 +134,17 @@ def kickoff_slurm_job(ctx, cmd, pipeline_step=None, alloc='sup3r',
stdout_path : str
Path to print .stdout and .stderr files.
"""
BaseCLI.kickoff_slurm_job(ModuleName.BIAS_CALC, ctx, cmd, alloc, memory,
walltime, feature, stdout_path, pipeline_step)
BaseCLI.kickoff_slurm_job(
ModuleName.BIAS_CALC,
ctx,
cmd,
alloc,
memory,
walltime,
feature,
stdout_path,
pipeline_step,
)


def kickoff_local_job(ctx, cmd, pipeline_step=None):
Expand Down
Loading
Loading