From 79470ec36768dae6c17922dfe340ab59e021aa46 Mon Sep 17 00:00:00 2001 From: Paul Date: Fri, 8 May 2026 16:00:56 -0600 Subject: [PATCH 1/2] Fix regex pattern in preprocess_collect_config and add test for untagged files Co-authored-by: Copilot --- gaps/cli/preprocessing.py | 7 +++++-- tests/cli/test_cli_preprocesing.py | 32 ++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/gaps/cli/preprocessing.py b/gaps/cli/preprocessing.py index a125754f..9bd81ea9 100644 --- a/gaps/cli/preprocessing.py +++ b/gaps/cli/preprocessing.py @@ -150,13 +150,16 @@ def preprocess_collect_config( files = collect_pattern if files == "PIPELINE": files = parse_previous_status(project_dir, command_name) - files = [re.sub(f"{TAG}\\d+", "*", fname) for fname in files] + files = [re.sub(f"{TAG}\\d+", f"{TAG}*", fname) for fname in files] if isinstance(files, str): files = [files] if isinstance(files, abc.Sequence): - files = {pattern.replace("*", ""): pattern for pattern in files} + files = { + pattern.replace(f"{TAG}*", "").replace("*", ""): pattern + for pattern in files + } files = [ ( diff --git a/tests/cli/test_cli_preprocesing.py b/tests/cli/test_cli_preprocesing.py index d4e10c69..69eaa96c 100644 --- a/tests/cli/test_cli_preprocesing.py +++ b/tests/cli/test_cli_preprocesing.py @@ -6,6 +6,7 @@ """ import json +import glob from pathlib import Path import pytest @@ -114,6 +115,37 @@ def test_preprocess_collect_config_pipeline_input(tmp_path): assert out_fp == pattern.replace("*", "") +def test_preprocess_collect_config_pipeline_input_ignores_untagged_file( + tmp_path, +): + """Test that PIPELINE collection patterns do not match untagged files.""" + config_fp = tmp_path / "pipe_config.json" + with open(config_fp, "w") as file_: + json.dump(SAMPLE_CONFIG, file_) + + (tmp_path / "config.json").touch() + (tmp_path / "collect_config.json").touch() + + Pipeline(config_fp) + + job_file = tmp_path / "output_file_j0.h5" + job_file.touch() + (tmp_path / "output_file.h5").touch() + Status.make_single_job_file( + tmp_path, + pipeline_step="run", + job_name="test_0", + attrs={StatusField.OUT_FILE: job_file.as_posix()}, + ) + + config = preprocess_collect_config({}, tmp_path, "collect-run") + + matched_files = sorted( + Path(path) for path in glob.glob(config["_pattern"][0]) + ) + assert matched_files == [job_file] + + def test_split_project_points_into_ranges(): """Test the `split_project_points_into_ranges` function.""" From 37127aef2e3176bd8c3b1fdaf345e5fa6ab3af74 Mon Sep 17 00:00:00 2001 From: Paul Date: Sat, 9 May 2026 14:47:24 -0600 Subject: [PATCH 2/2] Fix tests --- tests/cli/test_cli.py | 4 ++-- tests/cli/test_cli_preprocesing.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/cli/test_cli.py b/tests/cli/test_cli.py index 8077349a..a8412dd5 100644 --- a/tests/cli/test_cli.py +++ b/tests/cli/test_cli.py @@ -370,7 +370,7 @@ def test_cli_monitor( assert "collect-run_collect_pattern" in collected_outputs.attrs assert ( Path(collected_outputs.attrs["collect-run_collect_pattern"]) - == tmp_cwd / file_pattern + == tmp_cwd / file_pattern.replace("*", f"{TAG}*") ) profiles = manual_collect(data_dir / file_pattern, "cf_profile") @@ -487,7 +487,7 @@ def test_cli_background( assert "collect-run_collect_pattern" in collected_outputs.attrs assert ( Path(collected_outputs.attrs["collect-run_collect_pattern"]) - == tmp_cwd / file_pattern + == tmp_cwd / file_pattern.replace("*", f"{TAG}*") ) profiles = manual_collect(data_dir / file_pattern, "cf_profile") diff --git a/tests/cli/test_cli_preprocesing.py b/tests/cli/test_cli_preprocesing.py index 69eaa96c..f72e5aab 100644 --- a/tests/cli/test_cli_preprocesing.py +++ b/tests/cli/test_cli_preprocesing.py @@ -17,6 +17,7 @@ preprocess_collect_config, split_project_points_into_ranges, ) +from gaps.cli.config import TAG from gaps.exceptions import gapsConfigError from gaps.warn import gapsWarning @@ -112,7 +113,7 @@ def test_preprocess_collect_config_pipeline_input(tmp_path): assert len(config["_pattern"]) == 2 for out_fp, pattern in zip(config["_out_path"], config["_pattern"]): assert any(name in out_fp for name in allowed_out_fn) - assert out_fp == pattern.replace("*", "") + assert out_fp == pattern.replace(f"{TAG}*", "") def test_preprocess_collect_config_pipeline_input_ignores_untagged_file(