feat(runs): add standalone model presets

xraymemory · xraymemory · commit e99e5f2cfee2 · 2026-05-23T14:16:28.000-04:00
diff --git a/README.md b/README.md
@@ -178,7 +178,10 @@ results directory, and MSA cache.
 run_experiments --list        # show available presets
 run_experiments --show rf3    # inspect what will run
 run_experiments --dry-run rf3 # print commands without running
-run_experiments rf3           # run only the RF3 job from full_8gpu
+run_experiments rf3           # run the standalone RF3 preset
+run_experiments boltz         # run Boltz2 X-ray + Boltz2 MD
+run_experiments boltz1        # run the standalone Boltz1 preset
+run_experiments protenix      # run the standalone Protenix preset
 run_experiments               # run the default full_8gpu preset
 ```
 
@@ -189,6 +192,11 @@ parallel. Run a subset with:
 run_experiments full_8gpu --jobs rf3,protenix
 ```
 
+Standalone presets are available for each model/model family: `boltz`,
+`boltz1`, `boltz2`, `boltz2_xrd`, `boltz2_md`, `rf3`, and `protenix`.
+Additional comparison presets include `protenix_dual`, `rf3_protenix`, and the
+RF3 variants `rf3_partial` and `rf3_partial_chiral_off`.
+
 Presets live in `experiments/*.toml` in your local checkout and on the pod at
 `/home/dev/workspace/experiments/*.toml`. To modify an experiment, edit or copy
 a preset locally, let ACTL sync it, then run it by name or path:
diff --git a/experiments/boltz.toml b/experiments/boltz.toml
@@ -0,0 +1,30 @@
+description = "Boltz2 X-ray and MD canonical occ-sweep jobs."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/boltz"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "boltz2_xrd"
+env = "boltz"
+gpus = "0,1"
+output_subdir = "boltz2_xrd"
+args = { model = "boltz2", method = "X-RAY DIFFRACTION", gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5" }
+
+[[jobs]]
+name = "boltz2_md"
+env = "boltz"
+gpus = "2,3"
+output_subdir = "boltz2_md"
+args = { model = "boltz2", method = "MD", gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5" }
diff --git a/experiments/boltz1.toml b/experiments/boltz1.toml
@@ -0,0 +1,27 @@
+description = "Boltz1 canonical occ-sweep job."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/boltz1"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+BOLTZ1_CHECKPOINT = "/checkpoints/boltz1_conf.ckpt"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+model = "boltz1"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5"
+model-checkpoint = "${BOLTZ1_CHECKPOINT}"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "boltz1"
+env = "boltz"
+gpus = "0,1"
+output_subdir = "boltz1"
+args = {}
diff --git a/experiments/boltz2.toml b/experiments/boltz2.toml
@@ -0,0 +1,30 @@
+description = "Boltz2 X-ray and MD canonical occ-sweep jobs."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/boltz2"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "boltz2_xrd"
+env = "boltz"
+gpus = "0,1"
+output_subdir = "boltz2_xrd"
+args = { model = "boltz2", method = "X-RAY DIFFRACTION", gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5" }
+
+[[jobs]]
+name = "boltz2_md"
+env = "boltz"
+gpus = "2,3"
+output_subdir = "boltz2_md"
+args = { model = "boltz2", method = "MD", gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5" }
diff --git a/experiments/boltz2_md.toml b/experiments/boltz2_md.toml
@@ -0,0 +1,26 @@
+description = "Boltz2 MD canonical occ-sweep job."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/boltz2_md"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+model = "boltz2"
+method = "MD"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "boltz2_md"
+env = "boltz"
+gpus = "0,1"
+output_subdir = "boltz2_md"
+args = {}
diff --git a/experiments/boltz2_xrd.toml b/experiments/boltz2_xrd.toml
@@ -0,0 +1,26 @@
+description = "Boltz2 X-ray canonical occ-sweep job."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/boltz2_xrd"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+model = "boltz2"
+method = "X-RAY DIFFRACTION"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "boltz2_xrd"
+env = "boltz"
+gpus = "0,1"
+output_subdir = "boltz2_xrd"
+args = {}
diff --git a/experiments/protenix.toml b/experiments/protenix.toml
@@ -0,0 +1,25 @@
+description = "Protenix canonical occ-sweep job."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/protenix"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+model = "protenix"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-weights = "0.0 0.05 0.1 0.2 0.35 0.5"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "protenix"
+env = "protenix"
+gpus = "0,1"
+output_subdir = "protenix"
+args = {}
diff --git a/experiments/rf3.toml b/experiments/rf3.toml
@@ -0,0 +1,27 @@
+description = "RF3 canonical occ-sweep job."
+
+[defaults]
+DATA_DIR = "/data/inputs"
+RESULTS_DIR = "/data/results/rf3"
+MSA_CACHE_DIR = "/root/.sampleworks"
+PROTEINS_CSV = "${DATA_DIR}/proteins.csv"
+RF3_CHECKPOINT = "/checkpoints/rf3_foundry_01_24_latest.ckpt"
+
+[shared_args]
+proteins = "${PROTEINS_CSV}"
+model = "rf3"
+scalers = "pure_guidance"
+partial-diffusion-step = 120
+ensemble-sizes = "8"
+gradient-weights = "0.0 0.005 0.01 0.02 0.035 0.05 0.1"
+model-checkpoint = "${RF3_CHECKPOINT}"
+gradient-normalization = true
+augmentation = true
+align-to-input = true
+
+[[jobs]]
+name = "rf3"
+env = "rf3"
+gpus = "0,1"
+output_subdir = "rf3"
+args = {}
diff --git a/src/sampleworks/runs/cli.py b/src/sampleworks/runs/cli.py
@@ -67,16 +67,17 @@ def _build_parser() -> argparse.ArgumentParser:
         prog="sampleworks-runs",
         description=(
             "Run Sampleworks experiment presets. With no target, runs the "
-            "full_8gpu preset. A target like 'rf3' or 'rf3,protenix' is a "
-            "job shortcut from full_8gpu; use --preset for another TOML preset."
+            "full_8gpu preset. A target like 'rf3', 'boltz', or 'protenix' "
+            "runs that preset; comma-separated targets like 'rf3,protenix' "
+            "select jobs from full_8gpu."
         ),
     )
     parser.add_argument(
         "target",
         nargs="?",
         help=(
-            "Job shortcut from full_8gpu (rf3, protenix, boltz2_xrd, "
-            "boltz2_md, or comma-separated), or 'full'/'full_8gpu'."
+            "Preset name from experiments/ (rf3, boltz, protenix, etc.), "
+            "comma-separated job shortcut from full_8gpu, or 'full'/'full_8gpu'."
         ),
     )
     parser.add_argument(
@@ -161,6 +162,9 @@ def _resolve_target(
     if target.endswith(".toml") or "/" in target:
         parser.error("pass custom preset paths with --preset path/to/preset.toml")
 
+    if "," not in target and target in loader.list_presets():
+        return target, ""
+
     return DEFAULT_PRESET, target
 
 
diff --git a/tests/runs/test_cli.py b/tests/runs/test_cli.py
@@ -14,10 +14,17 @@ def test_list_prints_all_experiment_presets(capsys: pytest.CaptureFixture[str])
     assert exit_code == 0
     out = capsys.readouterr().out.splitlines()
     assert set(out) == {
+        "boltz",
+        "boltz1",
+        "boltz2",
+        "boltz2_md",
+        "boltz2_xrd",
         "full_8gpu",
+        "protenix",
+        "protenix_dual",
+        "rf3",
         "rf3_partial",
         "rf3_partial_chiral_off",
-        "protenix_dual",
         "rf3_protenix",
     }
 
@@ -69,6 +76,31 @@ def test_job_shortcut_filters_default_preset(
     assert "boltz2_md" not in out
 
 
+def test_model_target_uses_named_preset(
+    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    """A single model target resolves to the matching standalone preset."""
+    monkeypatch.setenv("HOME", "/home/test")
+    exit_code = cli.main(["boltz", "--show"])
+    assert exit_code == 0
+    out = capsys.readouterr().out
+    assert "name: boltz" in out
+    assert "name: boltz2_xrd" in out
+    assert "name: boltz2_md" in out
+
+
+def test_boltz1_target_uses_named_preset(
+    monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
+) -> None:
+    """The Boltz1 model has its own standalone preset target."""
+    monkeypatch.setenv("HOME", "/home/test")
+    exit_code = cli.main(["boltz1", "--show"])
+    assert exit_code == 0
+    out = capsys.readouterr().out
+    assert "name: boltz1" in out
+    assert "output_subdir: boltz1" in out
+
+
 def test_jobs_filters_explicit_preset(
     monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]
 ) -> None:
diff --git a/tests/runs/test_loader.py b/tests/runs/test_loader.py
@@ -8,10 +8,23 @@
 from sampleworks.runs import loader
 
 
-BUNDLED = ["full_8gpu", "rf3_partial", "rf3_partial_chiral_off", "protenix_dual", "rf3_protenix"]
-
-
-def test_list_presets_returns_the_five() -> None:
+BUNDLED = [
+    "boltz",
+    "boltz1",
+    "boltz2",
+    "boltz2_md",
+    "boltz2_xrd",
+    "full_8gpu",
+    "protenix",
+    "protenix_dual",
+    "rf3",
+    "rf3_partial",
+    "rf3_partial_chiral_off",
+    "rf3_protenix",
+]
+
+
+def test_list_presets_returns_bundled_experiments() -> None:
     """Preset discovery returns the expected bundled experiment names."""
     names = loader.list_presets()
     assert set(names) == set(BUNDLED), f"unexpected experiment presets: {names}"