12 changes: 6 additions & 6 deletions README.md
@@ -88,8 +88,8 @@ Output files appear in `output/boltz2_pure_guidance/`: `refined.cif` (final ense
```bash
pixi run -e boltz python run_grid_search.py \
--proteins proteins.csv \
- --models boltz2 \ # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!)
- --methods "X-RAY DIFFRACTION" \ # only useful for Boltz-2, ignored otherwise
+ --model boltz2 \ # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!)
+ --method "X-RAY DIFFRACTION" \ # only useful for Boltz-2, ignored otherwise
--scalers pure_guidance \ # options: pure_guidance, fk_steering, or both as space-separated list
--ensemble-sizes "1 4" \
--gradient-weights "0.1 0.2" \
@@ -113,11 +113,11 @@ name,structure,density,resolution
| Argument | Description | Default |
|---|---|---|
| `--proteins` | CSV with structure/density/resolution columns | required |
- | `--models` | Model to run. One of `boltz1`, `boltz2`, `protenix`, `rf3` | required |
+ | `--model` | Model to run. One of `boltz1`, `boltz2`, `protenix`, `rf3` | `boltz2` |
| `--scalers` | Guidance method(s) to sweep | `pure_guidance fk_steering` |
| `--ensemble-sizes` | Space-separated values, e.g. `"1 4"` | `"1 2 4 8"` |
| `--gradient-weights` | Space-separated values, e.g. `"0.1 0.2"` | `"0.01 0.1 0.2"` |
- | `--methods` | Boltz-2 sampling method (required for boltz2) | `X-RAY DIFFRACTION` |
+ | `--method` | Boltz-2 sampling method | `X-RAY DIFFRACTION` |
| `--max-parallel` | Parallel workers (default: number of GPUs) | `auto` |
| `--dry-run` | Print jobs without running them | off |
| `--force-all` | Re-run including already-successful jobs | off |
@@ -126,7 +126,7 @@ name,structure,density,resolution

Output layout: `grid_search_results/<protein>/<model>[_<method>]/<scaler>/ens<N>_gw<W>/`

- > **Note**: Jobs are skipped if a `refined.cif` file already exists in the output directory. Some flags (e.g., `--use-tweedie`, `--gradient-normalization`) are not reflected in the directory structure, so changing them alone won't trigger a re-run. Use `--force-all` to re-run all jobs regardless. This is under active development and will likely change soon.
+ > **Note**: Jobs are skipped if a `refined.cif` file already exists in the output directory. Some flags (e.g., `--step-scaler-type`, `--gradient-normalization`) are not reflected in the directory structure, so changing them alone won't trigger a re-run. Use `--force-all` to re-run all jobs regardless. This is under active development and will likely change soon.
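
The resume behaviour described in the note above can be sketched as follows (the directory name is a hypothetical instance of the documented output layout, and the real check lives inside `run_grid_search.py`):

```shell
# Sketch of the skip-if-refined.cif-exists behaviour described in the note.
# The path below is a hypothetical instance of the documented layout.
out_dir="grid_search_results/protA/boltz2_xray/pure_guidance/ens4_gw0.1"
mkdir -p "$out_dir"
touch "$out_dir/refined.cif"

force_all=0  # set to 1 to mimic --force-all
if [ -f "$out_dir/refined.cif" ] && [ "$force_all" -eq 0 ]; then
    # Flags not encoded in the path cannot trigger a re-run on their own.
    echo "skip: $out_dir"
else
    echo "run: $out_dir"
fi
```

Because only the ensemble size and gradient weight are encoded in the directory name, this is why changing an unencoded flag alone never invalidates the cache.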

Instructions for running evaluation and metrics scripts are coming soon.
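
For reference, a hypothetical minimal `proteins.csv` matching the documented `name,structure,density,resolution` header (the paths and resolution value are invented for illustration, not taken from the repository):

```shell
# Write a hypothetical proteins.csv; the header matches the README,
# the data row is invented for illustration only.
cat > proteins.csv <<'EOF'
name,structure,density,resolution
protA,structures/protA.cif,maps/protA.ccp4,1.8
EOF
cat proteins.csv
```

Each row names one target plus its structure file, density map, and resolution, matching the `--proteins` column description in the argument table above.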

@@ -170,4 +170,4 @@ To develop on OS X, ensure you have [homebrew](https://brew.sh/) installed and r

There are different (and as yet untested) environments for `boltz`. `protenix` won't currently work on a Mac due to
the strict requirement of `triton` which requires an NVIDIA GPU. You may find similar issues with other environments.
- Debug as needed.
\ No newline at end of file
+ Debug as needed.
209 changes: 86 additions & 123 deletions run_all_models.sh
100755 → 100644
@@ -1,156 +1,119 @@
#!/bin/bash
- # Run all 4 model grid searches in parallel, 2 GPUs each
- # Total: 8 GPUs used (4 jobs x 2 GPUs each)
+ # Run all model grid searches in parallel: Boltz1, Boltz2, Protenix, and RF3
+ # Total: 16 GPUs used (4 jobs x 4 GPUs each)
#
- # Models:
- # - Boltz2 X-ray diffraction (GPUs 0,1)
- # - Boltz2 MD (GPUs 2,3)
- # - RosettaFold3 (GPUs 4,5)
- # - Protenix (GPUs 6,7)
- #
- # Checkpoints are BAKED INTO the Docker image at /checkpoints/.
- # If missing, the code auto-falls back to mounted paths.
+ # Checkpoints are BAKED INTO the Docker image - no need to mount them!
#
# Usage:
# ./run_all_models.sh

set -e

- # Configuration
- DATA_DIR="/mnt/diffuse-private/raw/sampleworks/initial_dataset_40_occ_sweeps"
- RESULTS_DIR="${RESULTS_DIR:-/data/sampleworks-exp/occ_sweep/grid_search_results}"
- MSA_CACHE_DIR="${MSA_CACHE_DIR:-/data/sampleworks-exp/msa_cache}"
+ # Configuration - uses absolute path to data
+ DATA_DIR="/mnt/diffuse-private/raw/sampleworks/initial_dataset_40"
+ RESULTS_DIR="${RESULTS_DIR:-$HOME/sampleworks-exp/grid_search_results}"
+ # Docker image to use (override with IMAGE env var)
+ IMAGE="${IMAGE:-diffuseproject/sampleworks:latest}"

- # Create directories
+ # Create output directory
mkdir -p "$RESULTS_DIR"
- mkdir -p "$MSA_CACHE_DIR"

# Pull latest image (no-op if already up to date)
echo "Pulling latest Docker image..."
docker pull diffuseproject/sampleworks:latest

# Common docker options
DOCKER_OPTS="--rm --shm-size=16g"

echo "=========================================="
- echo "Starting all model grid searches (4 jobs x 2 GPUs)"
+ echo "Starting all model grid searches"
+ echo "Models: boltz1, boltz2, protenix, rf3"
echo "Data: $DATA_DIR"
echo "Results: $RESULTS_DIR"
- echo "MSA Cache: $MSA_CACHE_DIR"
- echo "Checkpoints: BAKED INTO IMAGE (with mount fallback)"
- echo ""
- echo "Models:"
- echo " - Boltz2 X-ray (GPUs 0,1)"
- echo " - Boltz2 MD (GPUs 2,3)"
- echo " - RF3 (GPUs 4,5)"
- echo " - Protenix (GPUs 6,7)"
+ echo "Image: $IMAGE"
+ echo "Checkpoints: BAKED INTO IMAGE"
echo "=========================================="

- PIDS=()

- # --- Boltz2 X-ray Diffraction (GPUs 0,1) ---
- echo "[$(date)] Starting Boltz2 X-ray on GPUs 0,1"
- docker run $DOCKER_OPTS \
- --gpus '"device=0,1"' \
- -v "$DATA_DIR:/data/inputs:ro" \
- -v "$RESULTS_DIR:/data/results" \
- -v "$MSA_CACHE_DIR:/root/.sampleworks/msa" \
- diffuseproject/sampleworks:latest \
- -e boltz run_grid_search.py \
- --proteins "/data/inputs/proteins.csv" \
- --models boltz2 \
- --methods "X-RAY DIFFRACTION" \
- --scalers pure_guidance \
- --partial-diffusion-step 120 \
- --ensemble-sizes "8" \
- --gradient-weights "0.1 0.2 0.5" \
- --gradient-normalization --augmentation --align-to-input \
- --output-dir /data/results \
- 2>&1 | tee "$RESULTS_DIR/boltz2_xrd_run.log" &
- PIDS+=($!)
- echo "[$(date)] Boltz2 X-ray job started (PID: ${PIDS[-1]})"

- # --- Boltz2 MD (GPUs 2,3) ---
- echo "[$(date)] Starting Boltz2 MD on GPUs 2,3"
- docker run $DOCKER_OPTS \
- --gpus '"device=2,3"' \
- -v "$DATA_DIR:/data/inputs:ro" \
- -v "$RESULTS_DIR:/data/results" \
- -v "$MSA_CACHE_DIR:/root/.sampleworks/msa" \
- diffuseproject/sampleworks:latest \
- -e boltz run_grid_search.py \
- --proteins "/data/inputs/proteins.csv" \
- --models boltz2 \
- --methods "MD" \
- --scalers pure_guidance \
- --partial-diffusion-step 120 \
- --ensemble-sizes "8" \
- --gradient-weights "0.1 0.2 0.5" \
- --gradient-normalization --augmentation --align-to-input \
- --output-dir /data/results \
- 2>&1 | tee "$RESULTS_DIR/boltz2_md_run.log" &
- PIDS+=($!)
- echo "[$(date)] Boltz2 MD job started (PID: ${PIDS[-1]})"

- # --- RosettaFold3 (GPUs 4,5) ---
- echo "[$(date)] Starting RosettaFold3 on GPUs 4,5"
- docker run $DOCKER_OPTS \
- --gpus '"device=4,5"' \
- -v "$DATA_DIR:/data/inputs:ro" \
- -v "$RESULTS_DIR:/data/results" \
- -v "$MSA_CACHE_DIR:/root/.sampleworks/msa" \
- diffuseproject/sampleworks:latest \
- -e rf3 run_grid_search.py \
- --proteins "/data/inputs/proteins.csv" \
- --models rf3 \
- --partial-diffusion-step 120 \
- --scalers pure_guidance \
- --ensemble-sizes "8" \
- --gradient-weights "0.01 0.02 0.05" \
- --gradient-normalization --augmentation --align-to-input \
- --output-dir /data/results \
- 2>&1 | tee "$RESULTS_DIR/rf3_run.log" &
- PIDS+=($!)
- echo "[$(date)] RosettaFold3 job started (PID: ${PIDS[-1]})"

- # --- Protenix (GPUs 6,7) ---
- echo "[$(date)] Starting Protenix on GPUs 6,7"
- docker run $DOCKER_OPTS \
- --gpus '"device=6,7"' \
- -v "$DATA_DIR:/data/inputs:ro" \
- -v "$RESULTS_DIR:/data/results" \
- -v "$MSA_CACHE_DIR:/root/.sampleworks/msa" \
- diffuseproject/sampleworks:latest \
- -e protenix run_grid_search.py \
- --proteins "/data/inputs/proteins.csv" \
- --models protenix \
- --scalers pure_guidance \
- --partial-diffusion-step 120 \
- --ensemble-sizes "8" \
- --gradient-weights "0.1 0.2 0.5" \
- --gradient-normalization --augmentation --align-to-input \
- --output-dir /data/results \
- 2>&1 | tee "$RESULTS_DIR/protenix_run.log" &
- PIDS+=($!)
- echo "[$(date)] Protenix job started (PID: ${PIDS[-1]})"
+ # Track background job PIDs
+ declare -a PIDS=()
+ declare -a PID_NAMES=()

+ # Function to run a model with specific GPUs
+ # Usage: run_model <model> <env> <gpus> [extra_args...]
+ run_model() {
+ local model=$1
+ local env=$2
+ local gpus=$3
+ shift 3
+ local extra_args=("$@")

+ echo "[$(date)] Starting $model on GPUs $gpus"

+ docker run $DOCKER_OPTS \
+ --gpus "\"device=$gpus\"" \
+ -v /mnt/diffuse-private:/mnt/diffuse-private:ro \
+ -v "$RESULTS_DIR:/data/results" \
+ "$IMAGE" \
+ -e "$env" run_grid_search.py \
+ --proteins "$DATA_DIR/proteins.csv" \
+ --model "$model" \
+ --scalers "pure_guidance" \
+ --ensemble-sizes "1 4" \
+ --gradient-weights "0.1 0.2" \
+ --gradient-normalization --augmentation --align-to-input \
+ --output-dir /data/results \
+ "${extra_args[@]}" \
+ 2>&1 | tee "$RESULTS_DIR/${model}_run.log" &

+ PIDS+=($!)
+ PID_NAMES+=("$model")
+ echo "[$(date)] $model job started (PID: $!)"
+ }

+ # Run all four models in parallel with 4 GPUs each:
+ # - boltz1: GPUs 0,1,2,3
+ # - boltz2: GPUs 4,5,6,7
+ # - protenix: GPUs 8,9,10,11
+ # - rf3: GPUs 12,13,14,15

+ # Boltz1 (GPUs 0-3) - checkpoints baked in, uses defaults
+ run_model "boltz1" "boltz" "0,1,2,3"

+ # Boltz2 (GPUs 4-7) - needs --method flag
+ run_model "boltz2" "boltz" "4,5,6,7" --method "X-RAY DIFFRACTION"

+ # Protenix (GPUs 8-11)
+ run_model "protenix" "protenix" "8,9,10,11"

+ # RF3 (GPUs 12-15)
+ run_model "rf3" "rf3" "12,13,14,15"

echo ""
echo "=========================================="
- echo "All 4 jobs launched! PIDs: ${PIDS[*]}"
+ echo "All model jobs launched!"
echo "Logs:"
- echo " - $RESULTS_DIR/boltz2_xrd_run.log"
- echo " - $RESULTS_DIR/boltz2_md_run.log"
- echo " - $RESULTS_DIR/rf3_run.log"
+ echo " - $RESULTS_DIR/boltz1_run.log"
+ echo " - $RESULTS_DIR/boltz2_run.log"
+ echo " - $RESULTS_DIR/protenix_run.log"
+ echo " - $RESULTS_DIR/rf3_run.log"
echo ""
echo "Monitor GPU usage: nvidia-smi -l 1"
echo "Waiting for all jobs to complete..."
echo "=========================================="

- # Wait for all background jobs
- wait
+ # Wait for all background jobs and check exit codes
+ overall_exit=0
+ for i in "${!PIDS[@]}"; do
+ if wait "${PIDS[$i]}"; then
+ echo "[$(date)] ${PID_NAMES[$i]} completed successfully"
+ else
+ echo "[$(date)] ${PID_NAMES[$i]} FAILED (exit code: $?)"
+ overall_exit=1
+ fi
+ done

echo ""
echo "=========================================="
- echo "[$(date)] All jobs completed!"
+ if [ $overall_exit -eq 0 ]; then
+ echo "[$(date)] All jobs completed successfully!"
+ else
+ echo "[$(date)] Some jobs FAILED — check logs above"
+ fi
echo "=========================================="
+ exit $overall_exit
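
The per-PID wait loop added in this file is worth calling out. One caveat: each job is launched as a `docker run ... | tee log &` pipeline, and without `set -o pipefail` the status `wait` reports is that of the last pipeline stage (`tee`), which can mask a failed `docker run`. The sketch below shows the launch-then-wait-per-PID pattern as a standalone script with pipefail enabled; `launch` and its dummy pipeline are stand-ins invented for illustration:

```shell
#!/bin/bash
# Standalone sketch of the launch-and-wait-per-PID pattern used in
# run_all_models.sh. pipefail makes a background pipeline report the first
# failing stage instead of the exit code of the final command (e.g. tee).
set -o pipefail

pids=()
names=()

launch() {
    local name=$1 status=$2
    # Stand-in for "docker run ... | tee log &": a pipeline in the background.
    { sh -c "exit $status" | cat; } &
    pids+=($!)
    names+=("$name")
}

launch "good_job" 0
launch "bad_job" 1

overall=0
for i in "${!pids[@]}"; do
    if wait "${pids[$i]}"; then
        echo "${names[$i]} succeeded"
    else
        echo "${names[$i]} failed"
        overall=1
    fi
done
echo "overall exit: $overall"
```

Waiting on each PID individually, rather than a bare `wait`, is what lets the script report per-model success and still propagate an overall failure code.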
2 changes: 1 addition & 1 deletion scripts/eval/rscc_grid_search_script.py
@@ -322,4 +322,4 @@ def main(args: argparse.Namespace):

if __name__ == "__main__":
args = parse_args("Evaluate RSCC on grid search results.")
- main(args)
\ No newline at end of file
+ main(args)
13 changes: 5 additions & 8 deletions tests/integration/test_mismatch_integration.py
@@ -12,7 +12,7 @@
from atomworks.io.transforms.atom_array import ensure_atom_array_stack
from biotite.structure import AtomArray
from sampleworks.core.rewards.protocol import RewardInputs
- from sampleworks.core.samplers.edm import AF3EDMSampler, EDMSamplerConfig
+ from sampleworks.core.samplers.edm import AF3EDMSampler
from sampleworks.core.samplers.protocol import StepParams
from sampleworks.core.scalers.fk_steering import FKSteering
from sampleworks.core.scalers.pure_guidance import PureGuidance
@@ -720,14 +720,13 @@ class TestSamplerStep:
@pytest.fixture
def sampler(self) -> AF3EDMSampler:
"""Sampler configured for deterministic mismatch tests."""
- config = EDMSamplerConfig(
+ return AF3EDMSampler(
augmentation=False,
align_to_input=True,
alignment_reverse_diffusion=False,
scale_guidance_to_diffusion=True,
device="cpu",
)
- return AF3EDMSampler(config)

def _context_with_reference(
self,
@@ -783,14 +782,13 @@ def test_alignment_reduces_rmsd(self, mismatch_case: MismatchCase, sampler: AF3E
state = torch.randn(1, mismatch_case.n_model, 3)
context = self._context_with_reference(reconciler, reference)

- config_no_align = EDMSamplerConfig(
+ sampler_no_align = AF3EDMSampler(
augmentation=False,
align_to_input=False,
alignment_reverse_diffusion=False,
scale_guidance_to_diffusion=True,
device="cpu",
)
- sampler_no_align = AF3EDMSampler(config_no_align)

torch.manual_seed(42)
output_aligned = sampler.step(state.clone(), wrapper, context, features=features)
@@ -879,8 +877,7 @@ def _run_scaler(self, case: MismatchCase, scaler_type: str, reward) -> Any:
"asym_unit": case.struct_atom_array.copy(),
"metadata": {"id": case.id},
}
- config = EDMSamplerConfig(augmentation=False, align_to_input=True, device="cpu")
- sampler = AF3EDMSampler(config)
+ sampler = AF3EDMSampler(augmentation=False, align_to_input=True, device="cpu")
step_scaler = DataSpaceDPSScaler(step_size=0.01)

if scaler_type == "pure_guidance":
@@ -1023,4 +1020,4 @@ def test_save_with_model_template(self, tmp_path: Path):
)

assert (tmp_path / "refined.cif").exists()
- assert (tmp_path / "losses.txt").exists()
\ No newline at end of file
+ assert (tmp_path / "losses.txt").exists()