PR 205 feedback fixes

marcuscollins · marcuscollins · commit ec53661df2ff · 2026-04-09T16:00:36.000-07:00
diff --git a/run_grid_search.py b/run_grid_search.py
@@ -450,7 +450,15 @@ def parse_args() -> argparse.Namespace:
         "--recycling-steps",
         type=int,
         default=None,
-        help="Number of recycling steps for model inference (if not specified, uses model default)",
+        help="Number of recycling steps for model inference. If not specified, "
+        "uses model default, which can be found in each model's wrapper.py file",
+    )
+    parser.add_argument(
+        "--num-diffusion-steps",
+        type=int,
+        default=200,
+        help="Number of diffusion steps for model inference. If not specified, "
+        "defaults to %(default)s",
     )
 
     # Trajectory scaling arguments
@@ -460,9 +468,6 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument(
         "--ensemble-sizes", default="1 2 4 8", help="Space-separated ensemble sizes"
     )
-    parser.add_argument(
-        "--num-diffusion-steps", type=int, default=200, help="Number of diffusion steps"
-    )
     parser.add_argument(
         "--gradient-weights",
         default="0.01 0.1 0.2",
diff --git a/src/sampleworks/models/boltz/wrapper.py b/src/sampleworks/models/boltz/wrapper.py
@@ -331,7 +331,7 @@ def process_structure_for_boltz(
     out_dir: str | Path | None = None,
     num_workers: int = 8,
     ensemble_size: int = 1,
-    recycling_steps: int = 3,
+    recycling_steps: int | None = 3,
 ) -> dict:
     """Annotate an Atomworks structure with Boltz-specific configuration.
 
@@ -346,14 +346,20 @@ def process_structure_for_boltz(
         Number of parallel workers for preprocessing.
     ensemble_size : int
         Number of samples to generate (batch dimension of x_init).
-    recycling_steps : int
+    recycling_steps : int | None
         Number of recycling steps to perform during featurization Pairformer pass.
+        If None, defaults to 3.
 
     Returns
     -------
     dict
         Structure dict with "_boltz_config" key added.
     """
+    # Other models define a default deeper in their code,
+    # but Boltz requires an integer value, so resolve None to 3 here.
+    if recycling_steps is None:
+        recycling_steps = 3
+
     config = BoltzConfig(
         out_dir=out_dir or structure.get("metadata", {}).get("id", "boltz_output"),
         num_workers=num_workers,
@@ -603,7 +609,7 @@ def _setup_data_module(
 
         processed_dir = out_dir / "processed"
         processed = BoltzProcessedInput(
-            manifest=Manifest.load(processed_dir / "manifest.json"),  # type: ignore (Boltz repo doesn't have the right type hints?)
+            manifest=Manifest.load(processed_dir / "manifest.json"),
             targets_dir=processed_dir / "structures",
             msa_dir=processed_dir / "msa",
             constraints_dir=(processed_dir / "constraints")
@@ -783,27 +789,25 @@ def _pairformer_pass(
 
             if self.model.use_templates:
                 if self.model.is_template_compiled:
-                    template_module = (
-                        self.model.template_module._orig_mod  # type: ignore (compiled torch module has this attribute, type checker doesn't know)
-                    )
+                    template_module = self.model.template_module._orig_mod
                 else:
                     template_module = self.model.template_module
 
-                z = z + template_module(z, features, pair_mask, use_kernels=self.model.use_kernels)  # type: ignore (Object will be callable here)
+                z = z + template_module(z, features, pair_mask, use_kernels=self.model.use_kernels)
 
             if self.model.is_msa_compiled:
-                msa_module = self.model.msa_module._orig_mod  # type: ignore (compiled torch module has this attribute, type checker doesn't know)
+                msa_module = self.model.msa_module._orig_mod
             else:
                 msa_module = self.model.msa_module
 
-            z = z + msa_module(z, s_inputs, features, use_kernels=self.model.use_kernels)  # type: ignore (Object will be callable here)
+            z = z + msa_module(z, s_inputs, features, use_kernels=self.model.use_kernels)
 
             if self.model.is_pairformer_compiled:
-                pairformer_module = self.model.pairformer_module._orig_mod  # type: ignore (compiled torch module has this attribute, type checker doesn't know)
+                pairformer_module = self.model.pairformer_module._orig_mod
             else:
                 pairformer_module = self.model.pairformer_module
 
-            s, z = pairformer_module(s, z, mask=mask, pair_mask=pair_mask)  # type: ignore (Object will be callable here)
+            s, z = pairformer_module(s, z, mask=mask, pair_mask=pair_mask)
 
         q, c, to_keys, atom_enc_bias, atom_dec_bias, token_trans_bias = (
             self.model.diffusion_conditioning(
@@ -1068,7 +1072,7 @@ def _setup_data_module(
 
         processed_dir = out_dir / "processed"
         processed = BoltzProcessedInput(
-            manifest=Manifest.load(processed_dir / "manifest.json"),  # type: ignore (Boltz repo doesn't have the right type hints?)
+            manifest=Manifest.load(processed_dir / "manifest.json"),
             targets_dir=processed_dir / "structures",
             msa_dir=processed_dir / "msa",
             constraints_dir=(processed_dir / "constraints")
@@ -1357,7 +1361,7 @@ def _pairformer_pass(
                 )
 
             if self.model.is_pairformer_compiled:
-                pairformer_module = self.model.pairformer_module._orig_mod  # type: ignore (compiled torch module has this attribute, type checker doesn't know)
+                pairformer_module = self.model.pairformer_module._orig_mod
             else:
                 pairformer_module = self.model.pairformer_module
 
@@ -1367,7 +1371,7 @@ def _pairformer_pass(
                 mask=mask,
                 pair_mask=pair_mask,
                 use_kernels=self.model.use_kernels,
-            )  # type: ignore (Object will be callable here)
+            )
 
         return {
             "s": s,
diff --git a/src/sampleworks/models/protenix/wrapper.py b/src/sampleworks/models/protenix/wrapper.py
@@ -687,9 +687,9 @@ def step(
             s_inputs=s_inputs,
             s_trunk=s_trunk,
             z_trunk=z_trunk,
-            pair_z=pair_z,  # ty: ignore[invalid-argument-type]
-            p_lm=p_lm,  # ty: ignore[invalid-argument-type]
-            c_l=c_l,  # ty: ignore[invalid-argument-type]
+            pair_z=pair_z,
+            p_lm=p_lm,
+            c_l=c_l,
         )
 
         # TODO: is there a way to handle this more cleanly?
diff --git a/src/sampleworks/models/rf3/wrapper.py b/src/sampleworks/models/rf3/wrapper.py
@@ -326,7 +326,7 @@ def featurize(self, structure: dict) -> GenerativeModelInput[RF3Conditioning]:
         )  # since we're not batching, the loader returns a list of length 1
 
         # (Hydra instantiation of pipeline means it is going to be hard to type check here)
-        pipeline_output = self.inference_engine.pipeline(input_spec.to_pipeline_input())  # ty: ignore[call-non-callable]
+        pipeline_output = self.inference_engine.pipeline(input_spec.to_pipeline_input())  # type: ignore[call-non-callable]
         pipeline_output = trainer.fabric.to_device(pipeline_output)
 
         features = trainer._assemble_network_inputs(pipeline_output)
diff --git a/src/sampleworks/utils/guidance_script_utils.py b/src/sampleworks/utils/guidance_script_utils.py
@@ -27,7 +27,6 @@
 from sampleworks.core.scalers.pure_guidance import PureGuidance
 from sampleworks.core.scalers.step_scalers import (
     DataSpaceDPSScaler,
-    NoScalingScaler,
     NoiseSpaceDPSScaler,
     NoScalingScaler,
 )
@@ -232,7 +231,7 @@ def get_reward_function_and_structure(
     logger.debug(f"Loading structure from {structure_path}")
     safe_structure_path = resolve_mixed_hetatm_atom_altlocs(Path(structure_path))
     structure = parse(
-        Path(safe_structure_path),
+        safe_structure_path,
         hydrogen_policy="remove",
         add_missing_atoms=False,
         ccd_mirror_path=None,
@@ -428,14 +427,22 @@ def _run_guidance(
     is_boltz = "Boltz" in wrapper_class_name
 
     # Annotate structure with model-specific configuration (including recycling_steps)
+    # See https://github.com/diff-use/sampleworks/issues/192 for a plan to organize this better.
     recycling_steps = getattr(args, "recycling_steps", None)
+    if recycling_steps is not None and recycling_steps <= 0:
+        raise ValueError("recycling_steps must be > 0")
+    if args.num_diffusion_steps is not None and args.num_diffusion_steps <= 0:
+        raise ValueError("num_diffusion_steps must be > 0")
+
     if "Protenix" in wrapper_class_name:
         from sampleworks.models.protenix.wrapper import annotate_structure_for_protenix
+
         structure = annotate_structure_for_protenix(
             structure, ensemble_size=args.ensemble_size, recycling_steps=recycling_steps
         )
     elif "RF3" in wrapper_class_name:
         from sampleworks.models.rf3.wrapper import annotate_structure_for_rf3
+
         structure = annotate_structure_for_rf3(
             structure,
             ensemble_size=args.ensemble_size,
@@ -446,6 +453,7 @@ def _run_guidance(
         )
     elif "Boltz" in wrapper_class_name:
         from sampleworks.models.boltz.wrapper import process_structure_for_boltz
+
         structure = process_structure_for_boltz(
             structure, ensemble_size=args.ensemble_size, recycling_steps=recycling_steps
         )