diff --git a/run_all_models.sh b/run_all_models.sh
index c958a7cf..76e9f6a1 100755
--- a/run_all_models.sh
+++ b/run_all_models.sh
@@ -58,8 +58,8 @@ docker run $DOCKER_OPTS \
     diffuseproject/sampleworks:latest \
     -e boltz run_grid_search.py \
     --proteins "/data/inputs/proteins.csv" \
-    --models boltz2 \
-    --methods "X-RAY DIFFRACTION" \
+    --model boltz2 \
+    --method "X-RAY DIFFRACTION" \
     --scalers pure_guidance \
     --partial-diffusion-step 120 \
     --ensemble-sizes "8" \
@@ -80,8 +80,8 @@ docker run $DOCKER_OPTS \
     diffuseproject/sampleworks:latest \
     -e boltz run_grid_search.py \
     --proteins "/data/inputs/proteins.csv" \
-    --models boltz2 \
-    --methods "MD" \
+    --model boltz2 \
+    --method "MD" \
     --scalers pure_guidance \
     --partial-diffusion-step 120 \
     --ensemble-sizes "8" \
@@ -102,7 +102,7 @@ docker run $DOCKER_OPTS \
     diffuseproject/sampleworks:latest \
     -e rf3 run_grid_search.py \
     --proteins "/data/inputs/proteins.csv" \
-    --models rf3 \
+    --model rf3 \
     --partial-diffusion-step 120 \
     --scalers pure_guidance \
     --ensemble-sizes "8" \
@@ -123,7 +123,7 @@ docker run $DOCKER_OPTS \
     diffuseproject/sampleworks:latest \
    -e protenix run_grid_search.py \
     --proteins "/data/inputs/proteins.csv" \
-    --models protenix \
+    --model protenix \
     --scalers pure_guidance \
     --partial-diffusion-step 120 \
     --ensemble-sizes "8" \
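Note: since the wrapper script now pins exactly one model per container, sweeping several predictors means one `run_grid_search.py` invocation per model. A minimal sketch of that pattern; the driver script and sweep list here are hypothetical (not part of this PR), while the flag names come from the diff above:

```python
# Hypothetical driver (illustration only): launch one single-model grid search
# per predictor, mirroring the per-model docker invocations in run_all_models.sh.
import subprocess

MODELS = ["boltz2", "rf3", "protenix"]  # illustrative sweep list

for model in MODELS:
    cmd = [
        "python", "run_grid_search.py",
        "--proteins", "/data/inputs/proteins.csv",
        "--model", model,
        "--scalers", "pure_guidance",
        "--partial-diffusion-step", "120",
        "--ensemble-sizes", "8",
    ]
    if model == "boltz2":
        # only the Boltz2 runs vary --method in run_all_models.sh
        cmd += ["--method", "X-RAY DIFFRACTION"]
    subprocess.run(cmd, check=True)
```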
diff --git a/run_grid_search.py b/run_grid_search.py
index 10aa04e7..8aaafa78 100755
--- a/run_grid_search.py
+++ b/run_grid_search.py
@@ -25,12 +25,12 @@
 @dataclass
 class GridSearchConfig:
-    models: list[str]
+    model: str
     scalers: list[str]
     ensemble_sizes: list[int]
     gradient_weights: list[float]
     gd_steps: list[int]
-    methods: list[str]
+    method: str
     proteins_file: str
     output_dir: str
@@ -244,13 +244,6 @@ def main(args: argparse.Namespace):
     log_args(args, gpus)
 
-    if len(args.models.split()) > 1:
-        # this is designed to run one type of model per script, # TODO to allow multiple models
-        raise ValueError("Multiple --models selected, this is not compatible with the new script!")
-    if len(args.methods.split(",")) > 1:
-        # this is designed to run one type of model per script, # TODO to allow multiple models
-        raise ValueError("Multiple --methods selected, this is not compatible with the new script!")
-
     filtered_jobs, job_statuses = generate_and_filter_jobs(args)
 
     if len(filtered_jobs) == 0:
         return
@@ -258,12 +251,12 @@ def main(args: argparse.Namespace):
     config = GridSearchConfig(
-        models=args.models.split(),
+        model=args.model,
         scalers=args.scalers.split(),
         ensemble_sizes=[int(x) for x in args.ensemble_sizes.split()],
         gradient_weights=[float(x) for x in args.gradient_weights.split()],
         gd_steps=[int(x) for x in args.num_gd_steps.split()],
-        methods=[m.strip() for m in args.methods.split(",")],
+        method=args.method,
         proteins_file=args.proteins,
         output_dir=args.output_dir,
     )
@@ -284,81 +277,75 @@ def generate_jobs(args: argparse.Namespace) -> list[JobConfig]:
     jobs = []
     proteins = ProteinInput.from_csv(Path(args.proteins))
 
-    models = args.models.split()
+    model = args.model
     scalers = args.scalers.split()
     ensemble_sizes = [int(x) for x in args.ensemble_sizes.split()]
     gradient_weights = [float(x) for x in args.gradient_weights.split()]
     gd_steps_list = [int(x) for x in args.num_gd_steps.split()]
-    methods = [m.strip() for m in args.methods.split(",")]
 
     for protein in proteins:
         structure = protein.structure
-        density = str(protein.density)  # in case patch for Path in qfit.volume doesn't work
+        density = str(protein.density)
         resolution = protein.resolution
         protein_name = protein.name
 
-        for model in models:
-            model_methods = methods if model == StructurePredictor.BOLTZ_2 else [None]
-
-            for method in model_methods:
-                method_suffix = f"_{method.replace(' ', '_')}" if method else ""
-
-                for scaler in scalers:
-                    if scaler == GuidanceType.FK_STEERING:
-                        for ens in ensemble_sizes:
-                            for gw in gradient_weights:
-                                for gd in gd_steps_list:
-                                    output_dir = os.path.join(
-                                        args.output_dir,
-                                        protein_name,
-                                        f"{model}{method_suffix}",
-                                        scaler,
-                                        f"ens{ens}_gw{gw}_gd{gd}",
-                                    )
-                                    log_path = os.path.join(output_dir, "run.log")
-                                    jobs.append(
-                                        JobConfig(
-                                            protein=protein_name,
-                                            structure_path=structure,
-                                            density_path=density,
-                                            resolution=resolution,
-                                            model=model,
-                                            scaler=scaler,
-                                            ensemble_size=ens,
-                                            gradient_weight=gw,
-                                            gd_steps=gd,
-                                            method=method,
-                                            output_dir=output_dir,
-                                            log_path=log_path,
-                                        )
-                                    )
-                    else:
-                        for ens in ensemble_sizes:
-                            for gw in gradient_weights:
-                                output_dir = os.path.join(
-                                    args.output_dir,
-                                    protein_name,
-                                    f"{model}{method_suffix}",
-                                    scaler,
-                                    f"ens{ens}_gw{gw}",
-                                )
-                                log_path = os.path.join(output_dir, "run.log")
-                                jobs.append(
-                                    JobConfig(
-                                        protein=protein_name,
-                                        structure_path=structure,
-                                        density_path=density,
-                                        resolution=resolution,
-                                        model=model,
-                                        scaler=scaler,
-                                        ensemble_size=ens,
-                                        gradient_weight=gw,
-                                        gd_steps=1,
-                                        method=method,
-                                        output_dir=output_dir,
-                                        log_path=log_path,
-                                    )
-                                )
+        method_suffix = f"_{args.method.replace(' ', '_')}" if args.method else ""
+        for scaler in scalers:
+            if scaler == GuidanceType.FK_STEERING:
+                for ens in ensemble_sizes:
+                    for gw in gradient_weights:
+                        for gd in gd_steps_list:
+                            output_dir = os.path.join(
+                                args.output_dir,
+                                protein_name,
+                                f"{model}{method_suffix}",
+                                scaler,
+                                f"ens{ens}_gw{gw}_gd{gd}",
+                            )
+                            log_path = os.path.join(output_dir, "run.log")
+                            jobs.append(
+                                JobConfig(
+                                    protein=protein_name,
+                                    structure_path=structure,
+                                    density_path=density,
+                                    resolution=resolution,
+                                    model=model,
+                                    scaler=scaler,
+                                    ensemble_size=ens,
+                                    gradient_weight=gw,
+                                    gd_steps=gd,
+                                    method=args.method,
+                                    output_dir=output_dir,
+                                    log_path=log_path,
+                                )
+                            )
+            else:
+                for ens in ensemble_sizes:
+                    for gw in gradient_weights:
+                        output_dir = os.path.join(
+                            args.output_dir,
+                            protein_name,
+                            f"{model}{method_suffix}",
+                            scaler,
+                            f"ens{ens}_gw{gw}",
+                        )
+                        log_path = os.path.join(output_dir, "run.log")
+                        jobs.append(
+                            JobConfig(
+                                protein=protein_name,
+                                structure_path=structure,
+                                density_path=density,
+                                resolution=resolution,
+                                model=model,
+                                scaler=scaler,
+                                ensemble_size=ens,
+                                gradient_weight=gw,
+                                gd_steps=1,
+                                method=args.method,
+                                output_dir=output_dir,
+                                log_path=log_path,
+                            )
+                        )
 
     return jobs
@@ -431,16 +418,36 @@ def save_results(
 
 def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
-        description="Run grid search across models, scalers, and parameters."
+        description="Run grid search across scalers and parameters for a single "
+        "protein structure predictor model."
     )
-
+    # Experiment-level arguments
     parser.add_argument(
         "--proteins",
         required=True,
-        help="CSV file with columns: structure,density,resolution,name",
+        help="CSV file with columns: structure,density,resolution,name"
+    )
+
+    # Model arguments
+    parser.add_argument(
+        "--model",
+        default="boltz2",
+        choices=["boltz1", "boltz2", "protenix", "rf3"],
+        help="The protein structure predictor model to use"
+    )
+    parser.add_argument(
+        "--model-checkpoint",
+        default="",
+        help="Override the default checkpoint path for the selected model"
+    )
+    parser.add_argument(
+        "--method",
+        default="X-RAY DIFFRACTION",
+        choices=["X-RAY DIFFRACTION", "MD"],
+        help="Method for Boltz2 ('X-RAY DIFFRACTION' or 'MD')",
     )
-    parser.add_argument("--models", default="boltz2 protenix", help="Space-separated models")
+
+    # Trajectory scaling arguments
     parser.add_argument(
         "--scalers", default="pure_guidance fk_steering", help="Space-separated scalers"
     )
@@ -450,28 +457,22 @@ def parse_args() -> argparse.Namespace:
     parser.add_argument(
         "--gradient-weights",
         default="0.01 0.1 0.2",
-        help="Space-separated gradient weights",
+        help="Space-separated gradient weights/step sizes",
+    )
+    parser.add_argument(
+        "--partial-diffusion-step", type=int, default=0, help="Partial diffusion step"
     )
     parser.add_argument(
         "--num-gd-steps",
         default="20",
         help="Space-separated GD steps (FK steering only)",
     )
-    parser.add_argument("--output-dir", default="./grid_search_results", help="Output directory")
     parser.add_argument(
-        "--model-checkpoint",
-        default="",
-        help="Override the default checkpoint path for the selected model",
+        "--num-particles", type=int, default=3, help="FK steering: num particles"
     )
     parser.add_argument(
-        "--methods",
-        default="X-RAY DIFFRACTION",
-        help="Comma-separated methods for Boltz2",
+        "--fk-lambda", type=float, default=0.5, help="FK steering: lambda"
     )
-
-    parser.add_argument("--num-particles", type=int, default=3, help="FK steering: num particles")
-    parser.add_argument("--fk-lambda", type=float, default=0.5, help="FK steering: lambda")
     parser.add_argument(
         "--fk-resampling-interval",
         type=int,
@@ -479,11 +480,14 @@ def parse_args() -> argparse.Namespace:
         help="FK steering: resampling interval",
     )
 
+    # Step scaler arguments
     parser.add_argument(
-        "--partial-diffusion-step", type=int, default=0, help="Partial diffusion step"
+        "--step-scaler-type",
+        type=str,
+        default="noisespace",
+        choices=["dataspace", "noisespace", "none"],
+        help="Type of step scaler to use (pure guidance only)",
     )
-    parser.add_argument("--loss-order", type=int, default=2, help="L1 (1) or L2 (2) loss")
-    parser.add_argument("--use-tweedie", action="store_true", help="Use Tweedie (pure guidance)")
     parser.add_argument(
         "--gradient-normalization",
         action="store_true",
     )
     parser.add_argument("--augmentation", action="store_true", help="Enable augmentation")
     parser.add_argument("--align-to-input", action="store_true", help="Align to input structure")
 
+    # Reward/loss function arguments
+    parser.add_argument("--loss-order", type=int, default=2, help="L1 (1) or L2 (2) loss")
+
+    # Output arguments
+    parser.add_argument("--output-dir", default="./grid_search_results", help="Output directory")
+
+    # Run selection and hardware arguments
     parser.add_argument(
         "--max-parallel",
         default="auto",
         help="Max parallel jobs (default: auto = number of GPUs)",
     )
-    parser.add_argument("--dry-run", action="store_true", help="Print commands without executing")
-
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Print commands without executing",
+    )
     parser.add_argument(
         "--force-all",
         action="store_true",
@@ -521,12 +535,13 @@ def parse_args() -> argparse.Namespace:
 def log_args(args: argparse.Namespace, gpus: list[str]):
     log.info("=" * 50)
     log.info("Starting grid search")
-    log.info(f"Models: {args.models}")
+    log.info(f"Model: {args.model}")
+    if args.model == "boltz2":
+        log.info(f"Boltz2 method: {args.method}")
     log.info(f"Scalers: {args.scalers}")
     log.info(f"Ensemble sizes: {args.ensemble_sizes}")
     log.info(f"Gradient weights: {args.gradient_weights}")
     log.info(f"GD steps: {args.num_gd_steps}")
-    log.info(f"Boltz2 methods: {args.methods}")
     log.info(f"Output directory: {args.output_dir}")
     log.info(f"GPUs: {gpus}")
     log.info(f"Dry run: {args.dry_run}")
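Note: the multi-model/multi-method guards removed from `main()` are made redundant by the new argument definitions, since a single-valued `--model` restricted by `choices` can never carry more than one model. A standalone sketch of that behavior (plain argparse, not repo code):

```python
# Standalone repro: argparse `choices` now does the job of the old ValueError guard.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--model", default="boltz2", choices=["boltz1", "boltz2", "protenix", "rf3"])
parser.add_argument("--method", default="X-RAY DIFFRACTION", choices=["X-RAY DIFFRACTION", "MD"])

print(parser.parse_args(["--model", "rf3"]).model)  # -> rf3

try:
    # a space-separated list is no longer a valid value
    parser.parse_args(["--model", "boltz2 protenix"])
except SystemExit:
    print("rejected: one model per run")  # argparse prints "invalid choice" and exits
```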
diff --git a/src/sampleworks/utils/guidance_script_arguments.py b/src/sampleworks/utils/guidance_script_arguments.py
index ed91df40..509c64ae 100644
--- a/src/sampleworks/utils/guidance_script_arguments.py
+++ b/src/sampleworks/utils/guidance_script_arguments.py
@@ -185,7 +185,7 @@ def populate_config_for_guidance_type(self, job: JobConfig, args: argparse.Namespace):
             self.ensemble_size = job.ensemble_size
         else:
             self.step_size = job.gradient_weight
-            self.use_tweedie = args.use_tweedie
+            self.step_scaler_type = args.step_scaler_type
             self.ensemble_size = job.ensemble_size
 
 
@@ -246,9 +246,11 @@ def add_generic_args(parser: argparse.ArgumentParser | GuidanceConfig):
 def add_pure_guidance_args(parser: argparse.ArgumentParser | GuidanceConfig):
     parser.add_argument("--step-size", type=float, default=0.1, help="Gradient step")
     parser.add_argument(
-        "--use-tweedie",
-        action="store_true",
-        help="Use Tweedie's formula for gradient computation (enables augmentation/alignment)",
+        "--step-scaler-type",
+        type=str,
+        default="noisespace",
+        choices=["dataspace", "noisespace", "none"],
+        help="Type of step scaler to use: dataspace (DataSpaceDPSScaler), "
+        "noisespace (NoiseSpaceDPSScaler), or none (NoScalingScaler)",
     )
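Note: for callers migrating existing commands, the old boolean flag maps onto the new enum as follows (per the dispatch in `guidance_script_utils.py` below): passing `--use-tweedie` behaved like `--step-scaler-type dataspace`, and omitting it like the new default, `noisespace`. A small sketch of that mapping; the `migrate_flags` helper is hypothetical:

```python
# Hypothetical migration helper (illustration only, not repo code).
def migrate_flags(old_args: list[str]) -> list[str]:
    """Rewrite a legacy pure-guidance argument list to the new interface."""
    if "--use-tweedie" in old_args:
        # --use-tweedie selected DataSpaceDPSScaler
        return [a for a in old_args if a != "--use-tweedie"] + ["--step-scaler-type", "dataspace"]
    # no flag previously meant NoiseSpaceDPSScaler, which is the new default
    return old_args + ["--step-scaler-type", "noisespace"]


print(migrate_flags(["--step-size", "0.1", "--use-tweedie"]))
# -> ['--step-size', '0.1', '--step-scaler-type', 'dataspace']
```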
diff --git a/src/sampleworks/utils/guidance_script_utils.py b/src/sampleworks/utils/guidance_script_utils.py
index deea463e..bbefb327 100644
--- a/src/sampleworks/utils/guidance_script_utils.py
+++ b/src/sampleworks/utils/guidance_script_utils.py
@@ -27,6 +27,7 @@ from sampleworks.core.scalers.pure_guidance import PureGuidance
 from sampleworks.core.scalers.step_scalers import (
     DataSpaceDPSScaler,
+    NoScalingScaler,
     NoiseSpaceDPSScaler,
 )
 from sampleworks.utils.guidance_constants import (
@@ -435,17 +436,21 @@ def _run_guidance(
     )
 
     # Create step scaler for gradient-based guidance
-    use_tweedie = getattr(args, "use_tweedie", False)
-    if use_tweedie:
+    step_scaler_type = getattr(args, "step_scaler_type", "noisespace")
+    if step_scaler_type == "dataspace":
         step_scaler = DataSpaceDPSScaler(
             step_size=args.step_size,
             gradient_normalization=args.gradient_normalization,
         )
-    else:
+    elif step_scaler_type == "noisespace":
         step_scaler = NoiseSpaceDPSScaler(
             step_size=args.step_size,
             gradient_normalization=args.gradient_normalization,
         )
+    elif step_scaler_type == "none":
+        step_scaler = NoScalingScaler()
+    else:
+        raise ValueError(f"Invalid step_scaler_type: {step_scaler_type}")
 
     # TODO: this should be a config option
     num_steps = 200
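Note: a possible follow-up, not part of this diff, is making the growing `if`/`elif` dispatch in `_run_guidance` table-driven while keeping the same `ValueError` for unknown types. A sketch under the constructor signatures shown in the hunk above:

```python
# Sketch of a table-driven alternative to the if/elif dispatch (assumes the
# constructor signatures shown in the diff above; not part of this PR).
from sampleworks.core.scalers.step_scalers import (
    DataSpaceDPSScaler,
    NoScalingScaler,
    NoiseSpaceDPSScaler,
)


def make_step_scaler(scaler_type: str, step_size: float, gradient_normalization: bool):
    factories = {
        "dataspace": lambda: DataSpaceDPSScaler(
            step_size=step_size, gradient_normalization=gradient_normalization
        ),
        "noisespace": lambda: NoiseSpaceDPSScaler(
            step_size=step_size, gradient_normalization=gradient_normalization
        ),
        "none": NoScalingScaler,
    }
    if scaler_type not in factories:
        raise ValueError(f"Invalid step_scaler_type: {scaler_type}")
    return factories[scaler_type]()
```

Adding a new scaler then becomes a one-line change to the table rather than another `elif` branch.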