diff --git a/Dockerfile b/Dockerfile index 20a3d675..45ca0ae0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,7 +27,6 @@ # --ensemble-sizes "1 4" \ # --gradient-weights "0.1 0.2" \ # --output-dir /data/results \ -# --use-tweedie \ # --gradient-normalization \ # --augmentation \ # --align-to-input @@ -41,8 +40,7 @@ # --methods "X-RAY DIFFRACTION" \ # --ensemble-sizes "1 4" \ # --gradient-weights "0.1 0.2" \ -# --output-dir /data/results \ -# --use-tweedie +# --output-dir /data/results # # # Interactive shell # docker run --gpus all -it sampleworks bash @@ -64,6 +62,9 @@ # ============================================================================ FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS base +ARG SAMPLEWORKS_GIT_SHA=unknown +ARG SAMPLEWORKS_IMAGE_TAG=unknown + ENV DEBIAN_FRONTEND=noninteractive \ # Pixi configuration PIXI_HOME=/root/.pixi \ @@ -74,7 +75,12 @@ ENV DEBIAN_FRONTEND=noninteractive \ # Disable user site packages (isolation) PYTHONNOUSERSITE=1 \ # Optimize CUDA compilation for H100 - TORCH_CUDA_ARCH_LIST="9.0" + TORCH_CUDA_ARCH_LIST="9.0" \ + SAMPLEWORKS_GIT_SHA=${SAMPLEWORKS_GIT_SHA} \ + SAMPLEWORKS_IMAGE_TAG=${SAMPLEWORKS_IMAGE_TAG} + +LABEL org.opencontainers.image.revision=${SAMPLEWORKS_GIT_SHA} \ + org.opencontainers.image.version=${SAMPLEWORKS_IMAGE_TAG} # Install system dependencies required for building scientific packages RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/GRID_SEARCH.md b/GRID_SEARCH.md index 154d00c5..e50bfb16 100644 --- a/GRID_SEARCH.md +++ b/GRID_SEARCH.md @@ -26,8 +26,8 @@ protein structure, density map, and resolution columns, described below. ```bash pixi run -e boltz python run_grid_search.py \ --proteins proteins.csv \ - --models boltz2 \ # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!) - --methods "X-RAY DIFFRACTION" \ # only useful for Boltz-2, ignored otherwise + --model boltz2 \ # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!) + --method "X-RAY DIFFRACTION" \ # only useful for Boltz-2, ignored otherwise --scalers pure_guidance \ # options: pure_guidance, fk_steering, or both as space-separated list --ensemble-sizes "1 4" \ --gradient-weights "0.1 0.2" \ @@ -51,11 +51,11 @@ name,structure,density,resolution | Argument | Description | Default | |----------------------|------------------------------------------------------------|-----------------------------| | `--proteins` | CSV with structure/density/resolution columns | required | -| `--models` | Model to run. One of `boltz1`, `boltz2`, `protenix`, `rf3` | required | +| `--model` | Model to run. One of `boltz1`, `boltz2`, `protenix`, `rf3` | required | | `--scalers` | Guidance method(s) to sweep | `pure_guidance fk_steering` | | `--ensemble-sizes` | Space-separated values, e.g. `"1 4"` | `"1 2 4 8"` | | `--gradient-weights` | Space-separated values, e.g. `"0.1 0.2"` | `"0.01 0.1 0.2"` | -| `--methods` | Boltz-2 sampling method (required for boltz2) | `X-RAY DIFFRACTION` | +| `--method` | Boltz-2 sampling method (required for boltz2) | `X-RAY DIFFRACTION` | | `--max-parallel` | Parallel workers (default: number of GPUs) | `auto` | | `--dry-run` | Print jobs without running them | off | | `--force-all` | Re-run including already-successful jobs | off | @@ -65,7 +65,7 @@ name,structure,density,resolution | `--track-chiral-features` | Track RF3 chiral gradient magnitude | off | > **Note**: Jobs are skipped if a `refined.cif` file already exists in the output directory. -> Some flags (e.g., `--use-tweedie`, `--gradient-normalization`) are not reflected in the +> Some flags (e.g., `--gradient-normalization`) are not reflected in the > directory structure, so changing them alone won't trigger a re-run. Use `--force-all` to > re-run all jobs regardless. This is under active development and will likely change soon. diff --git a/README.md b/README.md index 7087c139..84d63cdf 100644 --- a/README.md +++ b/README.md @@ -88,8 +88,8 @@ Output files appear in `output/boltz2_pure_guidance/`: `refined.cif` (final ense ```bash pixi run -e boltz python run_grid_search.py \ --proteins proteins.csv \ - --models boltz2 \ # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!) - --methods "X-RAY DIFFRACTION" \ # only useful for Boltz-2, ignored otherwise + --model boltz2 \ # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!) + --method "X-RAY DIFFRACTION" \ # only useful for Boltz-2, ignored otherwise --scalers pure_guidance \ # options: pure_guidance, fk_steering, or both as space-separated list --ensemble-sizes "1 4" \ --gradient-weights "0.1 0.2" \ @@ -113,11 +113,11 @@ name,structure,density,resolution | Argument | Description | Default | |---|---|---| | `--proteins` | CSV with structure/density/resolution columns | required | -| `--models` | Model to run. One of `boltz1`, `boltz2`, `protenix`, `rf3` | required | +| `--model` | Model to run. One of `boltz1`, `boltz2`, `protenix`, `rf3` | required | | `--scalers` | Guidance method(s) to sweep | `pure_guidance fk_steering` | | `--ensemble-sizes` | Space-separated values, e.g. `"1 4"` | `"1 2 4 8"` | | `--gradient-weights` | Space-separated values, e.g. `"0.1 0.2"` | `"0.01 0.1 0.2"` | -| `--methods` | Boltz-2 sampling method (required for boltz2) | `X-RAY DIFFRACTION` | +| `--method` | Boltz-2 sampling method (required for boltz2) | `X-RAY DIFFRACTION` | | `--max-parallel` | Parallel workers (default: number of GPUs) | `auto` | | `--dry-run` | Print jobs without running them | off | | `--force-all` | Re-run including already-successful jobs | off | @@ -126,7 +126,7 @@ name,structure,density,resolution Output layout: `grid_search_results//[_]//ens_gw/` -> **Note**: Jobs are skipped if a `refined.cif` file already exists in the output directory. Some flags (e.g., `--use-tweedie`, `--gradient-normalization`) are not reflected in the directory structure, so changing them alone won't trigger a re-run. Use `--force-all` to re-run all jobs regardless. This is under active development and will likely change soon. +> **Note**: Jobs are skipped if a `refined.cif` file already exists in the output directory. Some flags (e.g., `--gradient-normalization`) are not reflected in the directory structure, so changing them alone won't trigger a re-run. Use `--force-all` to re-run all jobs regardless. This is under active development and will likely change soon. Instructions for running evaluation and metrics scripts are coming soon. diff --git a/diffuse.yaml b/diffuse.yaml new file mode 100644 index 00000000..bdec6330 --- /dev/null +++ b/diffuse.yaml @@ -0,0 +1,60 @@ +# diffuse.yaml -- the contract between Sampleworks and Diffuse. +# +# Apply with: diffuse apply +# Run with: diffuse run sampleworks --params params.json --output-dir /data/results/run-001 +# +# Idempotent -- safe to apply repeatedly. + +version: 1 + +type: sampleworks +name: Sampleworks +description: > + Generic Sampleworks experiment runner. Diffuse accepts a flexible params.json + blob, stores it for traceability, materializes it inside the pod, and lets + Sampleworks own validation and model-specific execution. + +profiles: + - slug: sampleworks + name: Sampleworks + container: + image: diffuseproject/sampleworks + tag: latest + gpus: + min: 2 + entrypoint: null + input_schema: + inputs: + - key: params_json + type: json + required: true + description: Arbitrary Sampleworks-owned params.json object. + - key: output_dir + type: text + required: true + default: /data/results + description: Container output directory, usually under /data/results. + run_config_defaults: + shared_memory_size: "16Gi" + image_pull_policy: IfNotPresent + poll_max_retries: 720 + params_file: + param_key: params_json + path: /diffuse/input/params.json + volumes: + - name: data-input + hostPath: /data/sampleworks-exp/input + mountPath: /data/input + readOnly: true + - name: data-results + hostPath: /data/sampleworks-exp/grid_search_results + mountPath: /data/results + - name: msa-cache + hostPath: /data/sampleworks-exp/msa_cache + mountPath: /root/.sampleworks/msa + args_template: + base_command: [--params, /diffuse/input/params.json] + flag_args: + output_dir: --output-dir + boolean_args: {} + static_args: [] diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index d89abc5d..45ca3bb2 100755 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -4,6 +4,7 @@ # Usage: # docker run sampleworks -e