diff-use · xraymemory · Apr 22, 2026 · Apr 23, 2026 · Apr 27, 2026 · Apr 30, 2026
diff --git a/Dockerfile b/Dockerfile
@@ -27,7 +27,6 @@
 #     --ensemble-sizes "1 4" \
 #     --gradient-weights "0.1 0.2" \
 #     --output-dir /data/results \
-#     --use-tweedie \
 #     --gradient-normalization \
 #     --augmentation \
 #     --align-to-input
@@ -41,8 +40,7 @@
 #     --methods "X-RAY DIFFRACTION" \
 #     --ensemble-sizes "1 4" \
 #     --gradient-weights "0.1 0.2" \
-#     --output-dir /data/results \
-#     --use-tweedie
+#     --output-dir /data/results
 #
 #   # Interactive shell
 #   docker run --gpus all -it sampleworks bash
@@ -64,6 +62,9 @@
 # ============================================================================
 FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS base
 
+# Build provenance passed in with --build-arg; both fall back to "unknown".
+# NOTE(review): the ENV instruction right below expands these values, so a new
+# SHA/tag invalidates the build cache for every later layer in this stage
+# (including the apt-get install). Consider declaring and using them as late
+# as possible in the build instead.
+ARG SAMPLEWORKS_GIT_SHA=unknown
+ARG SAMPLEWORKS_IMAGE_TAG=unknown
+
 ENV DEBIAN_FRONTEND=noninteractive \
     # Pixi configuration
     PIXI_HOME=/root/.pixi \
@@ -74,7 +75,12 @@ ENV DEBIAN_FRONTEND=noninteractive \
     # Disable user site packages (isolation)
     PYTHONNOUSERSITE=1 \
     # Optimize CUDA compilation for H100
-    TORCH_CUDA_ARCH_LIST="9.0"
+    TORCH_CUDA_ARCH_LIST="9.0" \
+    SAMPLEWORKS_GIT_SHA=${SAMPLEWORKS_GIT_SHA} \
+    SAMPLEWORKS_IMAGE_TAG=${SAMPLEWORKS_IMAGE_TAG}
+
+# Record the same build provenance as standard OCI image labels so it can be
+# read from image metadata (e.g. `docker inspect`) without starting a container.
+LABEL org.opencontainers.image.revision=${SAMPLEWORKS_GIT_SHA} \
+      org.opencontainers.image.version=${SAMPLEWORKS_IMAGE_TAG}
 
 # Install system dependencies required for building scientific packages
 RUN apt-get update && apt-get install -y --no-install-recommends \

diff --git a/GRID_SEARCH.md b/GRID_SEARCH.md
@@ -26,8 +26,8 @@ protein structure, density map, and resolution columns, described below.
 ```bash
 pixi run -e boltz python run_grid_search.py \
     --proteins proteins.csv \
-    --models boltz2 \                # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!)
-    --methods "X-RAY DIFFRACTION" \  # only useful for Boltz-2, ignored otherwise
+    --model boltz2 \                 # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!)
+    --method "X-RAY DIFFRACTION" \   # only useful for Boltz-2, ignored otherwise
-    --model boltz2 \                 # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!)
-    --method "X-RAY DIFFRACTION" \   # only useful for Boltz-2, ignored otherwise
+    # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!)
+    --model boltz2 \
+    # only useful for Boltz-2, ignored otherwise
+    --method "X-RAY DIFFRACTION" \
-    --model boltz2 \                 # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!)
-    --method "X-RAY DIFFRACTION" \   # only useful for Boltz-2, ignored otherwise
+    # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!)
+    --model boltz2 \
+    # only useful for Boltz-2, ignored otherwise
+    --method "X-RAY DIFFRACTION" \
     --scalers pure_guidance \        # options: pure_guidance, fk_steering, or both as space-separated list
     --ensemble-sizes "1 4" \
     --gradient-weights "0.1 0.2" \
@@ -51,11 +51,11 @@ name,structure,density,resolution
 | Argument             | Description                                                | Default                     |
 |----------------------|------------------------------------------------------------|-----------------------------|
 | `--proteins`         | CSV with structure/density/resolution columns              | required                    |
-| `--models`           | Model to run. One of `boltz1`, `boltz2`, `protenix`, `rf3` | required                    |
+| `--model`            | Model to run. One of `boltz1`, `boltz2`, `protenix`, `rf3` | required                    |
 | `--scalers`          | Guidance method(s) to sweep                                | `pure_guidance fk_steering` |
 | `--ensemble-sizes`   | Space-separated values, e.g. `"1 4"`                       | `"1 2 4 8"`                 |
 | `--gradient-weights` | Space-separated values, e.g. `"0.1 0.2"`                   | `"0.01 0.1 0.2"`            |
-| `--methods`          | Boltz-2 sampling method (required for boltz2)              | `X-RAY DIFFRACTION`         |
+| `--method`           | Boltz-2 sampling method (required for boltz2)              | `X-RAY DIFFRACTION`         |
 | `--max-parallel`     | Parallel workers (default: number of GPUs)                 | `auto`                      |
 | `--dry-run`          | Print jobs without running them                            | off                         |
 | `--force-all`        | Re-run including already-successful jobs                   | off                         |
@@ -65,7 +65,7 @@ name,structure,density,resolution
 | `--track-chiral-features` | Track RF3 chiral gradient magnitude                   | off                         |
 
 > **Note**: Jobs are skipped if a `refined.cif` file already exists in the output directory.
-> Some flags (e.g., `--use-tweedie`, `--gradient-normalization`) are not reflected in the
+> Some flags (e.g., `--gradient-normalization`) are not reflected in the
 > directory structure, so changing them alone won't trigger a re-run. Use `--force-all` to
 > re-run all jobs regardless. This is under active development and will likely change soon.
 

diff --git a/README.md b/README.md
@@ -88,8 +88,8 @@ Output files appear in `output/boltz2_pure_guidance/`: `refined.cif` (final ense
 ```bash
 pixi run -e boltz python run_grid_search.py \
     --proteins proteins.csv \
-    --models boltz2 \                # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!)
-    --methods "X-RAY DIFFRACTION" \  # only useful for Boltz-2, ignored otherwise
+    --model boltz2 \                 # options: boltz1, boltz2, protenix, rf3 (make sure env aligns!)
+    --method "X-RAY DIFFRACTION" \   # only useful for Boltz-2, ignored otherwise
     --scalers pure_guidance \        # options: pure_guidance, fk_steering, or both as space-separated list
     --ensemble-sizes "1 4" \
     --gradient-weights "0.1 0.2" \
@@ -113,11 +113,11 @@ name,structure,density,resolution
 | Argument | Description | Default |
 |---|---|---|
 | `--proteins` | CSV with structure/density/resolution columns | required |
-| `--models` | Model to run. One of `boltz1`, `boltz2`, `protenix`, `rf3` | required |
+| `--model` | Model to run. One of `boltz1`, `boltz2`, `protenix`, `rf3` | required |
 | `--scalers` | Guidance method(s) to sweep | `pure_guidance fk_steering` |
 | `--ensemble-sizes` | Space-separated values, e.g. `"1 4"` | `"1 2 4 8"` |
 | `--gradient-weights` | Space-separated values, e.g. `"0.1 0.2"` | `"0.01 0.1 0.2"` |
-| `--methods` | Boltz-2 sampling method (required for boltz2) | `X-RAY DIFFRACTION` |
+| `--method` | Boltz-2 sampling method (required for boltz2) | `X-RAY DIFFRACTION` |
 | `--max-parallel` | Parallel workers (default: number of GPUs) | `auto` |
 | `--dry-run` | Print jobs without running them | off |
 | `--force-all` | Re-run including already-successful jobs | off |
@@ -126,7 +126,7 @@ name,structure,density,resolution
 
 Output layout: `grid_search_results/<protein>/<model>[_<method>]/<scaler>/ens<N>_gw<W>/`
 
-> **Note**: Jobs are skipped if a `refined.cif` file already exists in the output directory. Some flags (e.g., `--use-tweedie`, `--gradient-normalization`) are not reflected in the directory structure, so changing them alone won't trigger a re-run. Use `--force-all` to re-run all jobs regardless. This is under active development and will likely change soon.
+> **Note**: Jobs are skipped if a `refined.cif` file already exists in the output directory. Some flags (e.g., `--gradient-normalization`) are not reflected in the directory structure, so changing them alone won't trigger a re-run. Use `--force-all` to re-run all jobs regardless. This is under active development and will likely change soon.
 
 Instructions for running evaluation and metrics scripts are coming soon.
 

diff --git a/diffuse.yaml b/diffuse.yaml
@@ -0,0 +1,60 @@
+# diffuse.yaml -- the contract between Sampleworks and Diffuse.
+#
+# Apply with: diffuse apply
+# Run with:   diffuse run sampleworks --params params.json --output-dir /data/results/run-001
+#
+# Idempotent -- safe to apply repeatedly.
+
+version: 1
+
+type: sampleworks
+name: Sampleworks
+description: >
+  Generic Sampleworks experiment runner. Diffuse accepts a flexible params.json
+  blob, stores it for traceability, materializes it inside the pod, and lets
+  Sampleworks own validation and model-specific execution.
+
+# Execution profiles exposed to Diffuse. Only one profile is defined: the
+# generic params.json runner.
+profiles:
+  - slug: sampleworks
+    name: Sampleworks
+    container:
+      image: diffuseproject/sampleworks
+      # NOTE(review): a floating "latest" tag together with
+      # image_pull_policy: IfNotPresent (below) presumably lets a node keep
+      # running a stale cached image -- confirm, and consider pinning the tag.
+      tag: latest
+      gpus:
+        min: 2
+    # No override here; presumably the image's own entrypoint is used -- confirm.
+    entrypoint: null
+    input_schema:
+      inputs:
+        # Free-form JSON object; per the description above, validation is left
+        # to Sampleworks rather than to Diffuse.
+        - key: params_json
+          type: json
+          required: true
+          description: Arbitrary Sampleworks-owned params.json object.
+        # NOTE(review): marked required but also given a default -- confirm how
+        # Diffuse resolves that combination.
+        - key: output_dir
+          type: text
+          required: true
+          default: /data/results
+          description: Container output directory, usually under /data/results.
+    run_config_defaults:
+      # Same size as the --shm-size=16g used in the docker run examples.
+      shared_memory_size: "16Gi"
+      image_pull_policy: IfNotPresent
+      poll_max_retries: 720
+      # Writes the params_json input to a file inside the pod. The path must
+      # stay in sync with args_template.base_command below.
+      params_file:
+        param_key: params_json
+        path: /diffuse/input/params.json
+      volumes:
+        # Read-only input data.
+        - name: data-input
+          hostPath: /data/sampleworks-exp/input
+          mountPath: /data/input
+          readOnly: true
+        # Writable results; mounted at the default output_dir.
+        - name: data-results
+          hostPath: /data/sampleworks-exp/grid_search_results
+          mountPath: /data/results
+        # Host directory mounted at /root/.sampleworks/msa (presumably so MSA
+        # results are reused across runs -- confirm).
+        - name: msa-cache
+          hostPath: /data/sampleworks-exp/msa_cache
+          mountPath: /root/.sampleworks/msa
+      # Container arguments: "--params <file>" first, then --output-dir taken
+      # from the output_dir input.
+      args_template:
+        base_command: [--params, /diffuse/input/params.json]
+        flag_args:
+          output_dir: --output-dir
+        boolean_args: {}
+        static_args: []
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
@@ -4,6 +4,7 @@
 # Usage:
 #   docker run sampleworks -e <pixi_env> <script> [args...]
 #   docker run sampleworks -e boltz run_grid_search.py --proteins /data/proteins.csv ...
+#   docker run sampleworks --params /data/input/params.json --output-dir /data/results
 #   docker run sampleworks bash  # interactive shell
 #
 # Available pixi environments: boltz, protenix, rf3
@@ -18,7 +19,6 @@
 #     --ensemble-sizes "1 4" \
 #     --gradient-weights "0.1 0.2" \
 #     --output-dir /data/results \
-#     --use-tweedie \
 #     --rf3-checkpoint /data/checkpoints/rf3.ckpt
 
 set -e
@@ -29,6 +29,7 @@ Sampleworks - Protein structure prediction with diffusion model guidance
 
 USAGE:
     docker run --gpus all --shm-size=16g sampleworks -e <environment> <script> [arguments...]
+    docker run --gpus all --shm-size=16g sampleworks --params <params.json> --output-dir <dir>
     docker run sampleworks bash
     docker run sampleworks --help
 
@@ -37,6 +38,7 @@ IMPORTANT:
 
 OPTIONS:
     -e, --env <env>     Pixi environment to use (boltz, protenix, rf3)
+    --params FILE       Run grid search from a flexible params.json file
     -h, --help          Show this help message
     bash                Start an interactive shell
 
@@ -46,6 +48,11 @@ ENVIRONMENTS:
     rf3         For RF3 model
 
 EXAMPLES:
+    # Run generic Diffuse/Sampleworks params mode. The JSON chooses the model/env.
+    docker run --gpus all --shm-size=16g -v /data:/data sampleworks \
+      --params /data/input/params.json \
+      --output-dir /data/results/run-001
+
     # Run grid search with RF3 model
     docker run --gpus all --shm-size=16g -v /data:/data sampleworks \
       -e rf3 run_grid_search.py \
@@ -55,7 +62,6 @@ EXAMPLES:
       --ensemble-sizes "1 4" \
       --gradient-weights "0.1 0.2" \
       --output-dir /data/results \
-      --use-tweedie \
       --gradient-normalization \
       --augmentation \
       --align-to-input \
@@ -70,7 +76,6 @@ EXAMPLES:
       --ensemble-sizes "1 4" \
       --gradient-weights "0.1 0.2" \
       --output-dir /data/results \
-      --use-tweedie \
       --boltz1-checkpoint /data/checkpoints/boltz1_conf.ckpt
 
     # Run grid search with Boltz2 model
@@ -83,7 +88,6 @@ EXAMPLES:
       --ensemble-sizes "1 4" \
       --gradient-weights "0.1 0.2" \
       --output-dir /data/results \
-      --use-tweedie \
       --boltz2-checkpoint /data/checkpoints/boltz2_conf.ckpt
 
     # Run grid search with Protenix model
@@ -95,7 +99,6 @@ EXAMPLES:
       --ensemble-sizes "1 4" \
       --gradient-weights "0.1 0.2" \
       --output-dir /data/results \
-      --use-tweedie \
       --protenix-checkpoint /data/checkpoints/protenix_base_default_v0.5.0.pt
 
     # Interactive shell
@@ -120,7 +123,6 @@ GRID SEARCH ARGUMENTS (run_grid_search.py):
       --scalers SCALER            Guidance method (pure_guidance, fk_steering)
       --ensemble-sizes "N M..."   Space-separated ensemble sizes (e.g., "1 4")
       --gradient-weights "X Y..." Space-separated gradient weights (e.g., "0.1 0.2")
-      --use-tweedie               Use Tweedie's formula for gradient computation
       --gradient-normalization    Enable gradient normalization
       --augmentation              Enable data augmentation
       --align-to-input            Enable alignment to input structure
@@ -177,6 +179,62 @@ For full argument details, run:
 EOF
 }
 
+# Print the pixi environment (boltz, protenix, or rf3) that matches the model
+# named in a params JSON file.
+#
+#   $1  path to the params JSON file
+#
+# The environment name is the only thing written to stdout. On any problem
+# (unreadable file, invalid JSON, missing/ambiguous/unknown model) the embedded
+# Python exits non-zero with a one-line message on stderr.
+infer_env_from_params() {
+    local value="$1"
+    # The helper below only needs the Python standard library; the boltz
+    # environment is used here simply as the interpreter to run it with.
+    pixi run -e boltz python - "$value" << 'PY'
+import json
+import sys
+
+value = sys.argv[1]
+try:
+    with open(value) as handle:
+        params = json.load(handle)
+except OSError as exc:
+    raise SystemExit(f"Cannot read params file {value}: {exc}")
+except ValueError as exc:
+    # json.JSONDecodeError (and decoding errors) are ValueError subclasses.
+    raise SystemExit(f"Params file {value} is not valid JSON: {exc}")
+
+# Accept a wrapper object that nests the real params under "params_json".
+if isinstance(params, dict) and isinstance(params.get("params_json"), dict):
+    params = params["params_json"]
+
+# Everything below uses params.get(); reject lists/strings/numbers up front
+# instead of failing with an AttributeError traceback.
+if not isinstance(params, dict):
+    raise SystemExit("Sampleworks params JSON must be an object")
+
+def model_value(value):
+    """Reduce a model spec (string, one-element list, or dict) to its name."""
+    if value is None:
+        return None
+    if isinstance(value, dict):
+        return model_value(value.get("name") or value.get("type") or value.get("model"))
+    if isinstance(value, list):
+        if len(value) != 1:
+            raise SystemExit("Sampleworks params mode supports exactly one model")
+        return str(value[0])
+    return str(value)
+
+# Source 1: top-level "model".
+model = model_value(params.get("model"))
+
+# Source 2: top-level "models" (list or whitespace-separated string); it must
+# name exactly one model and agree with "model" when both are present.
+models = params.get("models")
+if models is not None:
+    if isinstance(models, str):
+        models = models.split()
+    if not isinstance(models, list) or len(models) != 1:
+        raise SystemExit("Sampleworks params mode supports exactly one model")
+    if model is not None and str(models[0]) != model:
+        raise SystemExit("Sampleworks params JSON defines conflicting model and models values")
+    model = str(models[0])
+
+# Source 3: a nested "model_config" / "model_settings" object; it must agree
+# with whatever was found above.
+model_section = params.get("model_config") or params.get("model_settings")
+if isinstance(model_section, dict):
+    nested_model = model_value(model_section)
+    if nested_model is not None:
+        if model is not None and nested_model != model:
+            raise SystemExit("Sampleworks params JSON defines conflicting nested model value")
+        model = nested_model
+
+# Map the model name to the pixi environment that provides it.
+if model in ("boltz1", "boltz2"):
+    print("boltz")
+elif model == "protenix":
+    print("protenix")
+elif model == "rf3":
+    print("rf3")
+else:
+    raise SystemExit("params JSON must include model: boltz1, boltz2, protenix, or rf3")
+PY
+}
+
 # Handle special cases first
 if [ $# -eq 0 ] || [ "$1" = "-h" ] || [ "$1" = "--help" ]; then
     show_help
@@ -188,6 +246,17 @@ if [ "$1" = "bash" ] || [ "$1" = "sh" ]; then
     exec "$@"
 fi
 
+# Generic params mode. Diffuse uses this path: it materializes params.json in
+# the pod, then passes --params plus --output-dir to this entrypoint.
+# No -e <environment> is given in this mode; the pixi environment is inferred
+# from the model named in the params file, and the complete argument list
+# (including --params itself) is forwarded to run_grid_search.py.
+if [ "$1" = "--params" ]; then
+    if [ -z "$2" ] || [[ "$2" == -* ]]; then
+        echo "Error: $1 requires a value"
+        exit 1
+    fi
+    # Fail early, before starting pixi, if the params file does not exist.
+    if [ ! -f "$2" ]; then
+        echo "Error: params file not found: $2"
+        exit 1
+    fi
+    # With `set -e`, a failure while inferring the environment aborts here.
+    ENV="$(infer_env_from_params "$2")"
+    exec pixi run -e "$ENV" python /app/run_grid_search.py "$@"
+fi
+
 # Parse -e/--env argument
 ENV=""
 while [[ $# -gt 0 ]]; do
@@ -202,9 +271,10 @@ while [[ $# -gt 0 ]]; do
             break
             ;;
         *)
-            echo "Error: First argument must be -e <environment>, bash, or --help"
+            echo "Error: First argument must be -e <environment>, --params, bash, or --help"
             echo ""
             echo "Usage: docker run sampleworks -e <env> <script> [args...]"
+            echo "       docker run sampleworks --params <params.json> --output-dir <dir>"
             echo "       docker run sampleworks bash"
             echo "       docker run sampleworks --help"
             exit 1