-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathrun_all_models.sh
More file actions
executable file
·164 lines (150 loc) · 5.38 KB
/
run_all_models.sh
File metadata and controls
executable file
·164 lines (150 loc) · 5.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#!/bin/bash
# Run all 4 model grid searches in parallel, 2 GPUs each
# Total: 8 GPUs used (4 jobs x 2 GPUs each)
#
# Models:
# - Boltz2 X-ray diffraction (GPUs 0,1)
# - Boltz2 MD (GPUs 2,3)
# - RosettaFold3 (GPUs 4,5)
# - Protenix (GPUs 6,7)
#
# Checkpoints are BAKED INTO the Docker image at /checkpoints/.
# If missing, the code auto-falls back to mounted paths.
#
# Usage:
# ./run_all_models.sh
#
# Env overrides: RESULTS_DIR, MSA_CACHE_DIR
set -euo pipefail

# --- Configuration ---
DATA_DIR="/mnt/diffuse-private/raw/sampleworks/initial_dataset_40_occ_sweeps"
RESULTS_DIR="${RESULTS_DIR:-/data/sampleworks-exp/occ_sweep/grid_search_results}"
MSA_CACHE_DIR="${MSA_CACHE_DIR:-/data/sampleworks-exp/msa_cache}"
IMAGE="diffuseproject/sampleworks:latest"

# Create directories
mkdir -p "$RESULTS_DIR" "$MSA_CACHE_DIR"

# Pull latest image (no-op if already up to date)
echo "Pulling latest Docker image..."
docker pull "$IMAGE"

# Common docker options — an array so each flag stays its own word
# (an unquoted string relies on accidental word-splitting).
DOCKER_OPTS=(--rm --shm-size=16g)

echo "=========================================="
echo "Starting all model grid searches (4 jobs x 2 GPUs)"
echo "Data: $DATA_DIR"
echo "Results: $RESULTS_DIR"
echo "MSA Cache: $MSA_CACHE_DIR"
echo "Checkpoints: BAKED INTO IMAGE (with mount fallback)"
echo ""
echo "Models:"
echo " - Boltz2 X-ray (GPUs 0,1)"
echo " - Boltz2 MD (GPUs 2,3)"
echo " - RF3 (GPUs 4,5)"
echo " - Protenix (GPUs 6,7)"
echo "=========================================="

PIDS=()
JOB_NAMES=()

#######################################
# Launch one containerized grid search in the background.
# Arguments:
#   $1 - human-readable job name (used in log lines)
#   $2 - GPU device list, e.g. "0,1"
#   $3 - log file basename (written under $RESULTS_DIR)
#   $@ - remaining args appended after the image name, so they are
#        consumed by the image ENTRYPOINT, not by docker itself.
#        NOTE(review): the leading "-e <env>" therefore looks like an
#        entrypoint environment selector (e.g. `pixi run -e ...`), not a
#        docker env flag — confirm the entrypoint expects it.
# Globals: reads DOCKER_OPTS/DATA_DIR/RESULTS_DIR/MSA_CACHE_DIR/IMAGE;
#          appends to PIDS and JOB_NAMES.
#######################################
launch_job() {
  local name=$1 gpus=$2 log=$3
  shift 3
  echo "[$(date)] Starting $name on GPUs $gpus"
  (
    # pipefail inside the subshell so the job's exit status reflects
    # `docker run`, not just the trailing `tee`. Recording the subshell
    # PID (rather than $! of a bare pipeline, which is tee's PID) lets
    # the final wait loop see the real result of each job.
    set -o pipefail
    docker run "${DOCKER_OPTS[@]}" \
      --gpus "\"device=${gpus}\"" \
      -v "$DATA_DIR:/data/inputs:ro" \
      -v "$RESULTS_DIR:/data/results" \
      -v "$MSA_CACHE_DIR:/root/.sampleworks/msa" \
      -e SAMPLEWORKS_HOST_INPUT_DIR="$DATA_DIR" \
      -e SAMPLEWORKS_HOST_RESULTS_DIR="$RESULTS_DIR" \
      "$IMAGE" \
      "$@" \
      2>&1 | tee "$RESULTS_DIR/$log"
  ) &
  PIDS+=("$!")
  JOB_NAMES+=("$name")
  echo "[$(date)] $name job started (PID: ${PIDS[-1]})"
}

# --- Boltz2 X-ray Diffraction (GPUs 0,1) ---
launch_job "Boltz2 X-ray" "0,1" "boltz2_xrd_run.log" \
  -e boltz run_grid_search.py \
  --proteins "/data/inputs/proteins.csv" \
  --model boltz2 \
  --method "X-RAY DIFFRACTION" \
  --scalers pure_guidance \
  --partial-diffusion-step 120 \
  --ensemble-sizes "8" \
  --gradient-weights "0.1 0.2 0.5" \
  --gradient-normalization --augmentation --align-to-input \
  --output-dir /data/results

# --- Boltz2 MD (GPUs 2,3) ---
launch_job "Boltz2 MD" "2,3" "boltz2_md_run.log" \
  -e boltz run_grid_search.py \
  --proteins "/data/inputs/proteins.csv" \
  --model boltz2 \
  --method "MD" \
  --scalers pure_guidance \
  --partial-diffusion-step 120 \
  --ensemble-sizes "8" \
  --gradient-weights "0.1 0.2 0.5" \
  --gradient-normalization --augmentation --align-to-input \
  --output-dir /data/results

# --- RosettaFold3 (GPUs 4,5) ---
# NB: RF3 uses 10x smaller gradient weights than the other models.
launch_job "RosettaFold3" "4,5" "rf3_run.log" \
  -e rf3 run_grid_search.py \
  --proteins "/data/inputs/proteins.csv" \
  --model rf3 \
  --partial-diffusion-step 120 \
  --scalers pure_guidance \
  --ensemble-sizes "8" \
  --gradient-weights "0.01 0.02 0.05" \
  --gradient-normalization --augmentation --align-to-input \
  --output-dir /data/results

# --- Protenix (GPUs 6,7) ---
launch_job "Protenix" "6,7" "protenix_run.log" \
  -e protenix run_grid_search.py \
  --proteins "/data/inputs/proteins.csv" \
  --model protenix \
  --scalers pure_guidance \
  --partial-diffusion-step 120 \
  --ensemble-sizes "8" \
  --gradient-weights "0.1 0.2 0.5" \
  --gradient-normalization --augmentation --align-to-input \
  --output-dir /data/results

echo ""
echo "=========================================="
echo "All 4 jobs launched! PIDs: ${PIDS[*]}"
echo "Logs:"
echo " - $RESULTS_DIR/boltz2_xrd_run.log"
echo " - $RESULTS_DIR/boltz2_md_run.log"
echo " - $RESULTS_DIR/rf3_run.log"
echo " - $RESULTS_DIR/protenix_run.log"
echo ""
echo "Monitor GPU usage: nvidia-smi -l 1"
echo "Waiting for all jobs to complete..."
echo "=========================================="

# Wait on each job individually and propagate failures.
# (A bare `wait` always returns 0 in bash, which would silently hide a
# failed grid search; set -e also never applies to background jobs.)
FAILED=0
for i in "${!PIDS[@]}"; do
  if wait "${PIDS[$i]}"; then
    echo "[$(date)] ${JOB_NAMES[$i]} completed successfully"
  else
    echo "[$(date)] ${JOB_NAMES[$i]} FAILED — see its log above" >&2
    FAILED=1
  fi
done

echo ""
echo "=========================================="
if (( FAILED )); then
  echo "[$(date)] One or more jobs FAILED!" >&2
  exit 1
fi
echo "[$(date)] All jobs completed!"
echo "=========================================="