8 changes: 8 additions & 0 deletions .devcontainer/devcontainer.json
@@ -37,6 +37,14 @@
"-p", "6006:6006"
],

"containerEnv": {
// We always want to manage CUDA_VISIBLE_DEVICES ourselves.
"RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES": "0",

// We set CUDA_VISIBLE_DEVICES here, as each container will need to set visible GPUs independently.
"CUDA_VISIBLE_DEVICES": "0,1"
Owner commented:
is it possible to set this dynamically? what if someone has like, e.g., 10 GPUs, or only 1 GPU?

@jacksonjacobs1 (Collaborator, Author) commented on Sep 2, 2025:

The hardcoded values were set for simplicity. This PR is not yet ready for review; I still need to test whether QA works with a multi-node, multi-GPU cluster.

That said, there are ways to set CUDA_VISIBLE_DEVICES dynamically:

1. Run an `export` command after container setup (can be added to the Dockerfile, `devcontainer.json`, or docker compose file):

   ```bash
   export CUDA_VISIBLE_DEVICES=$(nvidia-smi --query-gpu=uuid --format=csv,noheader | paste -sd "," -)
   ```
2. Use nvidia-container-toolkit's API. In `devcontainer.json`:

   ```json
   {
     "name": "My GPU Dev Container",
     "runArgs": ["--gpus=all"],
     "workspaceFolder": "/workspace"
   }
   ```

   In a docker compose YAML:

   ```yaml
   services:
     app:
       image: your-image
       runtime: nvidia
       environment:
         - NVIDIA_VISIBLE_DEVICES=all
   ```

But it's unclear to me whether option 2 avoids the bug you noticed with Ray requiring `CUDA_VISIBLE_DEVICES` to be explicitly set:
ray-project/ray#49985 (comment)

If not, we could run the following command within the container to ensure `CUDA_VISIBLE_DEVICES` is set:

```bash
export CUDA_VISIBLE_DEVICES=$NVIDIA_VISIBLE_DEVICES
```
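The same fallback could also be applied at process startup in Python rather than in the shell. A minimal sketch (the helper names here are hypothetical, not part of QA): it mirrors `NVIDIA_VISIBLE_DEVICES` when present, and otherwise queries `nvidia-smi` for GPU UUIDs, leaving `CUDA_VISIBLE_DEVICES` empty on GPU-less machines.

```python
import os
import subprocess


def visible_gpu_ids() -> str:
    """Return a comma-separated list of GPU UUIDs reported by nvidia-smi,
    or an empty string when nvidia-smi is missing or fails."""
    try:
        out = subprocess.run(
            ["nvidia-smi", "--query-gpu=uuid", "--format=csv,noheader"],
            capture_output=True, text=True, check=True,
        ).stdout
    except (FileNotFoundError, subprocess.CalledProcessError):
        return ""
    return ",".join(line.strip() for line in out.splitlines() if line.strip())


def ensure_cuda_visible_devices() -> None:
    """Set CUDA_VISIBLE_DEVICES only if unset: prefer NVIDIA_VISIBLE_DEVICES,
    else fall back to querying nvidia-smi directly."""
    if "CUDA_VISIBLE_DEVICES" not in os.environ:
        os.environ["CUDA_VISIBLE_DEVICES"] = (
            os.environ.get("NVIDIA_VISIBLE_DEVICES") or visible_gpu_ids()
        )
```

This would run once before `ray.init()`, so every container derives its own device list instead of relying on a hardcoded value.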

Owner commented:
coo coo coo

},

"postCreateCommand": "ln -sf /opt/QuickAnnotator/quickannotator/client/package.json /opt/package.json && ln -sf /opt/QuickAnnotator/quickannotator/client/package-lock.json /opt/package-lock.json && uv pip install -e ."

}
15 changes: 10 additions & 5 deletions README.md
@@ -54,21 +54,26 @@ By default, QuickAnnotator uses a SQLite database. If you would like to use a po
git checkout v2.0
```

2. Modify `devcontainer.json` to suit your use case. In particular, change the value of `CUDA_VISIBLE_DEVICES` to your desired GPU IDs.

Owner commented:

i see - are folks likely to read the readme in detail though? or perhaps we should have some explicit messages appear on the screen/log during bootup to draw their attention to these components?
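The boot-time notice suggested here could be a one-line startup banner. A minimal sketch, assuming nothing about QA's actual logging setup (the function name is hypothetical):

```python
import logging
import os


def log_gpu_banner(logger: logging.Logger) -> str:
    """Build and log a prominent startup message showing which GPUs
    QuickAnnotator will use, so the setting is hard to miss."""
    devices = os.environ.get("CUDA_VISIBLE_DEVICES", "<unset>")
    msg = (f"QuickAnnotator will use CUDA_VISIBLE_DEVICES={devices}; "
           "edit devcontainer.json to change this.")
    logger.warning(msg)  # warning level so it stands out in default logs
    return msg
```

Calling this during server startup would surface the GPU configuration without depending on users reading the README in detail.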

2. Within VS Code, open the cloned repository and click on the "Reopen in Container" button to build the devcontainer. This will create a docker container with all the necessary dependencies to run QuickAnnotator.
![image](https://github.com/user-attachments/assets/b776577f-a4c2-4eb8-858c-c603ac20cc6d)


### Usage
1. Connect to a Ray cluster. Ray is used to run operations which require asynchronous processing. There are three ways to connect to a Ray cluster:
- **Default**: By default QA will initialize a local Ray cluster within the docker container.
- Note: The default ray cluster does not host the Ray dashboard.
Once the devcontainer is built, run the following commands within the container terminal to use QuickAnnotator:

1. Connect to a Ray cluster. Ray is used to run operations which require asynchronous processing. There are two ways to connect to a Ray cluster:
- **Manual local cluster**: Run the following command to start a Ray cluster with the Ray dashboard:
```bash
ray start --head --dashboard-host 0.0.0.0
```
- **Pre-existing cluster**: If you would like QA to connect to an existing Ray cluster, use the `--cluster_address` argument.
- **Pre-existing cluster**: To add the container to an existing cluster, use the `--address` argument.
```bash
ray start --address <cluster_address>
```

2. Once the devcontainer is built, you can run the following command to start the QuickAnnotator server:
2. Run the following command to start the QuickAnnotator server:
```
(venv) root@e4392ecdd8ef:/opt/QuickAnnotator# quickannotator
* Serving Flask app '__main__'
6 changes: 4 additions & 2 deletions quickannotator/dl/ray_jackson.py
@@ -1,4 +1,5 @@
import logging
import os
from quickannotator.db.logging import LoggingManager
import ray
from ray.train import ScalingConfig
@@ -49,12 +50,13 @@ def start_dlproc(self, allow_pred=True):
self.setProcRunningSince()

total_gpus = ray.cluster_resources().get("GPU", 0)
self.logger.info(f"Total GPUs available: {total_gpus}")
self.logger.info(f"{os.environ['RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES']=}")
self.logger.info(f"{os.environ['CUDA_VISIBLE_DEVICES']=}")
scaling_config = ray.train.ScalingConfig(
num_workers=int(total_gpus),
use_gpu=True,
resources_per_worker={"GPU": .01},
placement_strategy="STRICT_SPREAD"
# placement_strategy="STRICT_SPREAD" #TODO: remove
)

trainer = ray.train.torch.TorchTrainer(
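The `ScalingConfig` in this hunk assumes at least one GPU. One way to address the reviewer's question about heterogeneous setups (10 GPUs, or only 1, or none) is to derive the scaling arguments from the cluster's reported GPU count. A sketch with a hypothetical pure helper, not code from this PR:

```python
def worker_scaling(total_gpus: float) -> dict:
    """Map the Ray cluster's reported GPU count to Ray Train ScalingConfig
    keyword arguments, falling back to one CPU worker when no GPU is seen."""
    n = int(total_gpus)
    if n == 0:
        return {"num_workers": 1, "use_gpu": False}
    # Fractional requests let several logical workers share one physical GPU,
    # matching the resources_per_worker={"GPU": .01} pattern above.
    return {"num_workers": n, "use_gpu": True,
            "resources_per_worker": {"GPU": 0.01}}
```

In `start_dlproc` this could then be used as `ray.train.ScalingConfig(**worker_scaling(total_gpus))`.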