diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 9a4240ce..49f33e70 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -37,6 +37,14 @@ "-p", "6006:6006" ], + "containerEnv": { + // We always want to manage CUDA_VISIBLE_DEVICES ourselves. + "RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES": "0", + + // We set CUDA_VISIBLE_DEVICES here, as each container will need to set visible GPUs independently. + "CUDA_VISIBLE_DEVICES": "0,1" + }, + "postCreateCommand": "ln -sf /opt/QuickAnnotator/quickannotator/client/package.json /opt/package.json && ln -sf /opt/QuickAnnotator/quickannotator/client/package-lock.json /opt/package-lock.json && uv pip install -e ." } diff --git a/README.md b/README.md index 72d7a6e3..acdc7f4c 100644 --- a/README.md +++ b/README.md @@ -54,21 +54,26 @@ By default, QuickAnnotator uses a SQLite database. If you would like to use a po git checkout v2.0 ``` +2. Modify `devcontainer.json` to suit your use case. Particularly, change the value of `CUDA_VISIBLE_DEVICES` to your desired GPU ids. + 2. Within VS Code, open the cloned repository and click on the "Reopen in Container" button to build the devcontainer. This will create a docker container with all the necessary dependencies to run QuickAnnotator. ![image](https://github.com/user-attachments/assets/b776577f-a4c2-4eb8-858c-c603ac20cc6d) ### Usage -1. Connect to a Ray cluster. Ray is used to run operations which require asyncronous processing. There are three ways to connect to a Ray cluster: - - **Default**: By default QA will initialize a local Ray cluster within the docker container. - - Note: The default ray cluster does not host the Ray dashboard. +Once the devcontainer is built, run the following commands within the container terminal to use QuickAnnotator + +1. Connect to a Ray cluster. Ray is used to run operations which require asynchronous processing. 
There are two ways to connect to a Ray cluster: - **Manual local cluster**: Run the following command to start a Ray cluster with the Ray dashboard: ```bash ray start --head --dashboard-host 0.0.0.0 ``` - - **Pre-existing cluster**: If you would like QA to connect to an existing Ray cluster, use the `--cluster_address` argument. + - **Pre-existing cluster**: To add the container to an existing cluster, use the `--address` argument. + ```bash + ray start --address=<head-node-address:port> + ``` -2. Once the devcontainer is built, you can run the following command to start the QuickAnnotator server: +2. Run the following command to start the QuickAnnotator server: ``` (venv) root@e4392ecdd8ef:/opt/QuickAnnotator# quickannotator * Serving Flask app '__main__' diff --git a/quickannotator/dl/ray_jackson.py b/quickannotator/dl/ray_jackson.py index 9fef03dd..8c40ce53 100644 --- a/quickannotator/dl/ray_jackson.py +++ b/quickannotator/dl/ray_jackson.py @@ -1,4 +1,5 @@ import logging +import os from quickannotator.db.logging import LoggingManager import ray from ray.train import ScalingConfig @@ -49,12 +50,13 @@ def start_dlproc(self, allow_pred=True): self.setProcRunningSince() total_gpus = ray.cluster_resources().get("GPU", 0) - self.logger.info(f"Total GPUs available: {total_gpus}") + self.logger.info(f"{os.environ['RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES']=}") + self.logger.info(f"{os.environ['CUDA_VISIBLE_DEVICES']=}") scaling_config = ray.train.ScalingConfig( num_workers=int(total_gpus), use_gpu=True, resources_per_worker={"GPU": .01}, - placement_strategy="STRICT_SPREAD" + # placement_strategy="STRICT_SPREAD" #TODO: remove ) trainer = ray.train.torch.TorchTrainer(