From 620f80f26933a137d8768db8633b6f6e930d9400 Mon Sep 17 00:00:00 2001 From: eromomon Date: Tue, 6 May 2025 13:32:58 -0700 Subject: [PATCH 1/8] Restore default CI configuration for VAE and Siamese examples using Accelerator API --- siamese_network/README.md | 10 ++++++---- siamese_network/main.py | 23 +++++++++++------------ vae/README.md | 2 +- vae/main.py | 16 +++++++--------- 4 files changed, 25 insertions(+), 26 deletions(-) diff --git a/siamese_network/README.md b/siamese_network/README.md index 8e30d2cd73..553bef5cfa 100644 --- a/siamese_network/README.md +++ b/siamese_network/README.md @@ -21,17 +21,19 @@ Optionally, you can add the following arguments to customize your execution. --epochs number of epochs to train (default: 14) --lr learning rate (default: 1.0) --gamma learning rate step gamma (default: 0.7) ---accel use accelerator +--no-accel disables accelerator --dry-run quickly check a single pass --seed random seed (default: 1) --log-interval how many batches to wait before logging training status --save-model Saving the current Model ``` -To execute in an GPU, add the --accel argument to the command. For example: +If an accelerator is detected, the example will be executed on the accelerator by default; otherwise,it will runon the CPU + +To disable the accelerator option, add the --no-accel argument to the command. For example: ```bash -python main.py --accel +python main.py --no-accel ``` -This command will execute the example on the detected GPU. \ No newline at end of file +This command will execute the example on the CPU even if your system successfully detects an XPU. 
\ No newline at end of file diff --git a/siamese_network/main.py b/siamese_network/main.py index 3e3cc1e86c..f8c4f018a5 100644 --- a/siamese_network/main.py +++ b/siamese_network/main.py @@ -247,8 +247,8 @@ def main(): help='learning rate (default: 1.0)') parser.add_argument('--gamma', type=float, default=0.7, metavar='M', help='Learning rate step gamma (default: 0.7)') - parser.add_argument('--accel', action='store_true', - help='use accelerator') + parser.add_argument('--no-accel', action='store_true', + help='disables accelerator') parser.add_argument('--dry-run', action='store_true', default=False, help='quickly check a single pass') parser.add_argument('--seed', type=int, default=1, metavar='S', @@ -258,16 +258,15 @@ def main(): parser.add_argument('--save-model', action='store_true', default=False, help='For Saving the current Model') args = parser.parse_args() + + use_accel = not args.no_accel and torch.accelerator.is_available() torch.manual_seed(args.seed) - if args.accel and not torch.accelerator.is_available(): - print("ERROR: accelerator is not available, try running on CPU") - sys.exit(1) - if not args.accel and torch.accelerator.is_available(): - print("WARNING: accelerator is available, run with --accel to enable it") + if args.no_accel and torch.accelerator.is_available(): + print("WARNING: accelerator is available, remove --no-accel to enable accelerator") - if args.accel: + if use_accel: device = torch.accelerator.current_accelerator() else: device = torch.device("cpu") @@ -276,12 +275,12 @@ def main(): train_kwargs = {'batch_size': args.batch_size} test_kwargs = {'batch_size': args.test_batch_size} - if device=="cuda": - cuda_kwargs = {'num_workers': 1, + if use_accel: + accel_kwargs = {'num_workers': 1, 'pin_memory': True, 'shuffle': True} - train_kwargs.update(cuda_kwargs) - test_kwargs.update(cuda_kwargs) + train_kwargs.update(accel_kwargs) + test_kwargs.update(accel_kwargs) train_dataset = APP_MATCHER('../data', train=True, download=True) 
test_dataset = APP_MATCHER('../data', train=False) diff --git a/vae/README.md b/vae/README.md index 81e6458d7a..1d9acf6663 100644 --- a/vae/README.md +++ b/vae/README.md @@ -13,7 +13,7 @@ The main.py script accepts the following optional arguments: ```bash --batch-size input batch size for training (default: 128) --epochs number of epochs to train (default: 10) ---accel use accelerator +--no-accel disables accelerator --seed random seed (default: 1) --log-interval how many batches to wait before logging training status ``` \ No newline at end of file diff --git a/vae/main.py b/vae/main.py index 9a6850ccd1..ee31a8867b 100644 --- a/vae/main.py +++ b/vae/main.py @@ -13,31 +13,29 @@ help='input batch size for training (default: 128)') parser.add_argument('--epochs', type=int, default=10, metavar='N', help='number of epochs to train (default: 10)') -parser.add_argument('--accel', action='store_true', - help='use accelerator') +parser.add_argument('--no-accel', action='store_true', + help='disables accelerator') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument('--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status') args = parser.parse_args() +use_accel = not args.no_accel and torch.accelerator.is_available() torch.manual_seed(args.seed) -if args.accel and not torch.accelerator.is_available(): - print("ERROR: accelerator is not available, try running on CPU") - sys.exit(1) -if not args.accel and torch.accelerator.is_available(): - print("WARNING: accelerator is available, run with --accel to enable it") +if args.no_accel and torch.accelerator.is_available(): + print("WARNING: accelerator is available, remove --no-accel to enable accelerator") -if args.accel: +if use_accel: device = torch.accelerator.current_accelerator() else: device = torch.device("cpu") print(f"Using device: {device}") -kwargs = {'num_workers': 1, 'pin_memory': True} if 
device=="cuda" else {} +kwargs = {'num_workers': 1, 'pin_memory': True} if use_accel else {} train_loader = torch.utils.data.DataLoader( datasets.MNIST('../data', train=True, download=True, transform=transforms.ToTensor()), From 35ed44849c8bc24bb4aa5557eaee2cade99f9a0e Mon Sep 17 00:00:00 2001 From: eromomon Date: Tue, 6 May 2025 14:30:44 -0700 Subject: [PATCH 2/8] Update Siamese Readme for consistency with accelerator argument --- siamese_network/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/siamese_network/README.md b/siamese_network/README.md index 553bef5cfa..95d3bcc658 100644 --- a/siamese_network/README.md +++ b/siamese_network/README.md @@ -28,7 +28,7 @@ Optionally, you can add the following arguments to customize your execution. --save-model Saving the current Model ``` -If an accelerator is detected, the example will be executed on the accelerator by default; otherwise,it will runon the CPU +If an accelerator is detected, the example will be executed on the accelerator by default; otherwise,it will run on the CPU To disable the accelerator option, add the --no-accel argument to the command. For example: @@ -36,4 +36,4 @@ To disable the accelerator option, add the --no-accel argument to the command. F python main.py --no-accel ``` -This command will execute the example on the CPU even if your system successfully detects an XPU. \ No newline at end of file +This command will execute the example on the CPU even if your system successfully detects an accelerator. 
\ No newline at end of file From 65783738bd1d2a32ab69be573b1b076fa78c5ade Mon Sep 17 00:00:00 2001 From: eromomon Date: Tue, 6 May 2025 16:49:18 -0600 Subject: [PATCH 3/8] Update siamese_network/README.md Co-authored-by: Dmitry Rogozhkin --- siamese_network/README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/siamese_network/README.md b/siamese_network/README.md index 95d3bcc658..4d5b618f4d 100644 --- a/siamese_network/README.md +++ b/siamese_network/README.md @@ -28,9 +28,7 @@ Optionally, you can add the following arguments to customize your execution. --save-model Saving the current Model ``` -If an accelerator is detected, the example will be executed on the accelerator by default; otherwise,it will run on the CPU - -To disable the accelerator option, add the --no-accel argument to the command. For example: +To run the example, execute: ```bash python main.py --no-accel From 8fac44bfc89a554b901e23f9fef038f4b814533a Mon Sep 17 00:00:00 2001 From: eromomon Date: Tue, 6 May 2025 16:49:28 -0600 Subject: [PATCH 4/8] Update siamese_network/README.md Co-authored-by: Dmitry Rogozhkin --- siamese_network/README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/siamese_network/README.md b/siamese_network/README.md index 4d5b618f4d..d5be258a52 100644 --- a/siamese_network/README.md +++ b/siamese_network/README.md @@ -33,5 +33,3 @@ To run the example, execute: ```bash python main.py --no-accel ``` - -This command will execute the example on the CPU even if your system successfully detects an accelerator. 
\ No newline at end of file From ec4c3d871749185066d3dea5346db990d3ea3456 Mon Sep 17 00:00:00 2001 From: eromomon Date: Tue, 6 May 2025 16:49:48 -0600 Subject: [PATCH 5/8] Update siamese_network/main.py Co-authored-by: Dmitry Rogozhkin --- siamese_network/main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/siamese_network/main.py b/siamese_network/main.py index f8c4f018a5..d2e8e09207 100644 --- a/siamese_network/main.py +++ b/siamese_network/main.py @@ -263,8 +263,6 @@ def main(): torch.manual_seed(args.seed) - if args.no_accel and torch.accelerator.is_available(): - print("WARNING: accelerator is available, remove --no-accel to enable accelerator") if use_accel: device = torch.accelerator.current_accelerator() From ae42dc84ce8cbff4f82f98f48f801056f9272123 Mon Sep 17 00:00:00 2001 From: eromomon Date: Tue, 6 May 2025 16:50:12 -0600 Subject: [PATCH 6/8] Update vae/main.py Co-authored-by: Dmitry Rogozhkin --- vae/main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vae/main.py b/vae/main.py index ee31a8867b..6390965810 100644 --- a/vae/main.py +++ b/vae/main.py @@ -25,8 +25,6 @@ torch.manual_seed(args.seed) -if args.no_accel and torch.accelerator.is_available(): - print("WARNING: accelerator is available, remove --no-accel to enable accelerator") if use_accel: device = torch.accelerator.current_accelerator() From 3743fa7f51ad6a7c3af35fc0d7920226a80e4706 Mon Sep 17 00:00:00 2001 From: eromomon Date: Tue, 6 May 2025 16:31:31 -0700 Subject: [PATCH 7/8] Improve Readme files for clearer descriptions --- siamese_network/README.md | 4 +++- vae/README.md | 10 +++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/siamese_network/README.md b/siamese_network/README.md index d5be258a52..ba5089042c 100644 --- a/siamese_network/README.md +++ b/siamese_network/README.md @@ -28,7 +28,9 @@ Optionally, you can add the following arguments to customize your execution. 
--save-model Saving the current Model ``` -To run the example, execute: +If a hardware accelerator device is detected, the example will execute on the accelerator; otherwise, it will run on the CPU. + +To force execution on the CPU, use `--no-accel` command line argument: ```bash python main.py --no-accel diff --git a/vae/README.md b/vae/README.md index 1d9acf6663..7436e12874 100644 --- a/vae/README.md +++ b/vae/README.md @@ -16,4 +16,12 @@ The main.py script accepts the following optional arguments: --no-accel disables accelerator --seed random seed (default: 1) --log-interval how many batches to wait before logging training status -``` \ No newline at end of file +``` + +If a hardware accelerator device is detected, the example will execute on the accelerator; otherwise, it will run on the CPU. + +To force execution on the CPU, use `--no-accel` command line argument: + +```bash +python main.py --no-accel +``` From 3c709de552f5e172886ff7890c5cfd1915b515dd Mon Sep 17 00:00:00 2001 From: eromomon Date: Tue, 6 May 2025 18:45:21 -0700 Subject: [PATCH 8/8] Update Readme file structure to enhance organization --- siamese_network/README.md | 24 ++++++++++++++++-------- vae/README.md | 21 ++++++++++++++------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/siamese_network/README.md b/siamese_network/README.md index ba5089042c..d11f83a9a0 100644 --- a/siamese_network/README.md +++ b/siamese_network/README.md @@ -8,11 +8,27 @@ This implementation varies from FaceNet as we use the `ResNet-18` model from [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf) as our feature extractor. In addition, we aren't using `TripletLoss` as the MNIST dataset is simple, so `BCELoss` can do the trick. +### Usage + +Install the required dependencies: ```bash pip install -r requirements.txt +``` + +To run the example, execute: +```bash python main.py # CUDA_VISIBLE_DEVICES=2 python main.py # to specify GPU id to ex. 
2 ``` + +If a hardware accelerator device is detected, the example will execute on the accelerator; otherwise, it will run on the CPU. + +To force execution on the CPU, use `--no-accel` command line argument: + +```bash +python main.py --no-accel +``` + Optionally, you can add the following arguments to customize your execution. ```bash @@ -27,11 +43,3 @@ Optionally, you can add the following arguments to customize your execution. --log-interval how many batches to wait before logging training status --save-model Saving the current Model ``` - -If a hardware accelerator device is detected, the example will execute on the accelerator; otherwise, it will run on the CPU. - -To force execution on the CPU, use `--no-accel` command line argument: - -```bash -python main.py --no-accel -``` diff --git a/vae/README.md b/vae/README.md index 7436e12874..fcaae9a286 100644 --- a/vae/README.md +++ b/vae/README.md @@ -3,11 +3,25 @@ This is an improved implementation of the paper [Auto-Encoding Variational Bayes](http://arxiv.org/abs/1312.6114) by Kingma and Welling. It uses ReLUs and the adam optimizer, instead of sigmoids and adagrad. These changes make the network converge much faster. +### Usage +Install the required dependencies: ```bash pip install -r requirements.txt +``` + +To run the example, execute: +```bash python main.py ``` +If a hardware accelerator device is detected, the example will execute on the accelerator; otherwise, it will run on the CPU. + +To force execution on the CPU, use `--no-accel` command line argument: + +```bash +python main.py --no-accel +``` + The main.py script accepts the following optional arguments: ```bash @@ -18,10 +32,3 @@ The main.py script accepts the following optional arguments: --log-interval how many batches to wait before logging training status ``` -If a hardware accelerator device is detected, the example will execute on the accelerator; otherwise, it will run on the CPU. 
- -To force execution on the CPU, use `--no-accel` command line argument: - -```bash -python main.py --no-accel -```