From b9732353ed1a15044db2c8ccf9a48ad4a14b427d Mon Sep 17 00:00:00 2001
From: Dmitry Shmulevich <dshmulevich@nvidia.com>
Date: Sat, 10 Aug 2024 07:35:56 -0700
Subject: [PATCH] update gang-scheduling benchmark (#92)

Signed-off-by: Dmitry Shmulevich <dshmulevich@nvidia.com>
---
 .../benchmarks/gang-scheduling/README.md      |  35 +++
 .../workflows/config-jobset.yml               |   9 +
 .../workflows/config-kueue.yml                |  50 ++++
 .../workflows/config-nodes.yml                |  13 -
 .../workflows/config-volcano.yml              |  31 +++
 .../workflows/config-yunikorn.yml             |  29 ++
 .../workflows/run-test-common.yml             | 135 ++++++++++
 .../workflows/run-test-runai.yml              |  11 +-
 .../gang-scheduling/workflows/run-test.yml    | 249 ------------------
 9 files changed, 299 insertions(+), 263 deletions(-)
 create mode 100644 resources/benchmarks/gang-scheduling/README.md
 create mode 100644 resources/benchmarks/gang-scheduling/workflows/config-jobset.yml
 create mode 100644 resources/benchmarks/gang-scheduling/workflows/config-kueue.yml
 delete mode 100644 resources/benchmarks/gang-scheduling/workflows/config-nodes.yml
 create mode 100644 resources/benchmarks/gang-scheduling/workflows/config-volcano.yml
 create mode 100644 resources/benchmarks/gang-scheduling/workflows/config-yunikorn.yml
 create mode 100644 resources/benchmarks/gang-scheduling/workflows/run-test-common.yml
 delete mode 100644 resources/benchmarks/gang-scheduling/workflows/run-test.yml

diff --git a/resources/benchmarks/gang-scheduling/README.md b/resources/benchmarks/gang-scheduling/README.md
new file mode 100644
index 0000000..b12b7eb
--- /dev/null
+++ b/resources/benchmarks/gang-scheduling/README.md
@@ -0,0 +1,35 @@
+# Gang Scheduling Benchmark Test
+
+This directory contains gang scheduling benchmark tests for the following workload managers and schedulers:
+
+- Jobset
+- Kueue
+- Volcano
+- Yunikorn
+- Run:ai
+
+The gang-scheduling benchmark workflow operates on 32 virtual GPU nodes, submitting a burst of 53 jobs with replica numbers ranging from 1 to 32 in a [predetermined order](workflows/run-test-common.yml).
+
+The workload is designed to fully utilize the cluster under optimal scheduling conditions.
+
+One way to benchmark is to submit this workload to clusters that use different schedulers and then compare the average GPU occupancy of the nodes.
+
+## Usage
+
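+For all workload managers except Run:ai, the benchmark test consists of two workflows run in sequence. The first workflow (`config-<name>.yml`) registers the scheduler-specific resources (the job template and, where needed, queue or scheduler configuration), and the second workflow (`run-test-common.yml`) runs the common part of the test: it configures the nodes and submits the jobs.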
+
+### Example
+
+To run the benchmark test for Kueue:
+
+```bash
+./bin/knavigator -workflow resources/benchmarks/gang-scheduling/workflows/config-kueue.yml,resources/benchmarks/gang-scheduling/workflows/run-test-common.yml
+```
+
+### Run:ai
+
+Run:ai requires additional customization and thus has a separate workflow:
+
+```bash
+./bin/knavigator -workflow resources/benchmarks/gang-scheduling/workflows/run-test-runai.yml
+```
diff --git a/resources/benchmarks/gang-scheduling/workflows/config-jobset.yml b/resources/benchmarks/gang-scheduling/workflows/config-jobset.yml
new file mode 100644
index 0000000..610b689
--- /dev/null
+++ b/resources/benchmarks/gang-scheduling/workflows/config-jobset.yml
@@ -0,0 +1,9 @@
+name: config-jobset  # Jobset-specific setup; run before run-test-common.yml
+tasks:
+- id: register  # task id that run-test-common.yml references via refTaskId
+  type: RegisterObj
+  params:
+    template: "resources/benchmarks/templates/k8s/jobset.yml"
+    nameFormat: "jobset{{._ENUM_}}"
+    podNameFormat: "{{._NAME_}}-workers-[0-9]+-[0-9]+-.+"
+    podCount: "{{.replicas}}"  # replicas is supplied per job task in run-test-common.yml
diff --git a/resources/benchmarks/gang-scheduling/workflows/config-kueue.yml b/resources/benchmarks/gang-scheduling/workflows/config-kueue.yml
new file mode 100644
index 0000000..03cbc14
--- /dev/null
+++ b/resources/benchmarks/gang-scheduling/workflows/config-kueue.yml
@@ -0,0 +1,50 @@
+name: config-kueue  # Kueue-specific setup; run before run-test-common.yml
+tasks:
+- id: register-cluster-queue  # template consumed by create-cluster-queue below
+  type: RegisterObj
+  params:
+    template: "resources/templates/kueue/cluster-queue.yml"
+- id: register-local-queue  # template consumed by create-local-queue below
+  type: RegisterObj
+  params:
+    template: "resources/templates/kueue/local-queue.yml"
+- id: register-resource-flavor  # template consumed by create-resource-flavor below
+  type: RegisterObj
+  params:
+    template: "resources/templates/kueue/resource-flavor.yml"
+- id: register  # task id that run-test-common.yml references via refTaskId
+  type: RegisterObj
+  params:
+    template: "resources/benchmarks/templates/kueue/job.yml"
+    nameFormat: "job{{._ENUM_}}"
+    podNameFormat: "{{._NAME_}}-[0-9]-.*"  # NOTE(review): single-digit [0-9], unlike [0-9]+ in config-jobset.yml; confirm multi-digit values cannot occur here
+    podCount: "{{.replicas}}"  # replicas is supplied per job task in run-test-common.yml
+- id: create-resource-flavor
+  type: SubmitObj
+  params:
+    refTaskId: register-resource-flavor
+    canExist: true  # presumably tolerates an already-existing object; TODO confirm
+    params:
+      name: "gpu-node"
+      nodeLabels:
+        nvidia.com/gpu.count: "8"  # same label value run-test-common.yml sets on its nodes
+- id: create-cluster-queue
+  type: SubmitObj
+  params:
+    refTaskId: register-cluster-queue
+    canExist: true  # presumably tolerates an already-existing object; TODO confirm
+    params:
+      name: team
+      flavor: gpu-node  # name given to the resource flavor in create-resource-flavor
+      cpu: 8
+      memory: 36Gi
+      gpu: 256  # 32 nodes x 8 GPUs, matching the node setup in run-test-common.yml
+- id: create-local-queue
+  type: SubmitObj
+  params:
+    refTaskId: register-local-queue
+    canExist: true  # presumably tolerates an already-existing object; TODO confirm
+    params:
+      name: team-queue
+      namespace: default
+      clusterQueue: team  # name given to the cluster queue in create-cluster-queue
diff --git a/resources/benchmarks/gang-scheduling/workflows/config-nodes.yml b/resources/benchmarks/gang-scheduling/workflows/config-nodes.yml
deleted file mode 100644
index a634f52..0000000
--- a/resources/benchmarks/gang-scheduling/workflows/config-nodes.yml
+++ /dev/null
@@ -1,13 +0,0 @@
-name: config-nodes
-description: |
-  This test is designed to run on a cluster of 32 GPU nodes (8 GPUs per node).
-tasks:
-- id: configure
-  type: Configure
-  params:
-    nodes:
-    - type: dgxa100.80g
-      count: 32
-      labels:
-        nvidia.com/gpu.count: "8"
-    timeout: 1m
diff --git a/resources/benchmarks/gang-scheduling/workflows/config-volcano.yml b/resources/benchmarks/gang-scheduling/workflows/config-volcano.yml
new file mode 100644
index 0000000..53673af
--- /dev/null
+++ b/resources/benchmarks/gang-scheduling/workflows/config-volcano.yml
@@ -0,0 +1,31 @@
+name: config-volcano  # Volcano-specific setup; run before run-test-common.yml
+tasks:
+- id: register  # task id that run-test-common.yml references via refTaskId
+  type: RegisterObj
+  params:
+    template: "resources/benchmarks/templates/volcano/job.yml"
+    nameFormat: "j{{._ENUM_}}"
+    podNameFormat: "{{._NAME_}}-test-[0-9]+"
+    podCount: "{{.replicas}}"  # replicas is supplied per job task in run-test-common.yml
+- id: configure  # supplies the scheduler configmap listed below
+  type: Configure
+  params:
+    configmaps:
+    - name: volcano-scheduler-configmap
+      namespace: volcano-system
+      op: create
+      data:
+        volcano-scheduler.conf: |
+          actions: "enqueue, allocate, backfill"
+          tiers:
+          - plugins:
+            - name: priority
+            - name: gang
+            - name: conformance
+          - plugins:
+            - name: drf
+            - name: predicates
+            - name: proportion
+            - name: nodeorder
+            - name: binpack
+    timeout: 1m
diff --git a/resources/benchmarks/gang-scheduling/workflows/config-yunikorn.yml b/resources/benchmarks/gang-scheduling/workflows/config-yunikorn.yml
new file mode 100644
index 0000000..79a7eeb
--- /dev/null
+++ b/resources/benchmarks/gang-scheduling/workflows/config-yunikorn.yml
@@ -0,0 +1,29 @@
+name: config-yunikorn  # Yunikorn-specific setup; run before run-test-common.yml
+tasks:
+- id: register  # task id that run-test-common.yml references via refTaskId
+  type: RegisterObj
+  params:
+    template: "resources/benchmarks/templates/yunikorn/job.yml"
+    nameFormat: "job{{._ENUM_}}"
+    podNameFormat: "{{._NAME_}}-.*"
+    podCount: "{{.replicas}}"  # replicas is supplied per job task in run-test-common.yml
+- id: configure  # supplies the queue configmap listed below
+  type: Configure
+  params:
+    configmaps:
+    - name: yunikorn-configs
+      namespace: yunikorn
+      op: create
+      data:
+        queues.yaml: |
+          partitions:
+            - name: default
+              queues:
+              - name: root
+                queues:
+                - name: sandbox
+                  submitacl: '*'
+                  resources:
+                    max:
+                      {memory: 36Gi, vcore: 8000m, nvidia.com/gpu: 256}
+    timeout: 1m
diff --git a/resources/benchmarks/gang-scheduling/workflows/run-test-common.yml b/resources/benchmarks/gang-scheduling/workflows/run-test-common.yml
new file mode 100644
index 0000000..33cd019
--- /dev/null
+++ b/resources/benchmarks/gang-scheduling/workflows/run-test-common.yml
@@ -0,0 +1,135 @@
+name: test-gang-scheduling  # scheduler-independent part; expects a `register` task from a config-*.yml workflow
+tasks:
+- id: configure
+  type: Configure
+  params:
+    nodes:
+    - type: dgxa100.80g
+      count: 32  # the benchmark cluster size: 32 nodes
+      labels:
+        nvidia.com/gpu.count: "8"  # 32 nodes x 8 = 256 GPUs, the figure used in the Kueue and Yunikorn configs
+    timeout: 1m
+- id: sleep  # pause between node configuration and the first job submission
+  type: Sleep
+  params:
+    timeout: 5s
+- id: job1
+  type: SubmitObj
+  params:
+    refTaskId: register  # `register` is defined by the config-*.yml workflow run before this one
+    count: 1  # 1 job x 32 replicas = 32
+    params:
+      replicas: 32
+      ttl: 30s
+- id: job2
+  type: SubmitObj
+  params:
+    refTaskId: register
+    count: 2  # 2 jobs x 16 replicas = 32
+    params:
+      replicas: 16
+      ttl: 30s
+- id: job3
+  type: SubmitObj
+  params:
+    refTaskId: register
+    count: 3  # 3 jobs x 10 replicas = 30
+    params:
+      replicas: 10
+      ttl: 30s
+- id: job3.1
+  type: SubmitObj
+  params:
+    refTaskId: register
+    count: 1  # 1 job x 2 replicas = 2; with job3: 30 + 2 = 32
+    params:
+      replicas: 2
+      ttl: 30s
+- id: job4
+  type: SubmitObj
+  params:
+    refTaskId: register
+    count: 4  # 4 jobs x 8 replicas = 32
+    params:
+      replicas: 8
+      ttl: 30s
+- id: job5
+  type: SubmitObj
+  params:
+    refTaskId: register
+    count: 5  # 5 jobs x 6 replicas = 30
+    params:
+      replicas: 6
+      ttl: 30s
+- id: job5.1
+  type: SubmitObj
+  params:
+    refTaskId: register
+    count: 2  # 2 jobs x 1 replica = 2; with job5: 30 + 2 = 32
+    params:
+      replicas: 1
+      ttl: 30s
+- id: job6
+  type: SubmitObj
+  params:
+    refTaskId: register
+    count: 6  # 6 jobs x 5 replicas = 30
+    params:
+      replicas: 5
+      ttl: 30s
+- id: job6.1
+  type: SubmitObj
+  params:
+    refTaskId: register
+    count: 1  # 1 job x 2 replicas = 2; with job6: 30 + 2 = 32
+    params:
+      replicas: 2
+      ttl: 30s
+- id: job7
+  type: SubmitObj
+  params:
+    refTaskId: register
+    count: 7  # 7 jobs x 4 replicas = 28
+    params:
+      replicas: 4
+      ttl: 30s
+- id: job7.1
+  type: SubmitObj
+  params:
+    refTaskId: register
+    count: 1  # 1 job x 2 replicas = 2
+    params:
+      replicas: 2
+      ttl: 30s
+- id: job7.2
+  type: SubmitObj
+  params:
+    refTaskId: register
+    count: 2  # 2 jobs x 1 replica = 2; with job7 and job7.1: 28 + 2 + 2 = 32
+    params:
+      replicas: 1
+      ttl: 30s
+- id: job8
+  type: SubmitObj
+  params:
+    refTaskId: register  # `register` is defined by the config-*.yml workflow run before this one
+    count: 8  # 8 jobs x 4 replicas = 32
+    params:
+      replicas: 4
+      ttl: 30s
+- id: job9
+  type: SubmitObj
+  params:
+    refTaskId: register  # `register` is defined by the config-*.yml workflow run before this one
+    count: 9  # 9 jobs x 3 replicas = 27
+    params:
+      replicas: 3
+      ttl: 30s
+- id: job9.1
+  type: SubmitObj
+  params:
+    refTaskId: register
+    count: 1  # 1 job x 5 replicas = 5; with job9: 27 + 5 = 32
+    params:
+      replicas: 5
+      ttl: 30s
diff --git a/resources/benchmarks/gang-scheduling/workflows/run-test-runai.yml b/resources/benchmarks/gang-scheduling/workflows/run-test-runai.yml
index 1a489e9..7798d4b 100644
--- a/resources/benchmarks/gang-scheduling/workflows/run-test-runai.yml
+++ b/resources/benchmarks/gang-scheduling/workflows/run-test-runai.yml
@@ -1,5 +1,14 @@
-name: test-gang-scheduling
+name: test-gang-scheduling-runai
 tasks:
+- id: configure  # node setup, same values as the configure task in run-test-common.yml
+  type: Configure
+  params:
+    nodes:
+    - type: dgxa100.80g
+      count: 32  # the benchmark cluster size: 32 nodes
+      labels:
+        nvidia.com/gpu.count: "8"  # 32 nodes x 8 = 256 GPUs in total
+    timeout: 1m
 - id: register-trainingworkload
   type: RegisterObj
   params:
diff --git a/resources/benchmarks/gang-scheduling/workflows/run-test.yml b/resources/benchmarks/gang-scheduling/workflows/run-test.yml
deleted file mode 100644
index 042dcd4..0000000
--- a/resources/benchmarks/gang-scheduling/workflows/run-test.yml
+++ /dev/null
@@ -1,249 +0,0 @@
-name: test-gang-scheduling
-tasks:
-### Job
-#- id: register
-#  type: RegisterObj
-#  params:
-#    template: "resources/benchmarks/templates/k8s/job.yml"
-#    nameFormat: "job{{._ENUM_}}"
-#    podNameFormat: "{{._NAME_}}-[0-9]-.*"
-#    podCount: "{{.replicas}}"
-
-### Jobset
-#- id: register
-#  type: RegisterObj
-#  params:
-#    template: "resources/benchmarks/templates/k8s/jobset.yml"
-#    nameFormat: "jobset{{._ENUM_}}"
-#    podNameFormat: "{{._NAME_}}-workers-[0-9]+-[0-9]+-.+"
-#    podCount: "{{.replicas}}"
-
-### Kueue
-#- id: register-cluster-queue
-#  type: RegisterObj
-#  params:
-#    template: "resources/templates/kueue/cluster-queue.yml"
-#- id: register-local-queue
-#  type: RegisterObj
-#  params:
-#    template: "resources/templates/kueue/local-queue.yml"
-#- id: register-resource-flavor
-#  type: RegisterObj
-#  params:
-#    template: "resources/templates/kueue/resource-flavor.yml"
-#- id: register
-#  type: RegisterObj
-#  params:
-#    template: "resources/benchmarks/templates/kueue/job.yml"
-#    nameFormat: "job{{._ENUM_}}"
-#    podNameFormat: "{{._NAME_}}-[0-9]-.*"
-#    podCount: "{{.replicas}}"
-#- id: create-resource-flavor
-#  type: SubmitObj
-#  params:
-#    refTaskId: register-resource-flavor
-#    params:
-#      name: "gpu-node"
-#      nodeLabels:
-#        nvidia.com/gpu.count: "8"
-#- id: create-cluster-queue
-#  type: SubmitObj
-#  params:
-#    refTaskId: register-cluster-queue
-#    params:
-#      name: team
-#      flavor: gpu-node
-#      cpu: 8
-#      memory: 36Gi
-#      gpu: 256
-#- id: create-local-queue
-#  type: SubmitObj
-#  params:
-#    refTaskId: register-local-queue
-#    params:
-#      name: team-queue
-#      namespace: default
-#      clusterQueue: team
-
-### Volcano
-#- id: register
-#  type: RegisterObj
-#  params:
-#    template: "resources/benchmarks/templates/volcano/job.yml"
-#    nameFormat: "j{{._ENUM_}}"
-#    podNameFormat: "{{._NAME_}}-test-[0-9]+"
-#    podCount: "{{.replicas}}"
-#- id: configure
-#  type: Configure
-#  params:
-#    configmaps:
-#    - name: volcano-scheduler-configmap
-#      namespace: volcano-system
-#      op: create
-#      data:
-#        volcano-scheduler.conf: |
-#          actions: "enqueue, allocate, backfill"
-#          tiers:
-#          - plugins:
-#            - name: priority
-#            - name: gang
-#            - name: conformance
-#          - plugins:
-#            - name: drf
-#            - name: predicates
-#            - name: proportion
-#            - name: nodeorder
-#            - name: binpack
-#    timeout: 1m
-
-### Yunikorn
-#- id: register
-#  type: RegisterObj
-#  params:
-#    template: "resources/benchmarks/templates/yunikorn/job.yml"
-#    nameFormat: "job{{._ENUM_}}"
-#    podNameFormat: "{{._NAME_}}-.*"
-#    podCount: "{{.replicas}}"
-#- id: configure
-#  type: Configure
-#  params:
-#    configmaps:
-#    - name: yunikorn-configs
-#      namespace: yunikorn
-#      op: create
-#      data:
-#        queues.yaml: |
-#          partitions:
-#            - name: default
-#              queues:
-#              - name: root
-#                queues:
-#                - name: sandbox
-#                  submitacl: '*'
-#                  resources:
-#                    max:
-#                      {memory: 36Gi, vcore: 8000m, nvidia.com/gpu: 256}
-#    timeout: 1m
-#
-### Benchmark test
-#
-- id: job1
-  type: SubmitObj
-  params:
-    refTaskId: register
-    count: 1
-    params:
-      replicas: 32
-      ttl: 30s
-- id: job2
-  type: SubmitObj
-  params:
-    refTaskId: register
-    count: 2
-    params:
-      replicas: 16
-      ttl: 30s
-- id: job3
-  type: SubmitObj
-  params:
-    refTaskId: register
-    count: 3
-    params:
-      replicas: 10
-      ttl: 30s
-- id: job3.1
-  type: SubmitObj
-  params:
-    refTaskId: register
-    count: 1
-    params:
-      replicas: 2
-      ttl: 30s
-- id: job4
-  type: SubmitObj
-  params:
-    refTaskId: register
-    count: 4
-    params:
-      replicas: 8
-      ttl: 30s
-- id: job5
-  type: SubmitObj
-  params:
-    refTaskId: register
-    count: 5
-    params:
-      replicas: 6
-      ttl: 30s
-- id: job5.1
-  type: SubmitObj
-  params:
-    refTaskId: register
-    count: 2
-    params:
-      replicas: 1
-      ttl: 30s
-- id: job6
-  type: SubmitObj
-  params:
-    refTaskId: register
-    count: 6
-    params:
-      replicas: 5
-      ttl: 30s
-- id: job6.1
-  type: SubmitObj
-  params:
-    refTaskId: register
-    count: 1
-    params:
-      replicas: 2
-      ttl: 30s
-- id: job7
-  type: SubmitObj
-  params:
-    refTaskId: register
-    count: 7
-    params:
-      replicas: 4
-      ttl: 30s
-- id: job7.1
-  type: SubmitObj
-  params:
-    refTaskId: register
-    count: 1
-    params:
-      replicas: 2
-      ttl: 30s
-- id: job7.2
-  type: SubmitObj
-  params:
-    refTaskId: register
-    count: 2
-    params:
-      replicas: 1
-      ttl: 30s
-- id: job8
-  type: SubmitObj
-  params:
-    refTaskId: register
-    count: 8
-    params:
-     replicas: 4
-     ttl: 30s
-- id: job9
-  type: SubmitObj
-  params:
-    refTaskId: register
-    count: 9
-    params:
-      replicas: 3
-      ttl: 30s
-- id: job9.1
-  type: SubmitObj
-  params:
-    refTaskId: register
-    count: 1
-    params:
-      replicas: 5
-      ttl: 30s