From 70218aedc1069445c96d595ad2eff8fccbeb746d Mon Sep 17 00:00:00 2001
From: Dmitry Shmulevich
Date: Tue, 10 Dec 2024 17:18:12 -0800
Subject: [PATCH] add benchmark for network topology with
 requiredDuringSchedulingIgnoredDuringExecution

Signed-off-by: Dmitry Shmulevich
---
 .../nwtopo/templates/jobset/jobset-acc.yaml   |  67 ++++++++
 .../nwtopo/templates/jobset/jobset.yaml       |   4 +-
 .../nwtopo/templates/runai/mpijob.yaml        |   4 +-
 .../nwtopo/workflows/config-jobset-acc.yaml   |  23 +++
 .../nwtopo/workflows/config-nodes-acc.yaml    | 145 ++++++++++++++++++
 .../nwtopo/workflows/config-nodes.yaml        |  72 ++++-----
 .../nwtopo/workflows/run-test-acc.yaml        |  59 ++++++++
 7 files changed, 334 insertions(+), 40 deletions(-)
 create mode 100644 resources/benchmarks/nwtopo/templates/jobset/jobset-acc.yaml
 create mode 100644 resources/benchmarks/nwtopo/workflows/config-jobset-acc.yaml
 create mode 100644 resources/benchmarks/nwtopo/workflows/config-nodes-acc.yaml
 create mode 100644 resources/benchmarks/nwtopo/workflows/run-test-acc.yaml

diff --git a/resources/benchmarks/nwtopo/templates/jobset/jobset-acc.yaml b/resources/benchmarks/nwtopo/templates/jobset/jobset-acc.yaml
new file mode 100644
index 0000000..b6ec7cd
--- /dev/null
+++ b/resources/benchmarks/nwtopo/templates/jobset/jobset-acc.yaml
@@ -0,0 +1,67 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: jobset.x-k8s.io/v1alpha2
+kind: JobSet
+metadata:
+  name: "{{._NAME_}}"
+  namespace: default
+spec:
+  # We want to declare our JobSet successful if the workers finish.
+  # If the workers finish, we should clean up the remaining replicatedJobs.
+  successPolicy:
+    operator: All
+    targetReplicatedJobs:
+    - workers
+  replicatedJobs:
+  - name: workers
+    replicas: 1
+    template:
+      spec:
+        backoffLimit: 0
+        completions: {{.replicas}}
+        parallelism: {{.replicas}}
+        completionMode: NonIndexed
+        template:
+          metadata:
+            labels:
+              app: {{._NAME_}}
+            annotations:
+              pod-complete.stage.kwok.x-k8s.io/delay: "{{.ttl}}"
+              pod-complete.stage.kwok.x-k8s.io/jitter-delay: "{{.ttl}}"
+          spec:
+            schedulerName: default-scheduler
+            affinity:
+              podAffinity:
+                requiredDuringSchedulingIgnoredDuringExecution:
+                - labelSelector:
+                    matchExpressions:
+                    - key: app
+                      operator: In
+                      values:
+                      - {{._NAME_}}
+                  topologyKey: network.topology.kubernetes.io/accelerator
+            containers:
+            - name: test
+              image: ubuntu
+              imagePullPolicy: IfNotPresent
+              resources:
+                limits:
+                  cpu: 100m
+                  memory: 250M
+                  nvidia.com/gpu: "8"
+                requests:
+                  cpu: 100m
+                  memory: 250M
+                  nvidia.com/gpu: "8"
diff --git a/resources/benchmarks/nwtopo/templates/jobset/jobset.yaml b/resources/benchmarks/nwtopo/templates/jobset/jobset.yaml
index e52f9b1..413e4ca 100644
--- a/resources/benchmarks/nwtopo/templates/jobset/jobset.yaml
+++ b/resources/benchmarks/nwtopo/templates/jobset/jobset.yaml
@@ -53,7 +53,7 @@ spec:
                         operator: In
                         values:
                         - {{._NAME_}}
-                    topologyKey: net-layer-2
+                    topologyKey: network.topology.kubernetes.io/spine
                 - weight: 90
                   podAffinityTerm:
                     labelSelector:
@@ -62,7 +62,7 @@ spec:
                         operator: In
                         values:
                         - {{._NAME_}}
-                    topologyKey: net-layer-1
+                    topologyKey: network.topology.kubernetes.io/block
             containers:
             - name: test
               image: ubuntu
diff --git a/resources/benchmarks/nwtopo/templates/runai/mpijob.yaml b/resources/benchmarks/nwtopo/templates/runai/mpijob.yaml
index 0d194a5..0ac015a 100644
--- a/resources/benchmarks/nwtopo/templates/runai/mpijob.yaml
+++ b/resources/benchmarks/nwtopo/templates/runai/mpijob.yaml
@@ -51,7 +51,7 @@ spec:
                         operator: In
                         values:
                         - {{._NAME_}}
-                    topologyKey: net-layer-2
+                    topologyKey: network.topology.kubernetes.io/spine
                 - weight: 90
                   podAffinityTerm:
                     labelSelector:
@@ -60,7 +60,7 @@ spec:
                         operator: In
                         values:
                         - {{._NAME_}}
-                    topologyKey: net-layer-1
+                    topologyKey: network.topology.kubernetes.io/block
           schedulerName: runai-scheduler
           containers:
           - image: runai/mpi-worker:latest
diff --git a/resources/benchmarks/nwtopo/workflows/config-jobset-acc.yaml b/resources/benchmarks/nwtopo/workflows/config-jobset-acc.yaml
new file mode 100644
index 0000000..485aa0e
--- /dev/null
+++ b/resources/benchmarks/nwtopo/workflows/config-jobset-acc.yaml
@@ -0,0 +1,23 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: config-jobset
+tasks:
+- id: register
+  type: RegisterObj
+  params:
+    template: "resources/benchmarks/nwtopo/templates/jobset/jobset-acc.yaml"
+    nameFormat: "jobset{{._ENUM_}}"
+    podNameFormat: "{{._NAME_}}-workers-[0-9]+-.+"
+    podCount: "{{.replicas}}"
diff --git a/resources/benchmarks/nwtopo/workflows/config-nodes-acc.yaml b/resources/benchmarks/nwtopo/workflows/config-nodes-acc.yaml
new file mode 100644
index 0000000..d1fb5bb
--- /dev/null
+++ b/resources/benchmarks/nwtopo/workflows/config-nodes-acc.yaml
@@ -0,0 +1,145 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: config-nw-topo-nodes
+description: |
+  Create a 12-node cluster with a tree-like network topology where
+  nodes n1-n6 belong to NVLink domain nvl1 and nodes n7-n12 to nvl2:
+            __________ sw31 __________
+           /            |             \
+         sw21          sw22           sw23
+         /  \          /  \           /  \
+      sw11  sw12    sw13  sw14     sw15  sw16
+       /\    /\      /\    /\       /\    /\
+     n1 n2  n3 n4  n5 n6  n7 n8   n9 n10 n11 n12
+     |___|___|___|___|___|  |___|___|___|___|___|
+               |                      |
+             nvl1                   nvl2
+
+  The test then deploys two jobs with 6 replicas each; the required
+  pod affinity on the accelerator label confines each job to one domain.
+tasks:
+- id: configure
+  type: Configure
+  params:
+    nodes:
+    - type: dgxa100.80g
+      count: 1
+      labels:
+        node-id: n1
+        network.topology.kubernetes.io/accelerator: nvl1
+        network.topology.kubernetes.io/block: sw11
+        network.topology.kubernetes.io/spine: sw21
+        network.topology.kubernetes.io/datacenter: sw31
+        nvidia.com/gpu.count: "8"
+    - type: dgxa100.80g
+      count: 1
+      labels:
+        node-id: n2
+        network.topology.kubernetes.io/accelerator: nvl1
+        network.topology.kubernetes.io/block: sw11
+        network.topology.kubernetes.io/spine: sw21
+        network.topology.kubernetes.io/datacenter: sw31
+        nvidia.com/gpu.count: "8"
+    - type: dgxa100.80g
+      count: 1
+      labels:
+        node-id: n3
+        network.topology.kubernetes.io/accelerator: nvl1
+        network.topology.kubernetes.io/block: sw12
+        network.topology.kubernetes.io/spine: sw21
+        network.topology.kubernetes.io/datacenter: sw31
+        nvidia.com/gpu.count: "8"
+    - type: dgxa100.80g
+      count: 1
+      labels:
+        node-id: n4
+        network.topology.kubernetes.io/accelerator: nvl1
+        network.topology.kubernetes.io/block: sw12
+        network.topology.kubernetes.io/spine: sw21
+        network.topology.kubernetes.io/datacenter: sw31
+        nvidia.com/gpu.count: "8"
+    - type: dgxa100.80g
+      count: 1
+      labels:
+        node-id: n5
+        network.topology.kubernetes.io/accelerator: nvl1
+        network.topology.kubernetes.io/block: sw13
+        network.topology.kubernetes.io/spine: sw22
+        network.topology.kubernetes.io/datacenter: sw31
+        nvidia.com/gpu.count: "8"
+    - type: dgxa100.80g
+      count: 1
+      labels:
+        node-id: n6
+        network.topology.kubernetes.io/accelerator: nvl1
+        network.topology.kubernetes.io/block: sw13
+        network.topology.kubernetes.io/spine: sw22
+        network.topology.kubernetes.io/datacenter: sw31
+        nvidia.com/gpu.count: "8"
+    - type: dgxa100.80g
+      count: 1
+      labels:
+        node-id: n7
+        network.topology.kubernetes.io/accelerator: nvl2
+        network.topology.kubernetes.io/block: sw14
+        network.topology.kubernetes.io/spine: sw22
+        network.topology.kubernetes.io/datacenter: sw31
+        nvidia.com/gpu.count: "8"
+    - type: dgxa100.80g
+      count: 1
+      labels:
+        node-id: n8
+        network.topology.kubernetes.io/accelerator: nvl2
+        network.topology.kubernetes.io/block: sw14
+        network.topology.kubernetes.io/spine: sw22
+        network.topology.kubernetes.io/datacenter: sw31
+        nvidia.com/gpu.count: "8"
+    - type: dgxa100.80g
+      count: 1
+      labels:
+        node-id: n9
+        network.topology.kubernetes.io/accelerator: nvl2
+        network.topology.kubernetes.io/block: sw15
+        network.topology.kubernetes.io/spine: sw23
+        network.topology.kubernetes.io/datacenter: sw31
+        nvidia.com/gpu.count: "8"
+    - type: dgxa100.80g
+      count: 1
+      labels:
+        node-id: n10
+        network.topology.kubernetes.io/accelerator: nvl2
+        network.topology.kubernetes.io/block: sw15
+        network.topology.kubernetes.io/spine: sw23
+        network.topology.kubernetes.io/datacenter: sw31
+        nvidia.com/gpu.count: "8"
+    - type: dgxa100.80g
+      count: 1
+      labels:
+        node-id: n11
+        network.topology.kubernetes.io/accelerator: nvl2
+        network.topology.kubernetes.io/block: sw16
+        network.topology.kubernetes.io/spine: sw23
+        network.topology.kubernetes.io/datacenter: sw31
+        nvidia.com/gpu.count: "8"
+    - type: dgxa100.80g
+      count: 1
+      labels:
+        node-id: n12
+        network.topology.kubernetes.io/accelerator: nvl2
+        network.topology.kubernetes.io/block: sw16
+        network.topology.kubernetes.io/spine: sw23
+        network.topology.kubernetes.io/datacenter: sw31
+        nvidia.com/gpu.count: "8"
+    timeout: 5m
diff --git a/resources/benchmarks/nwtopo/workflows/config-nodes.yaml b/resources/benchmarks/nwtopo/workflows/config-nodes.yaml
index 108f8b5..25735fc 100644
--- a/resources/benchmarks/nwtopo/workflows/config-nodes.yaml
+++ b/resources/benchmarks/nwtopo/workflows/config-nodes.yaml
@@ -35,100 +35,100 @@ tasks:
       count: 1
       labels:
         node-id: n1
-        net-layer-1: sw11
-        net-layer-2: sw21
-        net-layer-3: sw31
+        network.topology.kubernetes.io/block: sw11
+        network.topology.kubernetes.io/spine: sw21
+        network.topology.kubernetes.io/datacenter: sw31
         nvidia.com/gpu.count: "8"
     - type: dgxa100.80g
       count: 1
       labels:
         node-id: n2
-        net-layer-1: sw11
-        net-layer-2: sw21
-        net-layer-3: sw31
+        network.topology.kubernetes.io/block: sw11
+        network.topology.kubernetes.io/spine: sw21
+        network.topology.kubernetes.io/datacenter: sw31
         nvidia.com/gpu.count: "8"
     - type: dgxa100.80g
       count: 1
       labels:
         node-id: n3
-        net-layer-1: sw12
-        net-layer-2: sw21
-        net-layer-3: sw31
+        network.topology.kubernetes.io/block: sw12
+        network.topology.kubernetes.io/spine: sw21
+        network.topology.kubernetes.io/datacenter: sw31
         nvidia.com/gpu.count: "8"
     - type: dgxa100.80g
      count: 1
       labels:
         node-id: n4
-        net-layer-1: sw12
-        net-layer-2: sw21
-        net-layer-3: sw31
+        network.topology.kubernetes.io/block: sw12
+        network.topology.kubernetes.io/spine: sw21
+        network.topology.kubernetes.io/datacenter: sw31
         nvidia.com/gpu.count: "8"
     - type: dgxa100.80g
       count: 1
       labels:
         node-id: n5
-        net-layer-1: sw13
-        net-layer-2: sw22
-        net-layer-3: sw31
+        network.topology.kubernetes.io/block: sw13
+        network.topology.kubernetes.io/spine: sw22
+        network.topology.kubernetes.io/datacenter: sw31
         net-optimal: true
         nvidia.com/gpu.count: "8"
     - type: dgxa100.80g
       count: 1
       labels:
         node-id: n6
-        net-layer-1: sw13
-        net-layer-2: sw22
-        net-layer-3: sw31
+        network.topology.kubernetes.io/block: sw13
+        network.topology.kubernetes.io/spine: sw22
+        network.topology.kubernetes.io/datacenter: sw31
         nvidia.com/gpu.count: "8"
     - type: dgxa100.80g
       count: 1
       labels:
         node-id: n7
-        net-layer-1: sw14
-        net-layer-2: sw22
-        net-layer-3: sw31
+        network.topology.kubernetes.io/block: sw14
+        network.topology.kubernetes.io/spine: sw22
+        network.topology.kubernetes.io/datacenter: sw31
         net-optimal: true
         nvidia.com/gpu.count: "8"
     - type: dgxa100.80g
       count: 1
       labels:
         node-id: n8
-        net-layer-1: sw14
-        net-layer-2: sw22
-        net-layer-3: sw31
+        network.topology.kubernetes.io/block: sw14
+        network.topology.kubernetes.io/spine: sw22
+        network.topology.kubernetes.io/datacenter: sw31
         net-optimal: true
         nvidia.com/gpu.count: "8"
     - type: dgxa100.80g
       count: 1
       labels:
         node-id: n9
-        net-layer-1: sw15
-        net-layer-2: sw23
-        net-layer-3: sw31
+        network.topology.kubernetes.io/block: sw15
+        network.topology.kubernetes.io/spine: sw23
+        network.topology.kubernetes.io/datacenter: sw31
         nvidia.com/gpu.count: "8"
     - type: dgxa100.80g
       count: 1
       labels:
         node-id: n10
-        net-layer-1: sw15
-        net-layer-2: sw23
-        net-layer-3: sw31
+        network.topology.kubernetes.io/block: sw15
+        network.topology.kubernetes.io/spine: sw23
+        network.topology.kubernetes.io/datacenter: sw31
         nvidia.com/gpu.count: "8"
     - type: dgxa100.80g
       count: 1
       labels:
         node-id: n11
-        net-layer-1: sw16
-        net-layer-2: sw23
-        net-layer-3: sw31
+        network.topology.kubernetes.io/block: sw16
+        network.topology.kubernetes.io/spine: sw23
+        network.topology.kubernetes.io/datacenter: sw31
         nvidia.com/gpu.count: "8"
     - type: dgxa100.80g
       count: 1
       labels:
         node-id: n12
-        net-layer-1: sw16
-        net-layer-2: sw23
-        net-layer-3: sw31
+        network.topology.kubernetes.io/block: sw16
+        network.topology.kubernetes.io/spine: sw23
+        network.topology.kubernetes.io/datacenter: sw31
         nvidia.com/gpu.count: "8"
     timeout: 5m
 - id: update
diff --git a/resources/benchmarks/nwtopo/workflows/run-test-acc.yaml b/resources/benchmarks/nwtopo/workflows/run-test-acc.yaml
new file mode 100644
index 0000000..b21bacf
--- /dev/null
+++ b/resources/benchmarks/nwtopo/workflows/run-test-acc.yaml
@@ -0,0 +1,59 @@
+# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: test-network-topology
+description: deploy two jobsets with 6 replicas each
+tasks:
+- id: job1
+  type: SubmitObj
+  params:
+    refTaskId: register
+    count: 1
+    params:
+      replicas: 6
+      ttl: 2m
+- id: sleep
+  type: Sleep
+  params:
+    timeout: 1s
+- id: job2
+  type: SubmitObj
+  params:
+    refTaskId: register
+    count: 1
+    params:
+      replicas: 6
+      ttl: 2m
+- id: status1
+  type: CheckPod
+  params:
+    refTaskId: job1
+    status: Running
+    timeout: 5s
+- id: status2
+  type: CheckPod
+  params:
+    refTaskId: job2
+    status: Running
+    timeout: 5s
+
+# validation:
+# for node in $(kubectl get pods -l app=jobset1 -o custom-columns=NODE:.spec.nodeName --no-headers); do kubectl get no $node -o yaml | grep accelerator; done
+# for node in $(kubectl get pods -l app=jobset2 -o custom-columns=NODE:.spec.nodeName --no-headers); do kubectl get no $node -o yaml | grep accelerator; done
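+#
+# A compact variant of the same check (a sketch; it assumes the jobsets keep the
+# names jobset1 and jobset2 generated by nameFormat in config-jobset-acc.yaml):
+# for app in jobset1 jobset2; do kubectl get pods -l app=$app -o custom-columns=NODE:.spec.nodeName --no-headers | sort -u | xargs kubectl get nodes --no-headers -L network.topology.kubernetes.io/accelerator; done
+# Each jobset should report a single accelerator label value (nvl1 or nvl2) across
+# its nodes, confirming the required pod affinity packed it into one NVLink domain.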