Skip to content

Commit 682b268

Browse files
committed
Allow billing of H100 GPUs
1 parent: 4a9f8e3; commit: 682b268

File tree

3 files changed

+21
-2
lines changed

3 files changed

+21
-2
lines changed

openshift_metrics/invoice.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
GPU_A100 = "NVIDIA-A100-40GB"
1010
GPU_A100_SXM4 = "NVIDIA-A100-SXM4-40GB"
1111
GPU_V100 = "Tesla-V100-PCIE-32GB"
12+
GPU_H100 = "NVIDIA-H100-80GB-HBM3"
1213
GPU_UNKNOWN_TYPE = "GPU_UNKNOWN_TYPE"
1314

1415
# GPU Resource - MIG Geometries
@@ -23,6 +24,7 @@
2324
SU_A100_GPU = "OpenShift GPUA100"
2425
SU_A100_SXM4_GPU = "OpenShift GPUA100SXM4"
2526
SU_V100_GPU = "OpenShift GPUV100"
27+
SU_H100_GPU = "OpenShift GPUH100"
2628
SU_UNKNOWN_GPU = "OpenShift Unknown GPU"
2729
SU_UNKNOWN_MIG_GPU = "OpenShift Unknown MIG GPU"
2830
SU_UNKNOWN = "Openshift Unknown"
@@ -65,6 +67,7 @@ def get_service_unit(self) -> ServiceUnit:
6567
GPU_A100: SU_A100_GPU,
6668
GPU_A100_SXM4: SU_A100_SXM4_GPU,
6769
GPU_V100: SU_V100_GPU,
70+
GPU_H100: SU_H100_GPU,
6871
}
6972

7073
A100_SXM4_MIG = {
@@ -79,6 +82,7 @@ def get_service_unit(self) -> ServiceUnit:
7982
SU_A100_GPU: {"gpu": 1, "cpu": 24, "ram": 74},
8083
SU_A100_SXM4_GPU: {"gpu": 1, "cpu": 31, "ram": 240},
8184
SU_V100_GPU: {"gpu": 1, "cpu": 48, "ram": 192},
85+
SU_H100_GPU: {"gpu": 1, "cpu": 63, "ram": 376},
8286
SU_UNKNOWN_GPU: {"gpu": 1, "cpu": 8, "ram": 64},
8387
SU_UNKNOWN_MIG_GPU: {"gpu": 1, "cpu": 8, "ram": 64},
8488
SU_UNKNOWN: {"gpu": -1, "cpu": 1, "ram": 1},
@@ -179,6 +183,7 @@ class Rates:
179183
gpu_a100: Decimal
180184
gpu_a100sxm4: Decimal
181185
gpu_v100: Decimal
186+
gpu_h100: Decimal
182187

183188

184189
@dataclass
@@ -201,6 +206,7 @@ class ProjectInvoce:
201206
SU_A100_GPU: 0,
202207
SU_A100_SXM4_GPU: 0,
203208
SU_V100_GPU: 0,
209+
SU_H100_GPU: 0,
204210
SU_UNKNOWN_GPU: 0,
205211
SU_UNKNOWN_MIG_GPU: 0,
206212
SU_UNKNOWN: 0,
@@ -222,6 +228,8 @@ def get_rate(self, su_type) -> Decimal:
222228
return self.rates.gpu_a100sxm4
223229
if su_type == SU_V100_GPU:
224230
return self.rates.gpu_v100
231+
if su_type == SU_H100_GPU:
232+
return self.rates.gpu_h100
225233
return Decimal(0)
226234

227235
def generate_invoice_rows(self, report_month) -> List[str]:

openshift_metrics/merge.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def main():
6969
parser.add_argument("--rate-gpu-v100-su", type=Decimal)
7070
parser.add_argument("--rate-gpu-a100sxm4-su", type=Decimal)
7171
parser.add_argument("--rate-gpu-a100-su", type=Decimal)
72+
parser.add_argument("--rate-gpu-h100-su", type=Decimal)
7273

7374
args = parser.parse_args()
7475
files = args.files
@@ -115,13 +116,15 @@ def main():
115116
gpu_a100=Decimal(nerc_data.get_value_at("GPUA100 SU Rate", report_month)),
116117
gpu_a100sxm4=Decimal(nerc_data.get_value_at("GPUA100SXM4 SU Rate", report_month)),
117118
gpu_v100=Decimal(nerc_data.get_value_at("GPUV100 SU Rate", report_month)),
119+
gpu_h100=Decimal(nerc_data.get_value_at("GPUH100 SU Rate", report_month)),
118120
)
119121
else:
120122
rates = invoice.Rates(
121123
cpu=Decimal(args.rate_cpu_su),
122124
gpu_a100=Decimal(args.rate_gpu_a100_su),
123125
gpu_a100sxm4=Decimal(args.rate_gpu_a100sxm4_su),
124-
gpu_v100=Decimal(args.rate_gpu_v100_su)
126+
gpu_v100=Decimal(args.rate_gpu_v100_su),
127+
gpu_h100=Decimal(args.rate_gpu_h100_su),
125128
)
126129

127130
if args.invoice_file:

openshift_metrics/tests/test_utils.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
cpu = Decimal("0.013"),
2424
gpu_a100sxm4 = Decimal("2.078"),
2525
gpu_a100 = Decimal("1.803"),
26-
gpu_v100 = Decimal("1.214")
26+
gpu_v100 = Decimal("1.214"),
27+
gpu_h100 = Decimal("6.04"),
2728
)
2829

2930
class TestGetNamespaceAnnotations(TestCase):
@@ -567,6 +568,13 @@ def test_known_gpu_V100(self):
567568
self.assertEqual(su_count, 1)
568569
self.assertEqual(determining_resource, "GPU")
569570

571+
def test_known_gpu_H100(self):
572+
pod = self.make_pod(48, 192, 1, invoice.GPU_H100, invoice.WHOLE_GPU)
573+
su_type, su_count, determining_resource = pod.get_service_unit()
574+
self.assertEqual(su_type, invoice.SU_H100_GPU)
575+
self.assertEqual(su_count, 1)
576+
self.assertEqual(determining_resource, "GPU")
577+
570578
def test_known_gpu_high_cpu(self):
571579
pod = self.make_pod(50, 96, 1, invoice.GPU_A100, invoice.WHOLE_GPU)
572580
su_type, su_count, determining_resource = pod.get_service_unit()

0 commit comments

Comments
 (0)