From 8c197439d74e15f5cac4b51fa9d7084d1d5a14fc Mon Sep 17 00:00:00 2001
From: Matthias Reso <13337103+mreso@users.noreply.github.com>
Date: Wed, 23 Apr 2025 16:06:25 -0700
Subject: [PATCH] Add benchmark values for pct_achievable_gemm_tops and pct_achievable_mem_bw for MI300X

---
 torchao/testing/float8/roofline_utils.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/torchao/testing/float8/roofline_utils.py b/torchao/testing/float8/roofline_utils.py
index 92becb9b94..f0b873c352 100644
--- a/torchao/testing/float8/roofline_utils.py
+++ b/torchao/testing/float8/roofline_utils.py
@@ -47,12 +47,10 @@
         "fp8_peak_tops": 2614e12,
         # 5.3 TB per second
         "peak_mem_bw_bytes_sec": 5.3e12,
-        # for now, copy over from H100
-        # TODO(future): run measurement on hardware
-        "pct_achievable_gemm_tops": 0.78,
-        # for now, copy over from H100
-        # TODO(future): run measurement on hardware
-        "pct_achievable_mem_bw": 0.92,
+        # based on microbenchmark (fw + bw gemms) with M,K,N = 3 * (8192,)
+        "pct_achievable_gemm_tops": 0.47,
+        # based on microbenchmark with pointwise triton kernel with large inputs
+        "pct_achievable_mem_bw": 0.72,
     },
     # TODO(future): more GPU names
 }