From 3766a226a3166353fe41aece3b9d7cde8eeabe5d Mon Sep 17 00:00:00 2001 From: Jorn Tuyls Date: Thu, 30 Jan 2025 16:36:16 +0100 Subject: [PATCH 1/2] [CI] Add initial Strix matmul performance tests --- .github/workflows/ci-linux.yml | 36 +++++++++++++++- build_tools/ci/cpu_comparison/run.py | 61 +++++++++++++++++++++++++--- 2 files changed, 91 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml index 3754fd72b..622942061 100644 --- a/.github/workflows/ci-linux.yml +++ b/.github/workflows/ci-linux.yml @@ -249,7 +249,7 @@ jobs: git config user.name "github-actions" git config user.email "github-actions@github.com" git add results_history.json results_history.html - git commit -m "Update performance results and deploy" + git commit -m "Update NPU1 performance results and deploy" git push test_linux_strix: @@ -314,3 +314,37 @@ jobs: --xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \ --xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS \ -v + + # Run the 'Performance' tests. These do not check numerical correctness, + # just measure the time to run some workloads. + - name : Performance benchmarks + run: | + source .venv/bin/activate + python build_tools/ci/cpu_comparison/run.py \ + test_aie_vs_cpu \ + $PWD/iree-install \ + --peano_dir=$PWD/llvm-aie \ + --vitis_dir=/opt/Xilinx/Vitis/2024.2 \ + --target_device="npu4" \ + --reset_npu_between_runs -v \ + --xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \ + --xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS \ + --tests=Performance > performance_npu4.log + + # Print a summary of the findings. + python build_tools/ci/cpu_comparison/performance_summarizer.py \ + performance_npu4.log results_npu4.json + + # Only publish the performance results on main branch pushes. + - name: Publish performance results + if: github.event_name == 'push' && github.ref_name == 'main' + run: | + cp build_tools/ci/cpu_comparison/performance_publish.py . + git fetch origin gh-pages + git checkout gh-pages + python performance_publish.py results_npu4.json results_history_npu4.json results_history_npu4.html + git config user.name "github-actions" + git config user.email "github-actions@github.com" + git add results_history_npu4.json results_history_npu4.html + git commit -m "Update NPU4 performance results and deploy" + git push diff --git a/build_tools/ci/cpu_comparison/run.py b/build_tools/ci/cpu_comparison/run.py index f137fcaac..9607b07f6 100755 --- a/build_tools/ci/cpu_comparison/run.py +++ b/build_tools/ci/cpu_comparison/run.py @@ -358,6 +358,7 @@ def __init__( aie_compilation_flags=None, n_repeats=1, n_kernel_runs=1, + use_chess=False, ): aie_compilation_flags = ( [] if aie_compilation_flags is None else aie_compilation_flags @@ -375,6 +376,7 @@ def __init__( use_ukernel=use_ukernel, n_repeats=n_repeats, n_kernel_runs=n_kernel_runs, + use_chess=use_chess, ) self.name = f"matmul_benchmark_{M}_{N}_{K}_{input_type}_{acc_type}" @@ -1818,6 +1820,9 @@ def __init__(self): ) performance_tests = [ + ############## + # NPU1 Tests # + ############## { "M": 512, "N": 512, @@ -1993,6 +1998,39 @@ def __init__(self): "skip_numerics": True, "tile_pipeline": "pack-peel-4-level-tiling", }, + ############## + # NPU4 Tests # + ############## + { + "M": 512, + "N": 4096, + "K": 512, + "in_dtype": "i8", + "out_dtype": "i32", + "use_ukernel": True, + "peano_opt_level": 3, + "outline": "all", + "transpose_a": False, + "transpose_b": False, + "tile_pipeline": "pack-peel", + "run_on_target": "npu4", + }, + { + "M": 512, + "N": 4096, + "K": 512, + "in_dtype": "i8", + "out_dtype": "i32", + "use_ukernel": False, + "peano_opt_level": 3, + "outline": "all", + "outline_to_empty_function": True, + "transpose_a": False, + "transpose_b": False, + "tile_pipeline": "pack-peel", + "run_on_target": "npu4", + "skip_numerics": True, + }, ] # Some bf16 Performance tests: @@ -2006,15 +2044,27 @@ def __init__(self): transpose_a = test["transpose_a"] transpose_b = test["transpose_b"] tile_pipeline = test["tile_pipeline"] + run_on_target = ( + test["run_on_target"] if "run_on_target" in test else "npu1_4col" + ) + in_dtype = test["in_dtype"] if "in_dtype" in test else "bf16" + out_dtype = test["out_dtype"] if "out_dtype" in test else "f32" outlining_string = "--iree-amdaie-enable-function-outlining=" + outline peano_opt_level_string = f'"-O{peano_opt_level}"' + name_suffix = "O" + str(peano_opt_level) + name_suffix += "_" + run_on_target + aie_compilation_flags = [ outlining_string, f"--iree-amd-aie-additional-peano-opt-flags={peano_opt_level_string}", ] + if run_on_target == "npu4": + aie_compilation_flags.append("--iree-amdaie-num-rows=4") + aie_compilation_flags.append("--iree-amdaie-num-cols=8") + outline_to_empty_function = False empty_key = "outline_to_empty_function" if empty_key in test and test[empty_key] == True: @@ -2025,7 +2075,6 @@ def __init__(self): "--iree-amdaie-replace-outlined-functions-with-empty" ) - name_suffix = "O" + str(peano_opt_level) if outline != "none": if outline_to_empty_function: name_suffix += "_outline_empty" @@ -2057,8 +2106,9 @@ def __init__(self): M, N, K, - "bf16", - "f32", + in_dtype, + out_dtype, + run_on_target=run_on_target, tile_pipeline=tile_pipeline, use_ukernel=use_ukernel, n_repeats=2, @@ -2073,8 +2123,9 @@ def __init__(self): M, N, K, - "bf16", - "f32", + in_dtype, + out_dtype, + run_on_target=run_on_target, tile_pipeline=tile_pipeline, additional_labels=["Performance"], use_ukernel=use_ukernel, From 192ae41c57c4761645a57f0da00b44800c1a9092 Mon Sep 17 00:00:00 2001 From: Jorn Tuyls Date: Thu, 30 Jan 2025 17:17:37 +0100 Subject: [PATCH 2/2] Fix CI and rename performance files --- .github/workflows/ci-linux.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml index 622942061..1f7809bd4 100644 --- a/.github/workflows/ci-linux.yml +++ b/.github/workflows/ci-linux.yml @@ -225,11 +225,11 @@ jobs: --reset_npu_between_runs -v \ --xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \ --xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS \ - --tests=Performance > performance.log + --tests=Performance > performance_npu1.log # Print a summary of the findings. python build_tools/ci/cpu_comparison/performance_summarizer.py \ - performance.log results.json + performance_npu1.log results_npu1.json - name: XRT-LITE tests run: | @@ -245,10 +245,10 @@ jobs: cp build_tools/ci/cpu_comparison/performance_publish.py . git fetch origin gh-pages git checkout gh-pages - python performance_publish.py results.json results_history.json results_history.html + python performance_publish.py results_npu1.json results_history_npu1.json results_history_npu1.html git config user.name "github-actions" git config user.email "github-actions@github.com" - git add results_history.json results_history.html + git add results_history_npu1.json results_history_npu1.html git commit -m "Update NPU1 performance results and deploy" git push @@ -324,7 +324,7 @@ jobs: test_aie_vs_cpu \ $PWD/iree-install \ --peano_dir=$PWD/llvm-aie \ - --vitis_dir=/opt/Xilinx/Vitis/2024.2 \ + --vitis_dir=/opt/xilinx/Vitis/2024.2 \ --target_device="npu4" \ --reset_npu_between_runs -v \ --xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \