[CI] Add initial Strix matmul performance tests #1069

Merged 2 commits on Jan 30, 2025. Changes shown from 1 commit.
.github/workflows/ci-linux.yml: 35 additions & 1 deletion
@@ -249,7 +249,7 @@ jobs:
git config user.name "github-actions"
git config user.email "[email protected]"
git add results_history.json results_history.html
git commit -m "Update performance results and deploy"
git commit -m "Update NPU1 performance results and deploy"
git push
test_linux_strix:
@@ -314,3 +314,37 @@ jobs:
--xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \
--xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS \
-v
# Run the 'Performance' tests. These do not check numerical correctness,
# just measure the time to run some workloads.
- name : Performance benchmarks
run: |
source .venv/bin/activate
python build_tools/ci/cpu_comparison/run.py \
test_aie_vs_cpu \
$PWD/iree-install \
--peano_dir=$PWD/llvm-aie \
--vitis_dir=/opt/Xilinx/Vitis/2024.2 \
--target_device="npu4" \
--reset_npu_between_runs -v \
--xrt_lite_n_core_rows=$XRT_LITE_N_CORE_ROWS \
--xrt_lite_n_core_cols=$XRT_LITE_N_CORE_COLS \
--tests=Performance > performance_npu4.log
# Print a summary of the findings.
python build_tools/ci/cpu_comparison/performance_summarizer.py \
performance_npu4.log results_npu4.json
# Only publish the performance results on main branch pushes.
- name: Publish performance results
if: github.event_name == 'push' && github.ref_name == 'main'
run: |
cp build_tools/ci/cpu_comparison/performance_publish.py .
git fetch origin gh-pages
git checkout gh-pages
python performance_publish.py results_npu4.json results_history_npu4.json results_history_npu4.html
git config user.name "github-actions"
git config user.email "[email protected]"
git add results_history_npu4.json results_history_npu4.html
git commit -m "Update NPU4 performance results and deploy"
git push
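
The benchmark step above redirects the run.py output into performance_npu4.log, and performance_summarizer.py reduces that log to results_npu4.json for the publish step to commit to gh-pages. The actual summarizer lives in build_tools/ci/cpu_comparison/; the sketch below only illustrates the idea of that log-to-JSON reduction, and the assumed log format (one "<benchmark name>: <time> ms" line per benchmark) is hypothetical.

# Minimal sketch of a log-to-JSON summarizer; the real script is
# build_tools/ci/cpu_comparison/performance_summarizer.py and its
# actual log format may differ from the one assumed here.
import json
import re
import sys


def summarize(log_path, json_path):
    # Hypothetical format: each benchmark prints "<benchmark name>: <time> ms".
    pattern = re.compile(r"^(?P<name>\S+):\s*(?P<ms>[0-9.]+)\s*ms\s*$")
    results = {}
    with open(log_path) as f:
        for line in f:
            match = pattern.match(line.strip())
            if match:
                results[match["name"]] = float(match["ms"])
    with open(json_path, "w") as f:
        json.dump(results, f, indent=2)


if __name__ == "__main__":
    summarize(sys.argv[1], sys.argv[2])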
build_tools/ci/cpu_comparison/run.py: 56 additions & 5 deletions
@@ -358,6 +358,7 @@ def __init__(
aie_compilation_flags=None,
n_repeats=1,
n_kernel_runs=1,
use_chess=False,
):
aie_compilation_flags = (
[] if aie_compilation_flags is None else aie_compilation_flags
@@ -375,6 +376,7 @@ def __init__(
use_ukernel=use_ukernel,
n_repeats=n_repeats,
n_kernel_runs=n_kernel_runs,
use_chess=use_chess,
)

self.name = f"matmul_benchmark_{M}_{N}_{K}_{input_type}_{acc_type}"
@@ -1818,6 +1820,9 @@ def __init__(self):
)

performance_tests = [
##############
# NPU1 Tests #
##############
{
"M": 512,
"N": 512,
@@ -1993,6 +1998,39 @@ def __init__(self):
"skip_numerics": True,
"tile_pipeline": "pack-peel-4-level-tiling",
},
##############
# NPU4 Tests #
##############
{
"M": 512,
"N": 4096,
"K": 512,
"in_dtype": "i8",
"out_dtype": "i32",
"use_ukernel": True,
"peano_opt_level": 3,
"outline": "all",
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
"run_on_target": "npu4",
},
{
"M": 512,
"N": 4096,
"K": 512,
"in_dtype": "i8",
"out_dtype": "i32",
"use_ukernel": False,
"peano_opt_level": 3,
"outline": "all",
"outline_to_empty_function": True,
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
"run_on_target": "npu4",
"skip_numerics": True,
},
]

# Some bf16 Performance tests:
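
Each dictionary in the performance_tests list above spells out only the keys where it differs from the defaults; the loop in the next hunk falls back to run_on_target="npu1_4col", in_dtype="bf16", out_dtype="f32", and outline_to_empty_function=False when a key is absent. As an illustration of how an entry is interpreted (run.py applies these defaults inline; the helper below is hypothetical and not part of the script):

# Illustration only: mirrors the defaulting the loop below performs inline.
def normalized(test):
    defaults = {
        "run_on_target": "npu1_4col",
        "in_dtype": "bf16",
        "out_dtype": "f32",
        "outline_to_empty_function": False,
    }
    # Keys present in the entry win over the defaults.
    return {**defaults, **test}

With this, the new NPU4 entries keep their explicit i8/i32 types and "npu4" target, while the older NPU1 entries pick up the bf16/f32 defaults.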
@@ -2006,15 +2044,27 @@ def __init__(self):
transpose_a = test["transpose_a"]
transpose_b = test["transpose_b"]
tile_pipeline = test["tile_pipeline"]
run_on_target = (
test["run_on_target"] if "run_on_target" in test else "npu1_4col"
)
in_dtype = test["in_dtype"] if "in_dtype" in test else "bf16"
out_dtype = test["out_dtype"] if "out_dtype" in test else "f32"

outlining_string = "--iree-amdaie-enable-function-outlining=" + outline

peano_opt_level_string = f'"-O{peano_opt_level}"'
name_suffix = "O" + str(peano_opt_level)
name_suffix += "_" + run_on_target

aie_compilation_flags = [
outlining_string,
f"--iree-amd-aie-additional-peano-opt-flags={peano_opt_level_string}",
]

if run_on_target == "npu4":
aie_compilation_flags.append("--iree-amdaie-num-rows=4")
aie_compilation_flags.append("--iree-amdaie-num-cols=8")

outline_to_empty_function = False
empty_key = "outline_to_empty_function"
if empty_key in test and test[empty_key] == True:
@@ -2025,7 +2075,6 @@ def __init__(self):
"--iree-amdaie-replace-outlined-functions-with-empty"
)

name_suffix = "O" + str(peano_opt_level)
if outline != "none":
if outline_to_empty_function:
name_suffix += "_outline_empty"
@@ -2057,8 +2106,9 @@ def __init__(self):
M,
N,
K,
"bf16",
"f32",
in_dtype,
out_dtype,
run_on_target=run_on_target,
tile_pipeline=tile_pipeline,
use_ukernel=use_ukernel,
n_repeats=2,
@@ -2073,8 +2123,9 @@ def __init__(self):
M,
N,
K,
"bf16",
"f32",
in_dtype,
out_dtype,
run_on_target=run_on_target,
tile_pipeline=tile_pipeline,
additional_labels=["Performance"],
use_ukernel=use_ukernel,

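For concreteness, replaying the flag assembly from the hunks above for the first new NPU4 entry (M=512, N=4096, K=512, i8 inputs, i32 accumulation, ukernels enabled) yields the flags below; the snippet is illustrative and not part of run.py.

# Replays, in isolation, the flag assembly shown in the diff above for the
# first new NPU4 entry; illustrative only, not part of run.py.
test = {
    "peano_opt_level": 3,
    "outline": "all",
    "run_on_target": "npu4",
}

outlining_string = "--iree-amdaie-enable-function-outlining=" + test["outline"]
peano_opt_level_string = f'"-O{test["peano_opt_level"]}"'

aie_compilation_flags = [
    outlining_string,
    f"--iree-amd-aie-additional-peano-opt-flags={peano_opt_level_string}",
]
if test["run_on_target"] == "npu4":
    # NPU4 (Strix) is given the full 4-row x 8-column core array explicitly.
    aie_compilation_flags.append("--iree-amdaie-num-rows=4")
    aie_compilation_flags.append("--iree-amdaie-num-cols=8")

print(aie_compilation_flags)
# ['--iree-amdaie-enable-function-outlining=all',
#  '--iree-amd-aie-additional-peano-opt-flags="-O3"',
#  '--iree-amdaie-num-rows=4', '--iree-amdaie-num-cols=8']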
Unchanged files with check annotations

compiler/plugins/target/AMD-AIE/aievec/AIEVecTypeConstraints.td

      rhs, VectorOfShapeAndType<[8, 4], BF16>,
      acc, VectorOfShapeAndType<[4, 4], F32>>]>>;

class isOperandResultTypePairValidForAIE2MulElem<string lhs, string rhs, string acc> :
  PredOpTrait<acc # " type is a valid accumulator type given the type of the" #
              " operands.",
    Or<[

Check warning on line 149 in compiler/plugins/target/AMD-AIE/aievec/AIEVecTypeConstraints.td
(GitHub Actions / Build and Test (windows, ASSERTIONS)): unused template argument: isOperandResultTypePairValidForAIE2MulElem:rhs