Skip to content

Commit

Permalink
Merge branch 'main' into matmul4d_ci
Browse files Browse the repository at this point in the history
  • Loading branch information
yzhang93 authored Feb 17, 2025
2 parents 2fad3f9 + 5755830 commit f12a94e
Show file tree
Hide file tree
Showing 15 changed files with 717 additions and 433 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ jobs:
submodule update --init --recursive --depth 1 --single-branch -j 10
- name: Setup Cpp
uses: aminya/setup-cpp@12e62a1b8da8f1e66acc75305a2621234ef49dd0 # v0.46.0
uses: aminya/setup-cpp@abe2d67f8c619c5f4b9e40358430e33df461d5b8 # v0.46.2
with:
compiler: llvm
vcvarsall: true
Expand Down

This file was deleted.

110 changes: 20 additions & 90 deletions build_tools/ci/cpu_comparison/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def run(self, config):
# does not).
if self.use_chess and not config.vitis_dir:
return False
if self.use_chess_for_ukernel and not config.vitis_dir:
if self.use_ukernel and self.use_chess_for_ukernel and not config.vitis_dir:
return False

# If use_chess=0, and config has not provided a valid
Expand Down Expand Up @@ -751,9 +751,9 @@ def _execute(self, config):
return True


class MatmulTrunci(BaseMatmul):
class MatmulScaleTrunci(BaseMatmul):
"""
A test of the form matmul(A,B) + trunci(C) where A:MxK, B:KxN and C:MxN
A test of the form matmul(A,B) + scale(C) + trunci(C) where A:MxK, B:KxN and C:MxN
"""

def __init__(
Expand All @@ -767,18 +767,17 @@ def __init__(
rhs,
expected_out,
test_params=None,
use_scaling=False,
):
super().__init__(
name=f"matmul_trunci_{M}_{N}_{K}_{input_type}_{acc_type}",
name=f"matmul_scale_trunci_{M}_{N}_{K}_{input_type}_{acc_type}",
test_params=test_params,
M=M,
N=N,
K=K,
input_type=input_type,
acc_type=acc_type,
)
self.labels.append("MatmulTrunci")
self.labels.append("MatmulScaleTrunci")

# Assertions on shapes: Check that lhs is MxK, rhs is KxN, and expected_out is MxN
assert lhs.shape == (M, K)
Expand All @@ -788,13 +787,10 @@ def __init__(
self.lhs = lhs
self.rhs = rhs
self.expected_out = expected_out
self.use_scaling = use_scaling

def _execute(self, config):
matmul_template_dir = config.file_dir / "matmul_template"
template_name = matmul_template_dir / "matmul_trunci_MxK_KxN.mlir"
if self.use_scaling:
template_name = matmul_template_dir / "matmul_trunci_scaling_MxK_KxN.mlir"
template_name = matmul_template_dir / "matmul_trunci_scaling_MxK_KxN.mlir"
self.generate(config, template_name)
filename = self.get_filename(config)
input_args = generate_inputs(
Expand Down Expand Up @@ -1601,78 +1597,10 @@ def __init__(self):
self.existing_names = []
self.tests = []

# Tests Matmul + Trunci.
# Phoenix : Ukernel + Peano.
self.register(
MatmulTrunci(
256,
128,
32,
"i8",
"i32",
1 * np.ones([256, 32], dtype=np.int8),
1 * np.ones([32, 128], dtype=np.int8),
32 * np.ones([256, 128], dtype=np.int8),
test_params=TestParams(
tile_pipeline="pack-peel-4-level-tiling",
run_on_target=["npu1_4col"],
aie_compilation_flags=[
"--iree-amdaie-num-rows=4",
"--iree-amdaie-num-cols=4",
],
use_ukernel=True,
),
)
)
# Phoenix : Vectorization + Peano.
self.register(
MatmulTrunci(
256,
128,
32,
"i8",
"i32",
1 * np.ones([256, 32], dtype=np.int8),
1 * np.ones([32, 128], dtype=np.int8),
32 * np.ones([256, 128], dtype=np.int8),
test_params=TestParams(
tile_pipeline="pack-peel-4-level-tiling",
run_on_target=["npu1_4col"],
aie_compilation_flags=[
"--iree-amdaie-num-rows=4",
"--iree-amdaie-num-cols=4",
],
),
)
)
# Strix : Ukernel + Chess.
self.register(
MatmulTrunci(
256,
128,
32,
"i8",
"i32",
1 * np.ones([256, 32], dtype=np.int8),
1 * np.ones([32, 128], dtype=np.int8),
32 * np.ones([256, 128], dtype=np.int8),
test_params=TestParams(
tile_pipeline="pack-peel-4-level-tiling",
run_on_target=["npu4"],
aie_compilation_flags=[
"--iree-amdaie-num-rows=4",
"--iree-amdaie-num-cols=8",
],
use_chess=True,
use_ukernel=True,
),
)
)

# Tests Matmul + Trunci with Scaling.
# Phoenix : Ukernel + Peano.
self.register(
MatmulTrunci(
MatmulScaleTrunci(
256,
256,
128,
Expand All @@ -1691,12 +1619,11 @@ def __init__(self):
],
use_ukernel=True,
),
use_scaling=True,
)
)
# Phoenix : Vectorization + Peano.
self.register(
MatmulTrunci(
MatmulScaleTrunci(
256,
256,
128,
Expand All @@ -1713,12 +1640,11 @@ def __init__(self):
"--iree-amdaie-num-cols=4",
],
),
use_scaling=True,
)
)
# Strix : Ukernel + Chess.
# Strix : Ukernel + Peano.
self.register(
MatmulTrunci(
MatmulScaleTrunci(
256,
256,
128,
Expand All @@ -1734,10 +1660,10 @@ def __init__(self):
"--iree-amdaie-num-rows=4",
"--iree-amdaie-num-cols=8",
],
use_chess=True,
use_chess=False,
use_ukernel=True,
use_chess_for_ukernel=False,
),
use_scaling=True,
)
)
# Matmul with truncf test(s):
Expand Down Expand Up @@ -1963,7 +1889,8 @@ def __init__(self):
"f32",
test_params=TestParams(
use_ukernel=True,
use_chess=True,
use_chess=False,
use_chess_for_ukernel=False,
run_on_target=["npu4"],
),
)
Expand All @@ -1978,11 +1905,12 @@ def __init__(self):
test_params=TestParams(
name_suffix="npu4_4x8",
use_ukernel=True,
use_chess=False,
use_chess_for_ukernel=False,
aie_compilation_flags=[
"--iree-amdaie-num-rows=4",
"--iree-amdaie-num-cols=8",
],
use_chess=True,
run_on_target=["npu4"],
),
)
Expand Down Expand Up @@ -2024,7 +1952,8 @@ def __init__(self):
"--iree-amdaie-num-rows=4",
"--iree-amdaie-num-cols=8",
],
use_chess=True,
use_chess=False,
use_chess_for_ukernel=False,
),
)
)
Expand All @@ -2044,7 +1973,8 @@ def __init__(self):
"--iree-amdaie-num-rows=4",
"--iree-amdaie-num-cols=8",
],
use_chess=True,
use_chess=False,
use_chess_for_ukernel=False,
),
)
)
Expand Down
17 changes: 17 additions & 0 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,23 @@ def AMDAIE_NpuDmaCpyNdOp: AMDAIE_Op<"npu.dma_cpy_nd", [
let hasCanonicalizer = 1;
}

// Placeholder op with the mnemonic `npu.dma_placeholder`. It declares no
// traits and no results; its only declared operand is `$connection`.
def AMDAIE_NpuDmaPlaceHolderOp : AMDAIE_Op<"npu.dma_placeholder"> {
let summary = "Represents a placeholder for a DMA operation.";
let description = [{
This operation acts as a placeholder user for `amdaie.connection` operations to prevent
them from being dead-code eliminated. This is used for control flow connections that are
inserted before control packets are generated because they need to be taken into account
together with data connections for routing. This operation does not have any side effects
on control code size.
}];

// Single operand of type `Index`.
// NOTE(review): presumably the result of an `amdaie.connection` op, as the
// description above suggests; the type constraint itself only requires `Index`.
let arguments = (
ins Index:$connection
);

// Custom assembly: the operand wrapped in parentheses, followed by the
// attribute dictionary.
let assemblyFormat = [{ `(` $connection `)` attr-dict }];
}

def AMDAIE_NpuHalfDmaCpyNdOp
: AMDAIE_Op<"npu.half_dma_cpy_nd", [AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface]> {
let summary = "The NPU uController's DMA operation, operating on a single port";
Expand Down
Loading

0 comments on commit f12a94e

Please sign in to comment.