Skip to content

Commit

Permalink
[CI] Modify matmul4d to use total size as input args (#1112)
Browse files Browse the repository at this point in the history
Before this PR, the input args M/N/K were the outer dims for matmul4d ops.
For easier comparison with standard matmul, this PR keeps the M/N/K
arguments but treats them as the total matmul input sizes, and adds the
inner dim sizes as separate parameters.
  • Loading branch information
yzhang93 authored Feb 17, 2025
1 parent 44cda58 commit d2ccc89
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 41 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// input ${M1}x${K1}x${M0}x${K0}x${TYPE1}
// input ${N1}x${K1}x${K0}x${N0}x${TYPE1}

// Template for a 4-d ("packed") matmul expressed as a linalg.generic.
// Every dollar-brace placeholder in this file (M1, K1, N1, M0, K0, N0, TYPE1,
// TYPE2, ZERO, EXT, MUL, ADD) is substituted textually before compilation.
//
// Operand layouts, as spelled out in the signature below:
//   %arg0 (LHS): M1 x K1 x M0 x K0, element type TYPE1
//   %arg1 (RHS): N1 x K1 x K0 x N0, element type TYPE1
//   result     : N1 x M1 x M0 x N0, element type TYPE2
// Note that the result's two leading dims are ordered N1, M1 (not M1, N1).
func.func @matmul4d(%arg0: tensor<${M1}x${K1}x${M0}x${K0}x${TYPE1}>, %arg1: tensor<${N1}x${K1}x${K0}x${N0}x${TYPE1}>) -> tensor<${N1}x${M1}x${M0}x${N0}x${TYPE2}> {
// Zero-initialise the accumulator tensor that the generic op reduces into.
%cst = arith.constant ${ZERO} : ${TYPE2}
%0 = tensor.empty() : tensor<${N1}x${M1}x${M0}x${N0}x${TYPE2}>
%1 = linalg.fill ins(%cst : ${TYPE2}) outs(%0 : tensor<${N1}x${M1}x${M0}x${N0}x${TYPE2}>) -> tensor<${N1}x${M1}x${M0}x${N0}x${TYPE2}>
// Six-dimensional iteration space. Matching the indexing maps against the
// operand shapes gives:
//   d0 -> M1 (parallel)    d1 -> N1 (parallel)    d2 -> K1 (reduction)
//   d3 -> M0 (parallel)    d4 -> N0 (parallel)    d5 -> K0 (reduction)
//   LHS map (d0, d2, d3, d5), RHS map (d1, d2, d5, d4), output map (d1, d0, d3, d4).
%2 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d3, d5)>, affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d2, d5, d4)>, affine_map<(d0, d1, d2, d3, d4, d5) -> (d1, d0, d3, d4)>], iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]} ins(%arg0, %arg1 : tensor<${M1}x${K1}x${M0}x${K0}x${TYPE1}>, tensor<${N1}x${K1}x${K0}x${N0}x${TYPE1}>) outs(%1 : tensor<${N1}x${M1}x${M0}x${N0}x${TYPE2}>) {
^bb0(%in: ${TYPE1}, %in_1: ${TYPE1}, %out: ${TYPE2}):
// Scalar body: convert both inputs from TYPE1 to the accumulator type TYPE2,
// multiply them, and add the product to the running output element.
%12 = ${EXT} %in : ${TYPE1} to ${TYPE2}
%13 = ${EXT} %in_1 : ${TYPE1} to ${TYPE2}
%14 = ${MUL} %12, %13 : ${TYPE2}
%15 = ${ADD} %out, %14 : ${TYPE2}
linalg.yield %15 : ${TYPE2}
} -> tensor<${N1}x${M1}x${M0}x${N0}x${TYPE2}>
return %2 : tensor<${N1}x${M1}x${M0}x${N0}x${TYPE2}>
}

This file was deleted.

20 changes: 18 additions & 2 deletions build_tools/ci/cpu_comparison/matmul_template/matmul_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ def get_higher_order_element_type(element_type):
assert False, f"support for {element_type} is missing"


def generate_matmul_test(output_fn, input_fn, m, n, k, lhs_rhs_type, acc_type, b=0):
def generate_matmul_test(
output_fn, input_fn, m, n, k, lhs_rhs_type, acc_type, b=0, m0=0, n0=0, k0=0
):
"""
Generate mlir file (output_fn) from the template file (input_fn).
"""
Expand All @@ -23,13 +25,27 @@ def generate_matmul_test(output_fn, input_fn, m, n, k, lhs_rhs_type, acc_type, b
# Only used for Matmul+Trunc via scaling.
replace["TYPE_MUL_RESULT"] = get_higher_order_element_type(acc_type)

replace["B"] = b # This is only used for batch matmul
acc_is_int = acc_type[0] == "i"
replace["ZERO"] = 0 if acc_is_int else 0.0
replace["ADD"] = "arith.addi" if acc_is_int else "arith.addf"
replace["MUL"] = "arith.muli" if acc_is_int else "arith.mulf"
replace["EXT"] = "arith.extsi" if acc_is_int else "arith.extf"

# This is only used for batch matmul.
replace["B"] = b

# m0, n0, k0 are only used for matmul4d as inner dim sizes.
replace["M0"] = m0
replace["N0"] = n0
replace["K0"] = k0
# matmul4d outer dim sizes can be calculated by `total_size/inner_dim_size`.
if m0 != 0:
replace["M1"] = int(m / m0)
if n0 != 0:
replace["N1"] = int(n / n0)
if k0 != 0:
replace["K1"] = int(k / k0)

key_map = map(lambda s: "${" + s + "}", replace.keys())
key_map_escaped = map(re.escape, key_map)
regex = re.compile("|".join(key_map_escaped))
Expand Down
58 changes: 36 additions & 22 deletions build_tools/ci/cpu_comparison/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,14 +457,17 @@ def _execute(self, config):

class Matmul4d(BaseMatmul):
"""
A test of linalg.generic with 4d inputs and output implementing form:
C += matmul4d(A,B) where A:MxKxM0xK0, B:NxKxK0xN0, C:NxMxM0xN0
Note that the outer dims for this operation are transposed to make sure
successful compilation through LogicalObjectFifo pipeline.
For comparison purpose, the input values of inner dims M0/N0/K0 are
fixed as 32/32/64 currently.
TODO(vivian): Generalize the class and the template.
A test of linalg.generic with 4d inputs and output, following the form:
C += matmul4d(A,B) where A:M1xK1xM0xK0, B:N1xK1xK0xN0, C:N1xM1xM0xN0
-- M0/N0/K0 are inner dim sizes, currently fixed at 32/32/64 for comparison purposes.
-- M1/N1/K1 are outer dim sizes.
Note that the outer dims for this operation are transposed to ensure
successful compilation through the LogicalObjectFifo pipeline.
-- The input parameters M/N/K are the total sizes, each equal to the product
of the corresponding outer and inner dim sizes.
"""

def __init__(
Expand All @@ -474,6 +477,9 @@ def __init__(
K,
input_type,
acc_type,
M0=32,
N0=32,
K0=64,
additional_labels=None,
n_kernel_runs=1,
test_params=None,
Expand All @@ -489,6 +495,9 @@ def __init__(
function_name="matmul4d",
n_kernel_runs=n_kernel_runs,
)
self.M0 = M0
self.N0 = N0
self.K0 = K0
self.labels.append("Matmul4d")
if additional_labels:
self.labels += additional_labels
Expand All @@ -500,8 +509,19 @@ def __init__(

def _execute(self, config):
matmul_template_dir = config.file_dir / "matmul_template"
template_name = matmul_template_dir / "matmul4d_MxKxM0xK0_NxKxK0xN0.mlir"
self.generate(config, template_name)
template_name = matmul_template_dir / "matmul4d_M1xK1xM0xK0_N1xK1xK0xN0.mlir"
generate_matmul_test(
self.get_filename(config),
template_name,
m=self.M,
n=self.N,
k=self.K,
lhs_rhs_type=self.input_type,
acc_type=self.acc_type,
m0=self.M0,
n0=self.N0,
k0=self.K0,
)
if self.run_benchmark:
return self.benchmark(config)

Expand Down Expand Up @@ -2149,13 +2169,10 @@ def __init__(self):
"transpose_b": False,
"tile_pipeline": "pack-peel-4-level-tiling",
},
# matmul4d test where the input M/N/K are outer dim values.
# The total input values correspond to a standard matmul
# from the above test are M:512, N:4096, K:512.
{
"M": 16,
"N": 128,
"K": 8,
"M": 512,
"N": 4096,
"K": 512,
"use_ukernel": True,
"peano_opt_level": 3,
"outline": "balanced",
Expand Down Expand Up @@ -2227,13 +2244,10 @@ def __init__(self):
"tile_pipeline": "pack-peel-4-level-tiling",
"run_on_target": "npu4",
},
# matmul4d test where the input M/N/K are outer dim values.
# The total input values correspond to a standard matmul
# from the above test are M:512, N:4096, K:512.
{
"M": 16,
"N": 128,
"K": 8,
"M": 512,
"N": 4096,
"K": 512,
"in_dtype": "i8",
"out_dtype": "i32",
"use_ukernel": True,
Expand Down

0 comments on commit d2ccc89

Please sign in to comment.