Skip to content

Commit

Permalink
Update mlir-air and mlir-aie version; add bf16 conv2d board test (nod…
Browse files Browse the repository at this point in the history
  • Loading branch information
erwei-xilinx authored Jul 5, 2024
1 parent 2210796 commit 485f670
Show file tree
Hide file tree
Showing 22 changed files with 98 additions and 89 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ jobs:
run: |
python3 -m venv .venv
source .venv/bin/activate
pip install https://github.com/Xilinx/mlir-aie/releases/download/latest-wheels/mlir_aie-0.0.1.2024062422+7917990-py3-none-manylinux_2_35_x86_64.whl
pip install https://github.com/Xilinx/mlir-aie/releases/download/latest-wheels/mlir_aie-0.0.1.2024070222+76460fe-py3-none-manylinux_2_35_x86_64.whl
pip install -r tests/matmul/requirements.txt
Expand Down
6 changes: 3 additions & 3 deletions build_tools/ci/cpu_comparison/run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -326,9 +326,6 @@ function run_test() {
# Example of running a test directly from an .mlir file with a function.
run_test --test_file ${THIS_DIR}/test_files/matmul_int32.mlir

# Example of running a test directly from an .mlir file with a function.
run_test --test_file ${THIS_DIR}/test_files/conv_int32.mlir --pipeline "conv-decompose"

# An example of an arbitrary graph with three matmuls which form three dispatches.
run_test --test_file ${THIS_DIR}/test_files/three_matmuls.mlir

Expand Down Expand Up @@ -358,4 +355,7 @@ generate_matmul_test \
--m "1024" --n "1024" --k "512"
run_test --test_file ${test_name} --pipeline "pack-peel"

# Conv2d tests.
run_test --test_file ${THIS_DIR}/test_files/conv_int32.mlir --pipeline "conv-decompose"
run_test --test_file ${THIS_DIR}/test_files/conv_bf16.mlir --pipeline "conv-decompose"

11 changes: 11 additions & 0 deletions build_tools/ci/cpu_comparison/test_files/conv_bf16.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// These 2 lines are required by the script which generates input data:
// input 2x14x14x32xbf16
// input 3x3x32x64xbf16

// Test function: a 2-D convolution over bf16 operands that yields an f32 result.
//   %arg0: image tensor, 2x14x14x32 (bf16); NHWC layout per the op name.
//   %arg1: filter tensor, 3x3x32x64 (bf16); HWCF layout per the op name.
// Returns a 2x12x12x64 f32 tensor. With unit strides and dilations and no
// padding, each spatial extent is 14 - 3 + 1 = 12.
func.func @conv_2d_nhwc_hwcf(%arg0: tensor<2x14x14x32xbf16>, %arg1: tensor<3x3x32x64xbf16>) -> tensor<2x12x12x64xf32> {
// f32 zero used to initialise the result tensor.
%cst = arith.constant 0.0 : f32
// Result-shaped tensor with unspecified contents.
%0 = tensor.empty() : tensor<2x12x12x64xf32>
// Fill the result tensor with zero before the convolution writes into it.
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<2x12x12x64xf32>) -> tensor<2x12x12x64xf32>
// Stride-1, dilation-1 convolution of the bf16 operands into the zero-filled f32 tensor.
%2 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %arg1 : tensor<2x14x14x32xbf16>, tensor<3x3x32x64xbf16>) outs(%1 : tensor<2x12x12x64xf32>) -> tensor<2x12x12x64xf32>
return %2 : tensor<2x12x12x64xf32>
}
4 changes: 2 additions & 2 deletions build_tools/ci/print_ir_aie2xclbin/basic_dma_transpose.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ module attributes {hal.device.targets = [#hal.device.target<"amd-aie-direct", [#
aie.end
}
func.func @dummy2(%arg0: memref<4096xi32>, %arg1: memref<4096xi32>, %arg2: memref<4096xi32>) {
aiex.npu.dma_memcpy_nd(0, 0, %arg2[0, 0, 0, 0][1, 1, 1, 4096][0, 0, 0]) {id = 0 : i64, metadata = @out} : memref<4096xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 64, 64, 1][1, 1, 64]) {id = 1 : i64, metadata = @in} : memref<4096xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg2[0, 0, 0, 0][1, 1, 1, 4096][0, 0, 0, 1]) {id = 0 : i64, metadata = @out} : memref<4096xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 64, 64, 1][1, 1, 64, 1]) {id = 1 : i64, metadata = @in} : memref<4096xi32>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
return
}
Expand Down
12 changes: 6 additions & 6 deletions build_tools/ci/print_ir_aie2xclbin/buffers_xclbin.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -113,14 +113,14 @@ module attributes {hal.device.targets = [#hal.device.target<"amd-aie-direct", [#
aie.shim_dma_allocation @out2(S2MM, 2, 0)

func.func @dummy2(%arg0: memref<1024xi32>, %arg1: memref<1024xi32>, %arg2: memref<1024xi32>, %arg3: memref<1024xi32>, %arg4: memref<1024xi32>, %arg5: memref<1024xi32>) {
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 0 : i64, metadata = @in0} : memref<1024xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg1[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 1 : i64, metadata = @out0} : memref<1024xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0, 1]) {id = 0 : i64, metadata = @in0} : memref<1024xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg1[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0, 1]) {id = 1 : i64, metadata = @out0} : memref<1024xi32>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
aiex.npu.dma_memcpy_nd(0, 0, %arg2[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 2 : i64, metadata = @in1} : memref<1024xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg3[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 3 : i64, metadata = @out1} : memref<1024xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg2[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0, 1]) {id = 2 : i64, metadata = @in1} : memref<1024xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg3[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0, 1]) {id = 3 : i64, metadata = @out1} : memref<1024xi32>
aiex.npu.sync {channel = 1 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
aiex.npu.dma_memcpy_nd(0, 0, %arg4[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 2 : i64, metadata = @in2} : memref<1024xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg5[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0]) {id = 3 : i64, metadata = @out2} : memref<1024xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg4[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0, 1]) {id = 2 : i64, metadata = @in2} : memref<1024xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg5[0, 0, 0, 0][1, 1, 1, 1024][0, 0, 0, 1]) {id = 3 : i64, metadata = @out2} : memref<1024xi32>
aiex.npu.sync {channel = 0 : i32, column = 2 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
return
}
Expand Down
16 changes: 8 additions & 8 deletions compiler/plugins/target/AMD-AIE/aie/AMDAIEDmaToNpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ struct DmaToNpuPattern : OpConversionPattern<NpuDmaMemcpyNdOp> {
auto issue_token = BoolAttr::get(ctx, false);
auto repeat_count = zero;

llvm::SmallVector<int64_t, 3> strides = llvm::map_to_vector(
llvm::SmallVector<int64_t, 4> strides = llvm::map_to_vector(
llvm::reverse(op.getMixedStrides()),
[](OpFoldResult s) { return getConstantIntValue(s).value(); });
llvm::SmallVector<int64_t, 4> sizes = llvm::map_to_vector(
Expand Down Expand Up @@ -359,25 +359,25 @@ struct DmaToNpuPattern : OpConversionPattern<NpuDmaMemcpyNdOp> {
buffer_offset = IntegerAttr::get(i32ty, offset);

// d0_size
if (strides[0]) d0_size = IntegerAttr::get(i32ty, sizes[0]);
if (strides[1]) d0_size = IntegerAttr::get(i32ty, sizes[0]);

// d0_stride
d0_stride = IntegerAttr::get(i32ty, 0);
if (strides[0]) d0_stride = IntegerAttr::get(i32ty, strides[0] - 1);

// d1_size
if (strides[1]) d1_size = IntegerAttr::get(i32ty, sizes[1]);
if (strides[2]) d1_size = IntegerAttr::get(i32ty, sizes[1]);

// d1_stride
if (strides[0]) d1_stride = IntegerAttr::get(i32ty, strides[0] - 1);
if (strides[1]) d1_stride = IntegerAttr::get(i32ty, strides[1] - 1);

// d2_stride
if (strides[1]) d2_stride = IntegerAttr::get(i32ty, strides[1] - 1);
if (strides[2]) d2_stride = IntegerAttr::get(i32ty, strides[2] - 1);

// iteration_size
if (strides[2]) iteration_size = IntegerAttr::get(i32ty, sizes[3] - 1);
if (strides[3]) iteration_size = IntegerAttr::get(i32ty, sizes[3] - 1);

// iteration_stride
if (strides[2]) iteration_stride = IntegerAttr::get(i32ty, strides[2] - 1);
if (strides[3]) iteration_stride = IntegerAttr::get(i32ty, strides[3] - 1);

// valid_bd
valid_bd = IntegerAttr::get(i32ty, 1);
Expand Down
4 changes: 2 additions & 2 deletions compiler/plugins/target/AMD-AIE/aie/test/aiert_insts.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ module {
%c8 = arith.constant 8 : i64
%c16 = arith.constant 16 : i64
%c32 = arith.constant 32 : i64
aiex.npu.dma_memcpy_nd(0, 0, %out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c32][%c0,%c0,%c0]) { metadata = @of_toMem, id = 1 : i64 } : memref<64xi32>
aiex.npu.dma_memcpy_nd(0, 0, %in[%c0,%c2,%c0,%c0][%c1,%c2,%c2,%c8][%c0,%c16,%c8]) { metadata = @of_fromMem, id = 0 : i64 } : memref<4x2x8xi32>
aiex.npu.dma_memcpy_nd(0, 0, %out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c32][%c0,%c0,%c0,%c1]) { metadata = @of_toMem, id = 1 : i64 } : memref<64xi32>
aiex.npu.dma_memcpy_nd(0, 0, %in[%c0,%c2,%c0,%c0][%c1,%c2,%c2,%c8][%c0,%c16,%c8,%c1]) { metadata = @of_fromMem, id = 0 : i64 } : memref<4x2x8xi32>
return
}
aie.shim_dma_allocation @of_fromMem (MM2S, 0, 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ module {
aie.device(npu1_4col) {
memref.global "public" @toMem : memref<16xi32>
func.func @dma_and_wait(%arg0: memref<16xi32>, %arg1: memref<16xi32>) {
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32>
aiex.npu.dma_wait {symbol = @toMem}
return
}
Expand Down
8 changes: 4 additions & 4 deletions compiler/plugins/target/AMD-AIE/aie/test/dma_to_npu.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ module {
memref.global "public" @toMem : memref<16xi32>
memref.global "public" @fromMem : memref<16xi32>
func.func @dma_memcpy_nd_0(%arg0: memref<16xi32>, %arg1: memref<16xi32>) {
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32>
aiex.npu.dma_memcpy_nd(0, 1, %arg1[0, 0, 0, 16][1, 1, 16, 16][0, 0, 64]) { metadata = @fromMem, id = 0 : i64 } : memref<16xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32>
aiex.npu.dma_memcpy_nd(0, 1, %arg1[0, 0, 0, 16][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @fromMem, id = 0 : i64 } : memref<16xi32>
return
}
aie.shim_dma_allocation @fromMem (MM2S, 0, 0)
Expand All @@ -39,7 +39,7 @@ module {
aie.device(npu1_4col) {
memref.global "public" @toMem : memref<16xi32>
func.func @dma_wait_s2mm(%arg0: memref<16xi32>, %arg1: memref<16xi32>) {
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32>
aiex.npu.dma_wait {symbol = @toMem}
return
}
Expand All @@ -61,7 +61,7 @@ module {
aie.device(npu1_4col) {
memref.global "public" @toMem : memref<16xi32>
func.func @dma_wait_mm2s(%arg0: memref<16xi32>, %arg1: memref<16xi32>) {
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32>
aiex.npu.dma_wait {symbol = @toMem}
return
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ module {
memref.global "public" @toMem : memref<16xi32>
memref.global "public" @fromMem : memref<16xi32>
func.func @test1(%arg0: memref<16xi32>, %arg1: memref<16xi32>) {
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64]) { metadata = @toMem, id = 1 : i64, issue_token = true } : memref<16xi32>
aiex.npu.dma_memcpy_nd(0, 1, %arg1[0, 0, 0, 16][1, 1, 16, 16][0, 0, 64]) { metadata = @fromMem, id = 0 : i64 } : memref<16xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @toMem, id = 1 : i64, issue_token = true } : memref<16xi32>
aiex.npu.dma_memcpy_nd(0, 1, %arg1[0, 0, 0, 16][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @fromMem, id = 0 : i64 } : memref<16xi32>
return
}
aie.shim_dma_allocation @fromMem (MM2S, 0, 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,9 @@ aie.device(npu1_4col) {
memref.assume_alignment %arg0, 64 : memref<16x8xi32>
memref.assume_alignment %arg1, 64 : memref<8x16xi32>
memref.assume_alignment %arg2, 64 : memref<16x16xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 8][0, 0, 8]) {id = 0 : i64, metadata = @airMemcpyId4} : memref<16x8xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg1[0, 0, 0, 0][1, 1, 8, 16][0, 0, 16]) {id = 1 : i64, metadata = @airMemcpyId5} : memref<8x16xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg2[0, 0, 0, 0][1, 1, 16, 16][0, 0, 16]) {id = 2 : i64, metadata = @airMemcpyId12} : memref<16x16xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 8][0, 0, 8, 1]) {id = 0 : i64, metadata = @airMemcpyId4} : memref<16x8xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg1[0, 0, 0, 0][1, 1, 8, 16][0, 0, 16, 1]) {id = 1 : i64, metadata = @airMemcpyId5} : memref<8x16xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg2[0, 0, 0, 0][1, 1, 16, 16][0, 0, 16, 1]) {id = 2 : i64, metadata = @airMemcpyId12} : memref<16x16xi32>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
return
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,9 @@ aie.device(npu1_4col) {
memref.assume_alignment %arg0, 64 : memref<256xi32>
memref.assume_alignment %arg1, 64 : memref<512xi32>
memref.assume_alignment %arg2, 64 : memref<16x32xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 16]) {id = 0 : i64, metadata = @airMemcpyId4} : memref<256xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg1[0, 0, 0, 0][1, 1, 32, 16][0, 0, 16]) {id = 1 : i64, metadata = @airMemcpyId5} : memref<512xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg2[0, 0, 0, 0][1, 1, 16, 32][0, 0, 32]) {id = 2 : i64, metadata = @airMemcpyId12} : memref<16x32xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 16, 1]) {id = 0 : i64, metadata = @airMemcpyId4} : memref<256xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg1[0, 0, 0, 0][1, 1, 32, 16][0, 0, 16, 1]) {id = 1 : i64, metadata = @airMemcpyId5} : memref<512xi32>
aiex.npu.dma_memcpy_nd(0, 0, %arg2[0, 0, 0, 0][1, 1, 16, 32][0, 0, 32, 1]) {id = 2 : i64, metadata = @airMemcpyId12} : memref<16x32xi32>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
return
}
Expand Down
Loading

0 comments on commit 485f670

Please sign in to comment.