Skip to content

[DAP] Support both f32 and f64 type for 'dap.fir' operation. #161

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 108 additions & 85 deletions benchmarks/AudioProcessing/Operations/FIROp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,94 +1,96 @@
#-------------------------------------------------------------------------------
# Generate MLIRFIRScalar
# Generate BuddyFIRTilesVectorization
#-------------------------------------------------------------------------------
# Append `-g` to the C++ compiler flags used from this point on.
# NOTE(review): this modifies a directory-scoped variable rather than a single
# target, so it affects every C++ target configured after this line - confirm
# that this is intended and not leftover debugging.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
# Build one `dap.fir` kernel object for a given vectorization configuration and
# wrap it in a static library.
#
# The rule streams FIR.mlir through sed, renaming `@buddy_fir_f32` and
# `@buddy_fir_f64` to `@buddy_fir_vs_<vector_size>_ts_<tile_size>_uf_<unroll_factor>_f32`
# (and `_f64`), so that objects built for different configurations do not define
# the same symbols. The result is lowered by buddy-opt (`-vectorize-dap` plus
# the LLVM conversion passes), translated to LLVM IR by mlir-translate, and
# compiled to an object file by `llc -O3`.
#
# Arguments:
#   vector_size   - value passed to the pass as `fir-vec-size`
#   tile_size     - value passed to the pass as `fir-tile-size`
#   unroll_factor - value passed to the pass as `fir-unroll-factor`
#
# Defines the static library target
#   BuddyVs<vector_size>Ts<tile_size>Uf<unroll_factor>
#
# Example:
#   build_buddy_tile_vectorization(16 512 4)   # -> target BuddyVs16Ts512Uf4
function(build_buddy_tile_vectorization vector_size tile_size unroll_factor)
  set(fir_source "${BUDDY_SOURCE_DIR}/benchmarks/AudioProcessing/Operations/FIROp/FIR.mlir")
  set(object_name buddy_vec_${vector_size}_tile_${tile_size}_unroll_${unroll_factor}.o)

  add_custom_command(
    OUTPUT ${object_name}
    COMMAND
      cat ${fir_source} |
      sed -e 's/@buddy_fir_f32/@buddy_fir_vs_${vector_size}_ts_${tile_size}_uf_${unroll_factor}_f32/g'
          -e 's/@buddy_fir_f64/@buddy_fir_vs_${vector_size}_ts_${tile_size}_uf_${unroll_factor}_f64/g' |
      ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-opt
        -vectorize-dap="fir-vec-size=${vector_size};fir-tile-size=${tile_size};fir-unroll-factor=${unroll_factor}"
        -convert-scf-to-cf
        -convert-vector-to-llvm
        -llvm-request-c-wrappers
        -convert-arith-to-llvm
        -finalize-memref-to-llvm
        -convert-func-to-llvm
        -reconcile-unrealized-casts |
      ${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
      ${LLVM_MLIR_BINARY_DIR}/llc -O3
        -mtriple=${BUDDY_OPT_TRIPLE}
        -mattr=${BUDDY_OPT_ATTR}
        -filetype=obj
        -o ${BUDDY_BINARY_DIR}/../benchmarks/AudioProcessing/Operations/FIROp/${object_name}
    DEPENDS
      # The MLIR input must be listed, otherwise editing it leaves a stale object.
      ${fir_source}
      ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-opt
      ${LLVM_MLIR_BINARY_DIR}/mlir-translate
      ${LLVM_MLIR_BINARY_DIR}/llc
  )

  add_library(BuddyVs${vector_size}Ts${tile_size}Uf${unroll_factor} STATIC ${object_name})
  # The library contains only a prebuilt object, so CMake cannot infer the
  # linker language from source files; set it explicitly.
  set_target_properties(BuddyVs${vector_size}Ts${tile_size}Uf${unroll_factor} PROPERTIES LINKER_LANGUAGE CXX)
endfunction()

# Lower the hand-written scalar FIR kernel (MLIRFIR.mlir) to an object file:
# buddy-opt conversion passes -> mlir-translate -> llc.
add_custom_command(
  OUTPUT mlir-fir.o
  COMMAND
    ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-opt
      ${BUDDY_SOURCE_DIR}/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIR.mlir
      -convert-scf-to-cf
      -llvm-request-c-wrappers
      -convert-arith-to-llvm
      -finalize-memref-to-llvm
      -convert-func-to-llvm
      -reconcile-unrealized-casts |
    ${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
    ${LLVM_MLIR_BINARY_DIR}/llc
      -mtriple=${BUDDY_OPT_TRIPLE}
      -mattr=${BUDDY_OPT_ATTR}
      -filetype=obj
      -o ${BUDDY_BINARY_DIR}/../benchmarks/AudioProcessing/Operations/FIROp/mlir-fir.o
  DEPENDS
    # The MLIR input must be listed, otherwise editing it leaves a stale object.
    ${BUDDY_SOURCE_DIR}/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIR.mlir
    ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-opt
    ${LLVM_MLIR_BINARY_DIR}/mlir-translate
    ${LLVM_MLIR_BINARY_DIR}/llc
)

# Wrap the prebuilt object in a static library. The linker language is set
# explicitly because there are no source files to infer it from.
add_library(MLIRFIRScalar STATIC mlir-fir.o)
set_target_properties(MLIRFIRScalar PROPERTIES LINKER_LANGUAGE CXX)

#-------------------------------------------------------------------------------
# Generate MLIRFIRTiledVectorization
#-------------------------------------------------------------------------------

# Lower the hand-written tiled, vectorized FIR kernel
# (MLIRFIRTiledVectorization.mlir) to an object file:
# buddy-opt conversion passes -> mlir-translate -> llc.
add_custom_command(
  OUTPUT fir-tile-vectorization.o
  COMMAND
    ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-opt
      ${BUDDY_SOURCE_DIR}/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRTiledVectorization.mlir
      -convert-scf-to-cf
      -convert-vector-to-llvm
      -llvm-request-c-wrappers
      -convert-arith-to-llvm
      -finalize-memref-to-llvm
      -convert-func-to-llvm
      -reconcile-unrealized-casts |
    ${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
    ${LLVM_MLIR_BINARY_DIR}/llc
      -mtriple=${BUDDY_OPT_TRIPLE}
      -mattr=${BUDDY_OPT_ATTR}
      -filetype=obj
      -o ${BUDDY_BINARY_DIR}/../benchmarks/AudioProcessing/Operations/FIROp/fir-tile-vectorization.o
  DEPENDS
    # The MLIR input must be listed, otherwise editing it leaves a stale object.
    ${BUDDY_SOURCE_DIR}/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRTiledVectorization.mlir
    ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-opt
    ${LLVM_MLIR_BINARY_DIR}/mlir-translate
    ${LLVM_MLIR_BINARY_DIR}/llc
)

# Wrap the prebuilt object in a static library. The linker language is set
# explicitly because there are no source files to infer it from.
add_library(MLIRFIRTiledVectorization STATIC fir-tile-vectorization.o)
set_target_properties(MLIRFIRTiledVectorization PROPERTIES LINKER_LANGUAGE CXX)
# Instantiate the Buddy DAP FIR kernels for every benchmarked configuration.
# Each call passes (vector size, tile size, unroll factor).

# Vector size 8: sweep the tile size with unroll factor 1.
foreach(fir_tile 64 128 256 512 1024 2048 4096 8192)
  build_buddy_tile_vectorization(8 ${fir_tile} 1)
endforeach()

# Vector size 16: tile sizes below 512 with unroll factor 1.
foreach(fir_tile 64 128 216 240 256)
  build_buddy_tile_vectorization(16 ${fir_tile} 1)
endforeach()

# Vector size 16, tile size 512: sweep the unroll factor.
foreach(fir_unroll 1 2 4 8 16)
  build_buddy_tile_vectorization(16 512 ${fir_unroll})
endforeach()

# Vector size 16: tile sizes above 512 with unroll factor 1.
foreach(fir_tile 1024 2048 4096 8192)
  build_buddy_tile_vectorization(16 ${fir_tile} 1)
endforeach()

#-------------------------------------------------------------------------------
# Generate MLIRFIRVectorization
#-------------------------------------------------------------------------------

# Lower the hand-written vectorized FIR kernel (MLIRFIRVectorization.mlir) to an
# object file: buddy-opt conversion passes -> mlir-translate -> llc.
add_custom_command(
  OUTPUT fir-vectorization.o
  COMMAND
    ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-opt
      ${BUDDY_SOURCE_DIR}/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRVectorization.mlir
      -convert-scf-to-cf
      -convert-vector-to-llvm
      -llvm-request-c-wrappers
      -convert-arith-to-llvm
      -finalize-memref-to-llvm
      -convert-func-to-llvm
      -reconcile-unrealized-casts |
    ${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
    ${LLVM_MLIR_BINARY_DIR}/llc
      -mtriple=${BUDDY_OPT_TRIPLE}
      -mattr=${BUDDY_OPT_ATTR}
      -filetype=obj
      -o ${BUDDY_BINARY_DIR}/../benchmarks/AudioProcessing/Operations/FIROp/fir-vectorization.o
  DEPENDS
    # The MLIR input must be listed, otherwise editing it leaves a stale object.
    ${BUDDY_SOURCE_DIR}/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRVectorization.mlir
    ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-opt
    ${LLVM_MLIR_BINARY_DIR}/mlir-translate
    ${LLVM_MLIR_BINARY_DIR}/llc
)
# Build the hand-written vectorized FIR kernel for one element type and wrap
# the resulting object in a static library.
#
# MLIRFIRVectorization.mlir is used as a template: every occurrence of
# `TYPE_PLACEHOLDER` is replaced with `type` by sed before the text is piped
# through buddy-opt, mlir-translate and llc.
#
# Arguments:
#   type - text substituted for TYPE_PLACEHOLDER (this file passes f32 and f64)
#
# Defines the static library target MLIRFIRVectorization<type>.
#
# Example:
#   build_fir_vectorization(f32)   # -> target MLIRFIRVectorizationf32
function(build_fir_vectorization type)
  set(fir_template "${BUDDY_SOURCE_DIR}/benchmarks/AudioProcessing/Operations/FIROp/MLIRFIRVectorization.mlir")
  set(object_name fir-vectorization-${type}.o)

  add_custom_command(
    OUTPUT ${object_name}
    COMMAND
      cat ${fir_template} |
      sed 's/TYPE_PLACEHOLDER/${type}/g' |
      ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-opt
        -convert-scf-to-cf
        -convert-vector-to-llvm
        -llvm-request-c-wrappers
        -convert-arith-to-llvm
        -finalize-memref-to-llvm
        -convert-func-to-llvm
        -reconcile-unrealized-casts |
      ${LLVM_MLIR_BINARY_DIR}/mlir-translate -mlir-to-llvmir |
      ${LLVM_MLIR_BINARY_DIR}/llc
        -mtriple=${BUDDY_OPT_TRIPLE}
        -mattr=${BUDDY_OPT_ATTR}
        -filetype=obj
        -o ${BUDDY_BINARY_DIR}/../benchmarks/AudioProcessing/Operations/FIROp/${object_name}
    DEPENDS
      # The MLIR template must be listed, otherwise editing it leaves a stale object.
      ${fir_template}
      ${BUDDY_MLIR_BUILD_DIR}/bin/buddy-opt
      ${LLVM_MLIR_BINARY_DIR}/mlir-translate
      ${LLVM_MLIR_BINARY_DIR}/llc
  )

  add_library(MLIRFIRVectorization${type} STATIC ${object_name})
  # The library contains only a prebuilt object, so CMake cannot infer the
  # linker language from source files; set it explicitly.
  set_target_properties(MLIRFIRVectorization${type} PROPERTIES LINKER_LANGUAGE CXX)
endfunction()

# Static library wrapping the prebuilt fir-vectorization.o object; the linker
# language is given explicitly because no source files are listed.
add_library(MLIRFIRVectorization STATIC fir-vectorization.o)
set_property(TARGET MLIRFIRVectorization PROPERTY LINKER_LANGUAGE CXX)

# One typed kernel library per supported element type.
foreach(fir_elem_type f32 f64)
  build_fir_vectorization(${fir_elem_type})
endforeach()

#-------------------------------------------------------------------------------
# Generate dap-op-fir-benchmark
Expand All @@ -105,10 +107,31 @@ target_link_libraries(dap-op-fir-benchmark PRIVATE
# Third-party library
kfr_io
# MLIR hand-written benchmark
MLIRFIRScalar
MLIRFIRTiledVectorization
MLIRFIRVectorization
MLIRFIRVectorizationf32
MLIRFIRVectorizationf64
# Buddy DAP library
BuddyVs8Ts64Uf1
BuddyVs8Ts128Uf1
BuddyVs8Ts256Uf1
BuddyVs8Ts512Uf1
BuddyVs8Ts1024Uf1
BuddyVs8Ts2048Uf1
BuddyVs8Ts4096Uf1
BuddyVs8Ts8192Uf1
BuddyVs16Ts64Uf1
BuddyVs16Ts128Uf1
BuddyVs16Ts216Uf1
BuddyVs16Ts240Uf1
BuddyVs16Ts256Uf1
BuddyVs16Ts512Uf1
BuddyVs16Ts512Uf2
BuddyVs16Ts512Uf4
BuddyVs16Ts512Uf8
BuddyVs16Ts512Uf16
BuddyVs16Ts1024Uf1
BuddyVs16Ts2048Uf1
BuddyVs16Ts4096Uf1
BuddyVs16Ts8192Uf1
BuddyLibDAP
# LLVM/MLIR library
StaticMLIRCRunnerUtils
Expand Down
29 changes: 29 additions & 0 deletions benchmarks/AudioProcessing/Operations/FIROp/FIR.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//===- FIR.mlir -----------------------------------------------------------===//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//===----------------------------------------------------------------------===//
//
// This file provides the `dap.fir` operations with various types.
//
//===----------------------------------------------------------------------===//

// f32 entry point: wraps a single `dap.fir` operation applied to three
// dynamically sized 1-D f32 memrefs (%in, %filter, %out). Returns no value.
func.func @buddy_fir_f32(%in : memref<?xf32>, %filter : memref<?xf32>, %out : memref<?xf32>) -> () {
dap.fir %in, %filter, %out : memref<?xf32>, memref<?xf32>, memref<?xf32>
return
}

// f64 entry point: wraps a single `dap.fir` operation applied to three
// dynamically sized 1-D f64 memrefs (%in, %filter, %out). Returns no value.
func.func @buddy_fir_f64(%in : memref<?xf64>, %filter : memref<?xf64>, %out : memref<?xf64>) -> () {
dap.fir %in, %filter, %out : memref<?xf64>, memref<?xf64>, memref<?xf64>
return
}
3 changes: 2 additions & 1 deletion benchmarks/AudioProcessing/Operations/FIROp/MLIRFIR.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
//
//===----------------------------------------------------------------------===//
//
// This file provides the MLIR Fir function.
// This file implements the scalar version of the FIR function, following the
// same algorithm as the scalar version of Buddy's DAP pass: `--lower-dap`.
//
//===----------------------------------------------------------------------===//

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
//
//===----------------------------------------------------------------------===//
//
// This file provides the vectorized MLIR FIR function with tiling.
// This file implements the vectorized FIR function using a tiling technique,
// following the same algorithm as Buddy's vectorize DAP pass:
// `--vectorize-dap="fir-vec-size=16 fir-tile-size=2048"`
//
//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -110,7 +112,7 @@ func.func @fir_tiled_vectorization(%input : memref<?xf32>, %kernel : memref<?xf3
scf.for %i = %address to %upbound step %vl_step {
%in_vec = vector.load %input[%i] : memref<?xf32>, vector<16xf32>
%out_index = arith.addi %i, %n : index
%out_vec = vector.load %output[%out_index] : memref<?xf32>, vector<16xf32> // 需要计算output的偏移量
%out_vec = vector.load %output[%out_index] : memref<?xf32>, vector<16xf32>
%fma_vec = vector.fma %k_vec, %in_vec, %out_vec : vector<16xf32>
vector.store %fma_vec, %output[%out_index] : memref<?xf32>, vector<16xf32>
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,18 @@
//
//===----------------------------------------------------------------------===//
//
// This file provides the vectorized MLIR FIR function (without tiling).
// This file implements the vectorized MLIR FIR function (without tiling),
// with a fixed vector size of 16.
//
//===----------------------------------------------------------------------===//

func.func @fir_vectorization(%input : memref<?xf32>, %kernel : memref<?xf32>,
%output : memref<?xf32>) -> () {
func.func @fir_vector_TYPE_PLACEHOLDER(%input : memref<?xTYPE_PLACEHOLDER>,
%kernel : memref<?xTYPE_PLACEHOLDER>, %output : memref<?xTYPE_PLACEHOLDER>) -> () {
// 1. Get the total length of the workload.
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%input_size = memref.dim %input, %c0 : memref<?xf32>
%kernel_size = memref.dim %kernel, %c0 : memref<?xf32>
%input_size = memref.dim %input, %c0 : memref<?xTYPE_PLACEHOLDER>
%kernel_size = memref.dim %kernel, %c0 : memref<?xTYPE_PLACEHOLDER>

// 2. Set the iteration step (vector size).
%vl_step = arith.constant 16 : index
Expand All @@ -40,30 +41,30 @@ func.func @fir_vectorization(%input : memref<?xf32>, %kernel : memref<?xf32>,
// 4. Loop through each kernel element
scf.for %n = %c0 to %kernel_size step %c1
iter_args(%upbound = %upbound_init) -> (index) {
%k_elem = memref.load %kernel[%n] : memref<?xf32>
%k_vec = vector.splat %k_elem : vector<16xf32>
%k_elem = memref.load %kernel[%n] : memref<?xTYPE_PLACEHOLDER>
%k_vec = vector.splat %k_elem : vector<16xTYPE_PLACEHOLDER>

// 5. Perform the vectorization body.
%iter_idx = scf.for %i = %c0 to %upbound step %vl_step
iter_args(%iter_init = %c0) -> (index) {
%in_vec = vector.load %input[%i] : memref<?xf32>, vector<16xf32>
%in_vec = vector.load %input[%i] : memref<?xTYPE_PLACEHOLDER>, vector<16xTYPE_PLACEHOLDER>
%out_index = arith.addi %i, %n : index
%out_vec = vector.load %output[%out_index] : memref<?xf32>, vector<16xf32>
%fma_vec = vector.fma %k_vec, %in_vec, %out_vec : vector<16xf32>
vector.store %fma_vec, %output[%out_index] : memref<?xf32>, vector<16xf32>
%out_vec = vector.load %output[%out_index] : memref<?xTYPE_PLACEHOLDER>, vector<16xTYPE_PLACEHOLDER>
%fma_vec = vector.fma %k_vec, %in_vec, %out_vec : vector<16xTYPE_PLACEHOLDER>
vector.store %fma_vec, %output[%out_index] : memref<?xTYPE_PLACEHOLDER>, vector<16xTYPE_PLACEHOLDER>
%i_next = arith.addi %i, %vl_step : index
scf.yield %i_next : index
}

// 6. Process the remainder of the elements with scalar operations.
%upbound_scalar = arith.addi %upbound, %vl_step_minus_1 : index
scf.for %i = %iter_idx to %upbound_scalar step %c1 {
%in_elem = memref.load %input[%i] : memref<?xf32>
%in_elem = memref.load %input[%i] : memref<?xTYPE_PLACEHOLDER>
%out_index = arith.addi %i, %n : index
%out_elem = memref.load %output[%out_index] : memref<?xf32>
%mul_elem = arith.mulf %in_elem, %k_elem : f32
%add_elem = arith.addf %mul_elem, %out_elem : f32
memref.store %add_elem, %output[%out_index] : memref<?xf32>
%out_elem = memref.load %output[%out_index] : memref<?xTYPE_PLACEHOLDER>
%mul_elem = arith.mulf %in_elem, %k_elem : TYPE_PLACEHOLDER
%add_elem = arith.addf %mul_elem, %out_elem : TYPE_PLACEHOLDER
memref.store %add_elem, %output[%out_index] : memref<?xTYPE_PLACEHOLDER>
}

%upbound_next = arith.subi %upbound, %c1 : index
Expand Down
Loading