Skip to content

Commit

Permalink
[AMDAIEInsertIntoCores] minor simplification (#1118)
Browse files Browse the repository at this point in the history
The arith ops are nested inside linalg ops, so they'll get hoisted into
cores without explicitly listing them here
  • Loading branch information
newling authored Feb 20, 2025
1 parent 22f6ad0 commit d5baa2c
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 54 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,14 @@ namespace {

/// Utility which returns 'true' if the operation needs to be inserted within
/// an `amdaie.core` op. The decision is a single `isa<...>` test of `op`
/// against the op types listed in the body.
/// Some ops are surrrounded by scf.for loop nests. Place the entire
/// Some ops are surrounded by scf.for loop nests. Place the entire
/// loop nest inside the amdaie.core op here. Currently look for a
/// subset of ops which we know should be in the core.
/// TODO(newling) improve this design.
static bool isCoreComputeOp(Operation *op) {
return isa<linalg::LinalgOp, vector::ContractionOp,
memref::ExtractStridedMetadataOp, func::CallOp, arith::ExtFOp,
arith::TruncFOp, arith::TruncIOp, vector::TransferReadOp,
vector::TransferWriteOp>(op);
memref::ExtractStridedMetadataOp, func::CallOp,
vector::TransferReadOp, vector::TransferWriteOp>(op);
}

/// Utility to map the parallel mapping attributes to the corresponding
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -300,53 +300,3 @@ module {
return
}
}

// -----

// Test case: a vector.transfer_read -> arith.truncf -> vector.transfer_write
// chain placed directly in the body of a thread-mapped scf.forall. The
// directives below expect, in order, an scf.forall, an amdaie.tile, an
// amdaie.core, the three ops of the chain, and finally amdaie.end.
// CHECK-LABEL: @insert_truncf_within_core
// CHECK: scf.forall
// CHECK: amdaie.tile
// CHECK: amdaie.core
// CHECK: vector.transfer_read
// CHECK: arith.truncf
// CHECK: vector.transfer_write
// CHECK: amdaie.end
module {
func.func @insert_truncf_within_core(%arg0: memref<10x10xf32, 2 : i32>, %arg1: memref<10x10xbf16, 2 : i32>) {
// f32 zero, passed as the trailing operand of the vector.transfer_read below.
%cst = arith.constant 0.000000e+00 : f32
// Index constants; %c0 and %c1 are the indices used by the read and the write.
// NOTE(review): %c3 is defined but not referenced anywhere in this function.
%c1 = arith.constant 1 : index
%c3 = arith.constant 3 : index
%c0 = arith.constant 0 : index
// 2x2 forall; its induction variables %arg3/%arg4 are not used in the body.
scf.forall (%arg3, %arg4) in (2, 2) {
// Read a 1x1 f32 vector from %arg0, truncate it to bf16, store it to %arg1.
%read = vector.transfer_read %arg0[%c0, %c1], %cst {in_bounds = [true, true]} : memref<10x10xf32, 2 : i32>, vector<1x1xf32>
%truncf = arith.truncf %read : vector<1x1xf32> to vector<1x1xbf16>
vector.transfer_write %truncf, %arg1[%c0, %c1] {in_bounds = [true, true]} : vector<1x1xbf16>, memref<10x10xbf16, 2 : i32>
} {mapping = [#gpu.thread<y>, #gpu.thread<x>]}
return
}
}

// -----

// Test case: a vector.transfer_read -> arith.trunci -> vector.transfer_write
// chain placed directly in the body of a thread-mapped scf.forall. The
// directives below expect, in order, an scf.forall, an amdaie.tile, an
// amdaie.core, the three ops of the chain, and finally amdaie.end.
// CHECK-LABEL: @insert_trunci_within_core
// CHECK: scf.forall
// CHECK: amdaie.tile
// CHECK: amdaie.core
// CHECK: vector.transfer_read
// CHECK: arith.trunci
// CHECK: vector.transfer_write
// CHECK: amdaie.end
module {
func.func @insert_trunci_within_core(%arg0: memref<10x10xi32, 2 : i32>, %arg1: memref<10x10xi8, 2 : i32>) {
// i32 zero, passed as the trailing operand of the vector.transfer_read below.
%cst = arith.constant 0 : i32
// Index constants; %c0 and %c1 are the indices used by the read and the write.
// NOTE(review): %c3 is defined but not referenced anywhere in this function.
%c1 = arith.constant 1 : index
%c3 = arith.constant 3 : index
%c0 = arith.constant 0 : index
// 2x2 forall; its induction variables %arg3/%arg4 are not used in the body.
scf.forall (%arg3, %arg4) in (2, 2) {
// Read a 1x1 i32 vector from %arg0, truncate it to i8, store it to %arg1.
%read = vector.transfer_read %arg0[%c0, %c1], %cst {in_bounds = [true, true]} : memref<10x10xi32, 2 : i32>, vector<1x1xi32>
%trunci = arith.trunci %read : vector<1x1xi32> to vector<1x1xi8>
vector.transfer_write %trunci, %arg1[%c0, %c1] {in_bounds = [true, true]} : vector<1x1xi8>, memref<10x10xi8, 2 : i32>
} {mapping = [#gpu.thread<y>, #gpu.thread<x>]}
return
}
}

0 comments on commit d5baa2c

Please sign in to comment.