Skip to content

Commit

Permalink
[CtrlPkt] Generate connection operations in the overlay (#1063)
Browse files: browse the repository at this point in the history.
Instead of generating `flow` operations, we now directly insert
`connection` operations into the overlay. The motivation behind this
change is that these `connection` operations will be useful for
generating control packet DMAs in subsequent steps.
  • Loading branch information
Yu-Zhewen authored Jan 28, 2025
1 parent 9f5d355 commit 7a059cd
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 52 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,6 @@ void AMDAIEDialect::initializeAMDAIETypes() {
LogicalResult LogicalObjectFifoType::verify(
llvm::function_ref<mlir::InFlightDiagnostic()> emitError,
mlir::MemRefType elementType, unsigned depth) {
if (llvm::any_of(elementType.getShape(), [](auto dimSize) {
return ShapedType::isDynamic(dimSize);
})) {
return emitError() << "should encapsulate static memref";
}
if (depth < 1 || depth > 4) return emitError() << "depth should be in [1, 4]";
return success();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,6 @@ func.func @logicalobjectfifo_tensor(!amdaie.logicalobjectfifo<tensor<8x16xi32>>)

// -----

// expected-error @+1 {{should encapsulate static memref}}
func.func @logicalobjectfifo_dynamic(!amdaie.logicalobjectfifo<memref<?x8x16xi32>>)

// -----

func.func @circular_dma_cpy_nd_invalid_src_offsets() {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ namespace mlir::iree_compiler::AMDAIE {
namespace {

/// Initializes the channel generators for the shim tiles, excluding any
/// channels that are already in use by existing circuit flows.
/// channels that are already in use by existing circuit-mode connections.
LogicalResult initializeChannelsGenerators(
AMDAIE::WorkgroupOp workgroupOp, const AMDAIEDeviceModel &deviceModel,
const DenseSet<TileOp> &shimTileOps,
Expand All @@ -29,11 +29,15 @@ LogicalResult initializeChannelsGenerators(
shimTileToGeneratorMap[shimTileOp.getResult()] =
ChannelGenerator(numShimDmaChannels, numShimDmaChannels);
});
// Exclude those channels that are already used by a circuit flow.
workgroupOp->walk([&](AMDAIE::FlowOp flowOp) {
if (flowOp.getIsPacketFlow()) return WalkResult::advance();
// Exclude those channels that are already used by a circuit-mode connection.
workgroupOp->walk([&](AMDAIE::ConnectionOp connectionOp) {
std::optional<AMDAIE::ConnectionType> connectionType =
connectionOp.getConnectionType();
bool isPacketFlow = connectionType && connectionType.value() ==
AMDAIE::ConnectionType::Packet;
if (isPacketFlow) return WalkResult::advance();
SmallVector<AMDAIE::ChannelOp> sourceChannels;
for (Value source : flowOp.getSources()) {
for (Value source : connectionOp.getSourceChannels()) {
if (auto channelOp =
dyn_cast<AMDAIE::ChannelOp>(source.getDefiningOp())) {
sourceChannels.push_back(channelOp);
Expand Down Expand Up @@ -97,8 +101,8 @@ LogicalResult generateControlOverlay(AMDAIE::WorkgroupOp workgroupOp,
}
}

// Create a packet flow from the shim DMA to the tile CTRL, for sending
// control packets.
// Create a packet-mode connection from the shim DMA to the tile CTRL, for
// sending control packets.
if (routeShimToTileCtrl) {
DenseMap<Value, ChannelGenerator> shimTileToGeneratorMap;
DenseSet<TileOp> shimTileOps;
Expand All @@ -111,43 +115,77 @@ LogicalResult generateControlOverlay(AMDAIE::WorkgroupOp workgroupOp,
uint32_t col = getConstantIndexOrAssert(tileOp.getCol());
TileOp shimTileOp = columnToShimTile[col];
// Get the available channel, but do not assign it. Allow it to be
// shared across multiple packet flows as needed.
// shared across multiple packet-mode connections as needed.
std::optional<uint8_t> maybeChannel =
shimTileToGeneratorMap[shimTileOp.getResult()]
.getProducerDMAChannel();
if (!maybeChannel) {
shimTileOp.emitOpError() << "no producer DMA channel available";
return WalkResult::interrupt();
}
auto shimDmaChannelOp = rewriter.create<AMDAIE::ChannelOp>(
auto sourceChannelOp = rewriter.create<AMDAIE::ChannelOp>(
rewriter.getUnknownLoc(), shimTileOp, maybeChannel.value(),
StrmSwPortType::DMA, AMDAIE::DMAChannelDir::MM2S);
auto tileCtrlChannelOp = rewriter.create<AMDAIE::ChannelOp>(
auto targetChannelOp = rewriter.create<AMDAIE::ChannelOp>(
rewriter.getUnknownLoc(), tileOp, 0, StrmSwPortType::CTRL,
AMDAIE::DMAChannelDir::S2MM);
rewriter.create<AMDAIE::FlowOp>(
rewriter.getUnknownLoc(), ValueRange{shimDmaChannelOp},
ValueRange{tileCtrlChannelOp},
/*isPacketFlow*/ true, /*packetId*/ nullptr);

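// Get the objectfifo placeholder for both the source and target.
// The shape is dynamic because the size of the control packet sequence is
// unknown and may vary based on the reconfiguration content.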
MemRefType elementType =
MemRefType::get(ShapedType::kDynamic, rewriter.getI32Type());
auto sourcePlaceholder =
rewriter.create<AMDAIE::LogicalObjectFifoPlaceholderOp>(
rewriter.getUnknownLoc(), LogicalObjectFifoType::get(elementType),
ValueRange(shimTileOp));
auto targetPlaceholder =
rewriter.create<AMDAIE::LogicalObjectFifoPlaceholderOp>(
rewriter.getUnknownLoc(), LogicalObjectFifoType::get(elementType),
ValueRange(tileOp));

rewriter.create<AMDAIE::ConnectionOp>(
rewriter.getUnknownLoc(), targetPlaceholder,
ValueRange{targetChannelOp}, sourcePlaceholder,
ValueRange{sourceChannelOp},
ConnectionTypeAttr::get(rewriter.getContext(),
ConnectionType::Packet),
/*flow=*/nullptr);
return WalkResult::advance();
});
if (res.wasInterrupted()) return failure();
}

// Create a circuit flow from the shim CTRL to the shim SOUTH 0, for sending
// Task Completion Tokens (TCTs).
// Create a circuit-mode connection from the shim CTRL to the shim SOUTH 0,
// for sending Task Completion Tokens (TCTs).
if (routeShimCtrlToTct) {
for (auto [_, shimTileOp] : columnToShimTile) {
auto shimCtrlChannelOp = rewriter.create<AMDAIE::ChannelOp>(
auto sourceChannelOp = rewriter.create<AMDAIE::ChannelOp>(
rewriter.getUnknownLoc(), shimTileOp, 0, StrmSwPortType::CTRL,
AMDAIE::DMAChannelDir::MM2S);
auto shimSouthChannelOp = rewriter.create<AMDAIE::ChannelOp>(
auto targetChannelOp = rewriter.create<AMDAIE::ChannelOp>(
rewriter.getUnknownLoc(), shimTileOp, 0, StrmSwPortType::SOUTH,
AMDAIE::DMAChannelDir::S2MM);
rewriter.create<AMDAIE::FlowOp>(
rewriter.getUnknownLoc(), ValueRange{shimCtrlChannelOp},
ValueRange{shimSouthChannelOp},
/*isPacketFlow*/ false, /*packetId*/ nullptr);

// Get the objectfifo placeholder for both the source and target.
// Set the shape to dynamic because the size of the control packet
// sequence is unknown and may vary based on the reconfiguration content.
MemRefType elementType =
MemRefType::get(ShapedType::kDynamic, rewriter.getI32Type());
auto sourcePlaceholder =
rewriter.create<AMDAIE::LogicalObjectFifoPlaceholderOp>(
rewriter.getUnknownLoc(), LogicalObjectFifoType::get(elementType),
ValueRange(shimTileOp));
auto targetPlaceholder =
rewriter.create<AMDAIE::LogicalObjectFifoPlaceholderOp>(
rewriter.getUnknownLoc(), LogicalObjectFifoType::get(elementType),
ValueRange(shimTileOp));

rewriter.create<AMDAIE::ConnectionOp>(
rewriter.getUnknownLoc(), targetPlaceholder,
ValueRange{targetChannelOp}, sourcePlaceholder,
ValueRange{sourceChannelOp},
ConnectionTypeAttr::get(rewriter.getContext(),
ConnectionType::Circuit),
/*flow=*/nullptr);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -884,12 +884,10 @@ void addAMDAIEObjectFifoLoweringPasses(

passManager.addPass(createAMDAIEObjFifoBufferizationPass());
passManager.addPass(createAMDAIETemporaryAllocBufferizationPass());
passManager.addPass(createAMDAIEConnectionToFlowPass());

passManager.addPass(createAMDAIEGenerateControlOverlayPass());
passManager.addPass(createCSEPass());
passManager.addPass(createCanonicalizerPass());

passManager.addPass(createAMDAIEConnectionToFlowPass());
passManager.addPass(createAMDAIEAssignPacketIdsPass());

passManager.addPass(createAMDAIENpuDmaToHalfDmaCpyNdPass());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-amdaie-generate-control-overlay{route-shim-to-tct=true route-shim-to-tile-ctrl=true},canonicalize,cse))" --split-input-file --verify-diagnostics %s | FileCheck %s
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-amdaie-generate-control-overlay{route-shim-to-tct=true route-shim-to-tile-ctrl=true}))" --split-input-file --verify-diagnostics %s | FileCheck %s

// Device attribute is required for route-shim-to-tile-ctrl.
module {
Expand All @@ -16,7 +16,7 @@ module {
// -----

// Shim tile (0, 0) has two producer (MM2S) channels,
// both of which are already utilized by existing circuit flows.
// both of which are already utilized by existing circuit-mode connections.
// No producer DMA channel is available for route-shim-to-tile-ctrl.
#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
Expand All @@ -27,12 +27,16 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb}
// expected-error @+1 {{no producer DMA channel available}}
%tile_0_0 = amdaie.tile(%c0, %c0)
%tile_0_1 = amdaie.tile(%c0, %c1)
%0 = amdaie.logicalobjectfifo.placeholder{%tile_0_0} : !amdaie.logicalobjectfifo<memref<32xi32>>
%1 = amdaie.logicalobjectfifo.placeholder{%tile_0_1} : !amdaie.logicalobjectfifo<memref<32xi32>>
%2 = amdaie.logicalobjectfifo.placeholder{%tile_0_0} : !amdaie.logicalobjectfifo<memref<32xi32>>
%3 = amdaie.logicalobjectfifo.placeholder{%tile_0_1} : !amdaie.logicalobjectfifo<memref<32xi32>>
%channel_0 = amdaie.channel(%tile_0_0, 0, port_type = DMA, direction = MM2S)
%channel_1 = amdaie.channel(%tile_0_1, 0, port_type = DMA, direction = S2MM)
%flow_0 = amdaie.flow({%channel_0} -> {%channel_1}) {is_packet_flow = false}
%connection_0 = amdaie.connection(%1 {%channel_1}, %0 {%channel_0}) : (!amdaie.logicalobjectfifo<memref<32xi32>>, !amdaie.logicalobjectfifo<memref<32xi32>>)
%channel_2 = amdaie.channel(%tile_0_0, 1, port_type = DMA, direction = MM2S)
%channel_3 = amdaie.channel(%tile_0_1, 1, port_type = DMA, direction = S2MM)
%flow_1 = amdaie.flow({%channel_2} -> {%channel_3}) {is_packet_flow = false}
%connection_1 = amdaie.connection(%3 {%channel_3}, %2 {%channel_2}) : (!amdaie.logicalobjectfifo<memref<32xi32>>, !amdaie.logicalobjectfifo<memref<32xi32>>)
amdaie.controlcode {
amdaie.end
}
Expand Down Expand Up @@ -62,21 +66,25 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb}
// CHECK: %[[TILE_0_5:.*]] = amdaie.tile(%[[C0]], %[[C5]])
// CHECK: %[[CHANNEL_0:.*]] = amdaie.channel(%[[TILE_0_0]], 0, port_type = DMA, direction = MM2S)
// CHECK: %[[CHANNEL_1:.*]] = amdaie.channel(%[[TILE_0_0]], 0, port_type = CTRL, direction = S2MM)
// CHECK: %[[FLOW_0:.*]] = amdaie.flow({%[[CHANNEL_0]]} -> {%[[CHANNEL_1]]}) {is_packet_flow = true}
// CHECK: %[[CONNECT_0:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_1]]}, %{{.+}} {%[[CHANNEL_0]]}) {connection_type = #amdaie<connection_type Packet>}
// CHECK: %[[CHANNEL_2:.*]] = amdaie.channel(%[[TILE_0_0]], 1, port_type = DMA, direction = MM2S)
// CHECK: %[[CHANNEL_3:.*]] = amdaie.channel(%[[TILE_0_1]], 0, port_type = CTRL, direction = S2MM)
// CHECK: %[[FLOW_1:.*]] = amdaie.flow({%[[CHANNEL_2]]} -> {%[[CHANNEL_3]]}) {is_packet_flow = true}
// CHECK: %[[CHANNEL_4:.*]] = amdaie.channel(%[[TILE_0_2]], 0, port_type = CTRL, direction = S2MM)
// CHECK: %[[FLOW_2:.*]] = amdaie.flow({%[[CHANNEL_0]]} -> {%[[CHANNEL_4]]}) {is_packet_flow = true}
// CHECK: %[[CHANNEL_5:.*]] = amdaie.channel(%[[TILE_0_3]], 0, port_type = CTRL, direction = S2MM)
// CHECK: %[[FLOW_3:.*]] = amdaie.flow({%[[CHANNEL_2]]} -> {%[[CHANNEL_5]]}) {is_packet_flow = true}
// CHECK: %[[CHANNEL_6:.*]] = amdaie.channel(%[[TILE_0_4]], 0, port_type = CTRL, direction = S2MM)
// CHECK: %[[FLOW_4:.*]] = amdaie.flow({%[[CHANNEL_0]]} -> {%[[CHANNEL_6]]}) {is_packet_flow = true}
// CHECK: %[[CHANNEL_7:.*]] = amdaie.channel(%[[TILE_0_5]], 0, port_type = CTRL, direction = S2MM)
// CHECK: %[[FLOW_5:.*]] = amdaie.flow({%[[CHANNEL_2]]} -> {%[[CHANNEL_7]]}) {is_packet_flow = true}
// CHECK: %[[CHANNEL_8:.*]] = amdaie.channel(%[[TILE_0_0]], 0, port_type = CTRL, direction = MM2S)
// CHECK: %[[CHANNEL_9:.*]] = amdaie.channel(%[[TILE_0_0]], 0, port_type = SOUTH, direction = S2MM)
// CHECK: %[[FLOW_6:.*]] = amdaie.flow({%[[CHANNEL_8]]} -> {%[[CHANNEL_9]]}) {is_packet_flow = false}
// CHECK: %[[CONNECT_1:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_3]]}, %{{.+}} {%[[CHANNEL_2]]}) {connection_type = #amdaie<connection_type Packet>}
// CHECK: %[[CHANNEL_4:.*]] = amdaie.channel(%[[TILE_0_0]], 0, port_type = DMA, direction = MM2S)
// CHECK: %[[CHANNEL_5:.*]] = amdaie.channel(%[[TILE_0_2]], 0, port_type = CTRL, direction = S2MM)
// CHECK: %[[CONNECT_2:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_5]]}, %{{.+}} {%[[CHANNEL_4]]}) {connection_type = #amdaie<connection_type Packet>}
// CHECK: %[[CHANNEL_6:.*]] = amdaie.channel(%[[TILE_0_0]], 1, port_type = DMA, direction = MM2S)
// CHECK: %[[CHANNEL_7:.*]] = amdaie.channel(%[[TILE_0_3]], 0, port_type = CTRL, direction = S2MM)
// CHECK: %[[CONNECT_3:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_7]]}, %{{.+}} {%[[CHANNEL_6]]}) {connection_type = #amdaie<connection_type Packet>}
// CHECK: %[[CHANNEL_8:.*]] = amdaie.channel(%[[TILE_0_0]], 0, port_type = DMA, direction = MM2S)
// CHECK: %[[CHANNEL_9:.*]] = amdaie.channel(%[[TILE_0_4]], 0, port_type = CTRL, direction = S2MM)
// CHECK: %[[CONNECT_4:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_9]]}, %{{.+}} {%[[CHANNEL_8]]}) {connection_type = #amdaie<connection_type Packet>}
// CHECK: %[[CHANNEL_10:.*]] = amdaie.channel(%[[TILE_0_0]], 1, port_type = DMA, direction = MM2S)
// CHECK: %[[CHANNEL_11:.*]] = amdaie.channel(%[[TILE_0_5]], 0, port_type = CTRL, direction = S2MM)
// CHECK: %[[CONNECT_5:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_11]]}, %{{.+}} {%[[CHANNEL_10]]}) {connection_type = #amdaie<connection_type Packet>}
// CHECK: %[[CHANNEL_12:.*]] = amdaie.channel(%[[TILE_0_0]], 0, port_type = CTRL, direction = MM2S)
// CHECK: %[[CHANNEL_13:.*]] = amdaie.channel(%[[TILE_0_0]], 0, port_type = SOUTH, direction = S2MM)
// CHECK: %[[CONNECT_6:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_13]]}, %{{.+}} {%[[CHANNEL_12]]}) {connection_type = #amdaie<connection_type Circuit>}
#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
func.func @column_control_overlay() {
Expand Down

0 comments on commit 7a059cd

Please sign in to comment.