Skip to content

Commit

Permalink
[ControlOverlay] Introduce dma_placeholder to preserve connection
Browse files Browse the repository at this point in the history
… ops from Dead-Code Elimination (#1111)

Following the discussion in #1063, `connection` ops generated for the
control overlay do not initially have DMA users, as these are only added
later when the content of control packets is determined. However, since
`connection` ops are marked as pure in the IR, they can be wrongly
eliminated by CSE and Canonicalization if they have no users at that
stage. To prevent this, each control-overlay `connection` is given an
`npu.dma_placeholder` user inside the control code region.
  • Loading branch information
Yu-Zhewen authored Feb 16, 2025
1 parent 54b89c2 commit fd4db47
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 17 deletions.
17 changes: 17 additions & 0 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/IR/AMDAIEOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,23 @@ def AMDAIE_NpuDmaCpyNdOp: AMDAIE_Op<"npu.dma_cpy_nd", [
let hasCanonicalizer = 1;
}

// Placeholder op whose only role is to be a user of an `amdaie.connection`
// result. It takes that result as a single `Index`-typed operand and declares
// no results.
// NOTE(review): no traits are listed on this op, presumably so that it is not
// considered side-effect free and removed itself -- confirm against the
// `AMDAIE_Op` base class defaults.
def AMDAIE_NpuDmaPlaceHolderOp : AMDAIE_Op<"npu.dma_placeholder"> {
let summary = "Represents a placeholder for a DMA operation.";
let description = [{
This operation acts as a placeholder user for `amdaie.connection` operations to prevent
them from being dead-code eliminated. This is used for control flow connections that are
inserted before control packets are generated because they need to be taken into account
together with data connections for routing. This operation does not have any side effects
on control code size.
}];

// Single operand: the value produced by the `amdaie.connection` that must be
// kept alive.
let arguments = (
ins Index:$connection
);

// Textual form: `amdaie.npu.dma_placeholder(%connection)` followed by an
// optional attribute dictionary.
let assemblyFormat = [{ `(` $connection `)` attr-dict }];
}

def AMDAIE_NpuHalfDmaCpyNdOp
: AMDAIE_Op<"npu.half_dma_cpy_nd", [AttrSizedOperandSegments, OffsetSizeAndStrideOpInterface]> {
let summary = "The NPU uController's DMA operation, operating on a single port";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,10 @@ LogicalResult generateControlOverlay(AMDAIE::WorkgroupOp workgroupOp,
if (deviceModel.isShimNOCTile(col, row)) columnToShimTile[col] = tileOp;
});

AMDAIE::ControlCodeOp controlCodeOp = workgroupOp.getControlCode();
rewriter.setInsertionPoint(controlCodeOp);
// If the column is occupied, but the shim tile op is not present, then create
// one.
rewriter.setInsertionPoint(workgroupOp.getControlCode());
for (uint32_t col : occupiedCols) {
if (!columnToShimTile.count(col)) {
auto colIndex = rewriter.create<arith::ConstantIndexOp>(
Expand Down Expand Up @@ -104,6 +105,8 @@ LogicalResult generateControlOverlay(AMDAIE::WorkgroupOp workgroupOp,
shimTileOp.emitOpError() << "no producer DMA channel available";
return WalkResult::interrupt();
}

rewriter.setInsertionPoint(controlCodeOp);
auto sourceChannelOp = rewriter.create<AMDAIE::ChannelOp>(
rewriter.getUnknownLoc(), shimTileOp, maybeChannel.value(),
StrmSwPortType::DMA, AMDAIE::DMAChannelDir::MM2S);
Expand All @@ -123,13 +126,17 @@ LogicalResult generateControlOverlay(AMDAIE::WorkgroupOp workgroupOp,
rewriter.getUnknownLoc(), LogicalObjectFifoType::get(elementType),
ValueRange(tileOp));

rewriter.create<AMDAIE::ConnectionOp>(
auto connectionOp = rewriter.create<AMDAIE::ConnectionOp>(
rewriter.getUnknownLoc(), targetPlaceholder,
ValueRange{targetChannelOp}, sourcePlaceholder,
ValueRange{sourceChannelOp},
ConnectionTypeAttr::get(rewriter.getContext(),
ConnectionType::Packet),
/*flow=*/nullptr);

rewriter.setInsertionPoint(controlCodeOp.getBody()->getTerminator());
rewriter.create<AMDAIE::NpuDmaPlaceHolderOp>(rewriter.getUnknownLoc(),
connectionOp.getResult());
return WalkResult::advance();
});
if (res.wasInterrupted()) return failure();
Expand All @@ -139,6 +146,7 @@ LogicalResult generateControlOverlay(AMDAIE::WorkgroupOp workgroupOp,
// for sending Task Completion Tokens (TCTs).
if (routeShimCtrlToTct) {
for (auto [_, shimTileOp] : columnToShimTile) {
rewriter.setInsertionPoint(controlCodeOp);
auto sourceChannelOp = rewriter.create<AMDAIE::ChannelOp>(
rewriter.getUnknownLoc(), shimTileOp, 0, StrmSwPortType::CTRL,
AMDAIE::DMAChannelDir::MM2S);
Expand All @@ -160,13 +168,17 @@ LogicalResult generateControlOverlay(AMDAIE::WorkgroupOp workgroupOp,
rewriter.getUnknownLoc(), LogicalObjectFifoType::get(elementType),
ValueRange(shimTileOp));

rewriter.create<AMDAIE::ConnectionOp>(
auto connectionOp = rewriter.create<AMDAIE::ConnectionOp>(
rewriter.getUnknownLoc(), targetPlaceholder,
ValueRange{targetChannelOp}, sourcePlaceholder,
ValueRange{sourceChannelOp},
ConnectionTypeAttr::get(rewriter.getContext(),
ConnectionType::Circuit),
/*flow=*/nullptr);

rewriter.setInsertionPoint(controlCodeOp.getBody()->getTerminator());
rewriter.create<AMDAIE::NpuDmaPlaceHolderOp>(rewriter.getUnknownLoc(),
connectionOp.getResult());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -565,9 +565,9 @@ LogicalResult AIEDeviceBuilder::bufferToAIE(AMDAIE::BufferOp bufferOp,
return success();
}

/// Convert the `amdaie.connection` operation into `aie.flow` ops and DMA
/// operations. Depending on the location of the source/target of the
/// connection, different DMA ops are created:
/// Convert the `amdaie.connection` operation into DMA operations. Depending on
/// the location of the source/target of the connection, different DMA ops are
/// created:
/// 1. Source/target on a Shim tile: iterate through producer/consumer channels
/// and create corresponding `aie.shim_dma_allocation` ops.
/// 2. Source/target on MemTile: iterate through producer/consumer channels,
Expand Down Expand Up @@ -601,8 +601,17 @@ LogicalResult AIEDeviceBuilder::connectionToAIE(
}

std::optional<AMDAIE::FlowOp> maybeFlowOp = connectionOp.getFlowOp();
std::optional<uint8_t> packetId =
maybeFlowOp ? maybeFlowOp->getPacketId() : std::nullopt;
if (!maybeFlowOp) return connectionOp.emitOpError() << "has no flow op";

FailureOr<bool> isCtrlFlow = maybeFlowOp->isControlFlow();
if (failed(isCtrlFlow)) {
return connectionOp.emitOpError()
<< "could not determine if flow is control";
}
// No DMA op needed for control flow.
if (isCtrlFlow.value()) return success();

std::optional<uint8_t> packetId = maybeFlowOp->getPacketId();

FailureOr<AMDAIE::NpuCircularDmaCpyNdOp> maybeNpuDmaUserOp =
connectionOp.getNpuCircularDmaCpyNdUser();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -839,6 +839,8 @@ void addAMDAIEObjectFifoLoweringPasses(
passManager.addPass(createAMDAIEDmaCSEPass());

passManager.addPass(createAMDAIEGenerateControlOverlayPass());
passManager.addPass(createCSEPass());
passManager.addPass(createCanonicalizerPass());

passManager.addPass(createAMDAIEAssignChannelsPass());
passManager.addPass(createCSEPass());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-amdaie-generate-control-overlay{route-shim-to-tct=true route-shim-to-tile-ctrl=true}))" --split-input-file --verify-diagnostics %s | FileCheck %s
// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-amdaie-generate-control-overlay{route-shim-to-tct=true route-shim-to-tile-ctrl=true}, canonicalize, cse))" --split-input-file --verify-diagnostics %s | FileCheck %s

// Device attribute is required for route-shim-to-tile-ctrl.
module {
Expand Down Expand Up @@ -65,21 +65,20 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb}
// CHECK: %[[CHANNEL_2:.*]] = amdaie.channel(%[[TILE_0_0]], 1, port_type = DMA, direction = MM2S)
// CHECK: %[[CHANNEL_3:.*]] = amdaie.channel(%[[TILE_0_1]], 0, port_type = CTRL, direction = S2MM)
// CHECK: %[[CONNECT_1:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_3]]}, %{{.+}} {%[[CHANNEL_2]]}) {connection_type = #amdaie<connection_type Packet>}
// CHECK: %[[CHANNEL_4:.*]] = amdaie.channel(%[[TILE_0_0]], 0, port_type = DMA, direction = MM2S)
// CHECK: %[[CHANNEL_5:.*]] = amdaie.channel(%[[TILE_0_2]], 0, port_type = CTRL, direction = S2MM)
// CHECK: %[[CONNECT_2:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_5]]}, %{{.+}} {%[[CHANNEL_4]]}) {connection_type = #amdaie<connection_type Packet>}
// CHECK: %[[CHANNEL_6:.*]] = amdaie.channel(%[[TILE_0_0]], 1, port_type = DMA, direction = MM2S)
// CHECK: %[[CONNECT_2:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_5]]}, %{{.+}} {%[[CHANNEL_0]]}) {connection_type = #amdaie<connection_type Packet>}
// CHECK: %[[CHANNEL_7:.*]] = amdaie.channel(%[[TILE_0_3]], 0, port_type = CTRL, direction = S2MM)
// CHECK: %[[CONNECT_3:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_7]]}, %{{.+}} {%[[CHANNEL_6]]}) {connection_type = #amdaie<connection_type Packet>}
// CHECK: %[[CHANNEL_8:.*]] = amdaie.channel(%[[TILE_0_0]], 0, port_type = DMA, direction = MM2S)
// CHECK: %[[CONNECT_3:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_7]]}, %{{.+}} {%[[CHANNEL_2]]}) {connection_type = #amdaie<connection_type Packet>}
// CHECK: %[[CHANNEL_9:.*]] = amdaie.channel(%[[TILE_0_4]], 0, port_type = CTRL, direction = S2MM)
// CHECK: %[[CONNECT_4:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_9]]}, %{{.+}} {%[[CHANNEL_8]]}) {connection_type = #amdaie<connection_type Packet>}
// CHECK: %[[CHANNEL_10:.*]] = amdaie.channel(%[[TILE_0_0]], 1, port_type = DMA, direction = MM2S)
// CHECK: %[[CONNECT_4:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_9]]}, %{{.+}} {%[[CHANNEL_0]]}) {connection_type = #amdaie<connection_type Packet>}
// CHECK: %[[CHANNEL_11:.*]] = amdaie.channel(%[[TILE_0_5]], 0, port_type = CTRL, direction = S2MM)
// CHECK: %[[CONNECT_5:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_11]]}, %{{.+}} {%[[CHANNEL_10]]}) {connection_type = #amdaie<connection_type Packet>}
// CHECK: %[[CONNECT_5:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_11]]}, %{{.+}} {%[[CHANNEL_2]]}) {connection_type = #amdaie<connection_type Packet>}
// CHECK: %[[CHANNEL_12:.*]] = amdaie.channel(%[[TILE_0_0]], 0, port_type = CTRL, direction = MM2S)
// CHECK: %[[CHANNEL_13:.*]] = amdaie.channel(%[[TILE_0_0]], 0, port_type = SOUTH, direction = S2MM)
// CHECK: %[[CONNECT_6:.*]] = amdaie.connection(%{{.+}} {%[[CHANNEL_13]]}, %{{.+}} {%[[CHANNEL_12]]}) {connection_type = #amdaie<connection_type Circuit>}
// CHECK: amdaie.controlcode {
// CHECK-COUNT-6:amdaie.npu.dma_placeholder
// CHECK: amdaie.end
#executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
func.func @column_control_overlay() {
Expand Down

0 comments on commit fd4db47

Please sign in to comment.