Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LinalgFunctionOutlining] Add none, all and balanced outlining strategies #1062

Merged
merged 1 commit into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 20 additions & 20 deletions build_tools/ci/cpu_comparison/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,7 +800,9 @@ def _execute(self, config):
"""
Currently without function outlining, we run out of program memory.
"""
self.add_aie_compilation_flags(["--iree-amdaie-enable-function-outlining"])
self.add_aie_compilation_flags(
["--iree-amdaie-enable-function-outlining=balanced"]
)
aie_vs_baseline(
config=config,
aie_compilation_flags=self.aie_compilation_flags,
Expand Down Expand Up @@ -1822,7 +1824,7 @@ def __init__(self):
"K": 4096,
"use_ukernel": False,
"peano_opt_level": 2,
"outline": False,
"outline": "none",
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
Expand All @@ -1833,7 +1835,7 @@ def __init__(self):
"K": 4096,
"use_ukernel": False,
"peano_opt_level": 2,
"outline": True,
"outline": "balanced",
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
Expand All @@ -1844,7 +1846,7 @@ def __init__(self):
"K": 4096,
"use_ukernel": False,
"peano_opt_level": 3,
"outline": False,
"outline": "none",
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
Expand All @@ -1855,7 +1857,7 @@ def __init__(self):
"K": 4096,
"use_ukernel": False,
"peano_opt_level": 3,
"outline": True,
"outline": "balanced",
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
Expand All @@ -1866,7 +1868,7 @@ def __init__(self):
"K": 4096,
"use_ukernel": True,
"peano_opt_level": 3,
"outline": True,
"outline": "balanced",
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
Expand All @@ -1877,7 +1879,7 @@ def __init__(self):
"K": 512,
"use_ukernel": False,
"peano_opt_level": 3,
"outline": True,
"outline": "balanced",
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
Expand All @@ -1888,7 +1890,7 @@ def __init__(self):
"K": 512,
"use_ukernel": True,
"peano_opt_level": 3,
"outline": True,
"outline": "balanced",
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
Expand All @@ -1899,7 +1901,7 @@ def __init__(self):
"K": 512,
"use_ukernel": False,
"peano_opt_level": 3,
"outline": True,
"outline": "balanced",
"transpose_a": False,
"transpose_b": True,
"tile_pipeline": "pack-peel",
Expand All @@ -1910,7 +1912,7 @@ def __init__(self):
"K": 512,
"use_ukernel": False,
"peano_opt_level": 3,
"outline": True,
"outline": "balanced",
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
Expand All @@ -1921,7 +1923,7 @@ def __init__(self):
"K": 512,
"use_ukernel": True,
"peano_opt_level": 3,
"outline": True,
"outline": "balanced",
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel",
Expand All @@ -1932,7 +1934,7 @@ def __init__(self):
"K": 512,
"use_ukernel": False,
"peano_opt_level": 3,
"outline": True,
"outline": "balanced",
"transpose_a": True,
"transpose_b": False,
"tile_pipeline": "pack-peel",
Expand All @@ -1946,7 +1948,7 @@ def __init__(self):
"K": 512,
"use_ukernel": False,
"peano_opt_level": 3,
"outline": True,
"outline": "balanced",
"outline_to_empty_function": True,
"transpose_a": False,
"transpose_b": False,
Expand All @@ -1959,7 +1961,7 @@ def __init__(self):
"K": 512,
"use_ukernel": False,
"peano_opt_level": 3,
"outline": True,
"outline": "balanced",
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel-4-level-tiling",
Expand All @@ -1970,7 +1972,7 @@ def __init__(self):
"K": 512,
"use_ukernel": True,
"peano_opt_level": 3,
"outline": True,
"outline": "balanced",
"transpose_a": False,
"transpose_b": False,
"tile_pipeline": "pack-peel-4-level-tiling",
Expand All @@ -1984,7 +1986,7 @@ def __init__(self):
"K": 512,
"use_ukernel": False,
"peano_opt_level": 3,
"outline": True,
"outline": "balanced",
"outline_to_empty_function": True,
"transpose_a": False,
"transpose_b": False,
Expand All @@ -2005,9 +2007,7 @@ def __init__(self):
transpose_b = test["transpose_b"]
tile_pipeline = test["tile_pipeline"]

outlining_string = "--iree-amdaie-enable-function-outlining=" + str(
int(outline)
)
outlining_string = "--iree-amdaie-enable-function-outlining=" + outline

peano_opt_level_string = f'"-O{peano_opt_level}"'
aie_compilation_flags = [
Expand All @@ -2026,7 +2026,7 @@ def __init__(self):
)

name_suffix = "O" + str(peano_opt_level)
if outline:
if outline != "none":
if outline_to_empty_function:
name_suffix += "_outline_empty"
else:
Expand Down
14 changes: 11 additions & 3 deletions compiler/plugins/target/AMD-AIE/iree-amd-aie/Target/AIETarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ struct AMDAIEOptions {
bool enableVectorizationPasses{true};
bool enableCoalescingLoops{false};
bool enableCollapsingUnitDims{false};
bool enableFunctionOutlining{true};
OutliningStrategy enableFunctionOutlining{OutliningStrategy::Balanced};
bool replaceOutlinedFunctionsWithEmpty{false};
bool insertLoopAroundCoreBlock{false};
bool matmulElementwiseFusion{false};
Expand Down Expand Up @@ -197,11 +197,19 @@ struct AMDAIEOptions {
"unit dims of a tensor/memref depending on this pass flag. It is "
"intended for development purposes only."));

binder.opt<bool>(
binder.opt<OutliningStrategy>(
"iree-amdaie-enable-function-outlining", enableFunctionOutlining,
llvm::cl::cat(category),
llvm::cl::desc("Flag to enable/disable linalg-function-outlining pass."
"It is intended for development purposes only."));
"It is intended for development purposes only."),
llvm::cl::values(clEnumValN(OutliningStrategy::None, "none",
"No linalg ops will be outlined."),
clEnumValN(OutliningStrategy::All, "all",
"All linalg ops will be outlined."),
clEnumValN(OutliningStrategy::Balanced, "balanced",
"Will outline some ops, to try to achieve "
"a good balance between "
"performance and program size.")));

binder.opt<bool>(
"iree-amdaie-replace-outlined-functions-with-empty",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@ static FailureOr<func::FuncOp> outline(IRRewriter &rewriter, ModuleOp moduleOp,
// clang-format off
// https://github.com/llvm/llvm-project/blob/6b0785390d02193d81d8db7fb12279ffa4651afe/mlir/include/mlir/IR/BuiltinAttributeInterfaces.td#L475
// clang-format on
auto type = dyn_cast<MemRefType>(operand.getType());
assert(type && "we've already checked that all operands are memrefs");
MemRefLayoutAttrInterface layout = type.getLayout();
assert(layout &&
"MemRefType layout attribute interface should always be present");
if (!layout.isIdentity()) return failure();
if (auto type = dyn_cast<MemRefType>(operand.getType())) {
MemRefLayoutAttrInterface layout = type.getLayout();
assert(layout &&
"MemRefType layout attribute interface should always be present");
if (!layout.isIdentity()) return failure();
}
}
auto funcType = FunctionType::get(
rewriter.getContext(), computeOp->getOperandTypes(), /*outputTypes=*/{});
Expand Down Expand Up @@ -72,8 +72,9 @@ static FailureOr<func::FuncOp> outline(IRRewriter &rewriter, ModuleOp moduleOp,
return func;
}

/// Utility to check if the linalg op is one we know should be outlined.
static bool mustOutline(linalg::LinalgOp linalgOp) {
/// Utility to check whether the linalg op should be outlined if the balanced
/// strategy is enabled.
static bool mustOutlineBalanced(linalg::LinalgOp linalgOp) {
if (isa<linalg::CopyOp, linalg::FillOp>(linalgOp)) return false;
if (isElementwise(linalgOp)) return false;
// TODO(newling) not all remaining ops should be outlined, not even all
Expand Down Expand Up @@ -158,16 +159,22 @@ void AMDAIELinalgFunctionOutliningPass::runOnOperation() {
MLIRContext *context = &getContext();
IRRewriter rewriter(context);

if (outliningStrategy == OutliningStrategy::None) {
if (emptyFunctions) {
moduleOp.emitWarning()
<< "The option to empty outlined functions is enabled while the "
"outlining strategy specifies to not outline any functions, so no "
"transformation will happen. This combination might not result in "
"the intended behaviour.";
}
return;
}

SmallVector<Operation *> toBeErased;
moduleOp.walk([&](linalg::LinalgOp computeOp) {
if (!mustOutline(computeOp)) return WalkResult::skip();

// Assert that we're in reference semantics, ie that all operands of
// computeOp have MemRefType:
if (!llvm::all_of(computeOp->getOperandTypes(),
[](Type t) { return isa<MemRefType>(t); })) {
computeOp->emitError("expected all operands to be of MemRefType");
return WalkResult::interrupt();
if (outliningStrategy == OutliningStrategy::Balanced &&
!mustOutlineBalanced(computeOp)) {
return WalkResult::skip();
}

FailureOr<func::FuncOp> maybeFunc =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,16 @@ enum class BufferizeOperand {
/// Enum for hardware mapping attributes.
enum class HardwareMapping { Core, Block, None };

/// Enum for the strategy that selects which linalg ops get outlined into
/// separate functions by the linalg function outlining pass.
enum class OutliningStrategy {
  /// No outlining.
  None,
  /// Try outlining all ops.
  All,
  /// A balanced strategy trying to achieve good performance and low program
  /// memory size.
  Balanced,
};

LogicalResult initAIELaunchConfig(FunctionOpInterface funcOp,
TilePassPipeline useTilePipeline,
LowerToAIEPassPipeline useLowerToAIEPipeline,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,7 @@ void buildAMDAIETransformPassPipeline(
LowerToAIEPassPipeline useLowerToAIEPipeline, bool matmulElementwiseFusion,
bool enableVectorizationPasses, const std::string &pathToUkernels,
bool enablePacketFlow, bool enableCoalescingLoops,
bool enableCollapsingUnitDims, bool enableFunctionOutlining,
bool enableCollapsingUnitDims, OutliningStrategy enableFunctionOutlining,
bool replaceOutlinedFunctionsWithEmpty, bool insertLoopAroundCoreBlock) {
OpPassManager &modulePassManager = variantPassManager.nest<ModuleOp>();
{
Expand Down Expand Up @@ -767,8 +767,9 @@ void addAMDAIEObjectFifoLoweringPasses(
OpPassManager &passManager, bool enablePacketFlow,
TilePassPipeline useTilePipeline, bool enableVectorizationPasses,
bool enableCoalescingLoops, bool enableCollapsingUnitDims,
bool enableFunctionOutlining, bool replaceOutlinedFunctionsWithEmpty,
bool insertLoopAroundCoreBlock, uint32_t numCols) {
OutliningStrategy enableFunctionOutlining,
bool replaceOutlinedFunctionsWithEmpty, bool insertLoopAroundCoreBlock,
uint32_t numCols) {
passManager.addPass(createEraseHALDescriptorTypeFromMemRefPass());
passManager.addPass(memref::createFoldMemRefAliasOpsPass());

Expand All @@ -794,18 +795,11 @@ void addAMDAIEObjectFifoLoweringPasses(
passManager.addPass(createAMDAIENormalizeLoopBoundsPass());
passManager.addPass(createAMDAIEInsertCoresPass());

if (enableFunctionOutlining) {
// Create function outlining options object, etc.
AMDAIELinalgFunctionOutliningOptions options;
if (replaceOutlinedFunctionsWithEmpty) {
options.emptyFunctions = true;
}
passManager.addPass(createAMDAIELinalgFunctionOutliningPass(options));
} else {
assert(!replaceOutlinedFunctionsWithEmpty &&
"`replaceOutlinedFunctionsWithEmpty` is only valid when "
"`enableFunctionOutlining` is true.");
}
// Create function outlining options object, etc.
AMDAIELinalgFunctionOutliningOptions options;
options.outliningStrategy = enableFunctionOutlining;
options.emptyFunctions = replaceOutlinedFunctionsWithEmpty;
passManager.addPass(createAMDAIELinalgFunctionOutliningPass(options));

{
// Vectorization passes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ void addAMDAIEObjectFifoLoweringPasses(
OpPassManager &passManager, bool enablePacketFlow,
TilePassPipeline useTilePipeline, bool enableVectorizationPasses,
bool enableCoalescingLoops, bool enableCollapsingUnitDims,
bool enableFunctionOutlining, bool replaceOutlinedFunctionsWithEmpty,
bool insertLoopAroundCoreBlock, uint32_t numCols);
OutliningStrategy enableFunctionOutlining,
bool replaceOutlinedFunctionsWithEmpty, bool insertLoopAroundCoreBlock,
uint32_t numCols);

/// Add passes to lower from MLIR-AIR through AIE. This is
/// currently the default passes used for lowering after IREEs tiling.
Expand All @@ -42,7 +43,7 @@ void buildAMDAIETransformPassPipeline(
LowerToAIEPassPipeline useLowerToAIEPipeline, bool matmulElementwiseFusion,
bool enableVectorizationPasses, const std::string &pathToUkernels,
bool enablePacketFlow, bool enableCoalescingLoops,
bool enableCollapsingUnitDims, bool enableFunctionOutlining,
bool enableCollapsingUnitDims, OutliningStrategy enableFunctionOutlining,
bool replaceOutlinedFunctionsWithEmpty, bool insertLoopAroundCoreBlock);

/// Populates passes needed to lower the IR via a Pack-Peel based approach.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,21 @@ def AMDAIELinalgFunctionOutlining :
"Replace all outlined functions with a function that does nothing, "
"i.e. it just returns. Useful for measuring the performance of data "
"movement to/from the device -- by doing zero compute, all time is spent "
"moving data to/from the AIE cores.">
];
"moving data to/from the AIE cores.">,
Option<"outliningStrategy", "outlining-strategy",
"mlir::iree_compiler::AMDAIE::OutliningStrategy",
/*default=*/"mlir::iree_compiler::AMDAIE::OutliningStrategy::Balanced",
"The strategy to be used for outlining. The default is balanced to "
"achieve a good tradeoff in performance and program size.",
[{::llvm::cl::values(
clEnumValN(mlir::iree_compiler::AMDAIE::OutliningStrategy::None, "none",
"No ops are outlined."),
clEnumValN(mlir::iree_compiler::AMDAIE::OutliningStrategy::All, "all",
"All ops are outlined."),
clEnumValN(mlir::iree_compiler::AMDAIE::OutliningStrategy::Balanced, "balanced",
"A strategy that tries to achieve a balanced tradeoff between performance and program size.")
)}]>,
];
}

def AMDAIEFoldDmaWaits :
Expand Down
Loading
Loading