diff --git a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td index 440f7d0380eb1..7c1e94d28fbce 100644 --- a/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td +++ b/mlir/include/mlir/Dialect/NVGPU/IR/NVGPU.td @@ -423,9 +423,9 @@ def NVGPU_DeviceAsyncCopyOp : NVGPU_Op<"device_async_copy", [ ``` }]; let results = (outs NVGPU_DeviceAsyncToken:$asyncToken); - let arguments = (ins Arg]>:$dst, + let arguments = (ins Arg]>:$dst, Variadic:$dstIndices, - Arg]>:$src, + Arg]>:$src, Variadic:$srcIndices, IndexAttr:$dstElements, Optional:$srcElements, @@ -642,7 +642,7 @@ def NVGPU_TmaAsyncLoadOp : NVGPU_Op<"tma.async.load", [AttrSizedOperandSegments] The Op uses `$barrier` mbarrier based completion mechanism. }]; - let arguments = (ins Arg]>:$dst, + let arguments = (ins Arg]>:$dst, NVGPU_MBarrierGroup:$barriers, NVGPU_TensorMapDescriptor:$tensorMapDescriptor, Variadic:$coordinates, diff --git a/mlir/include/mlir/Interfaces/SideEffectInterfaceBase.td b/mlir/include/mlir/Interfaces/SideEffectInterfaceBase.td index 45a9ffa94363e..bd9a28a225c15 100644 --- a/mlir/include/mlir/Interfaces/SideEffectInterfaceBase.td +++ b/mlir/include/mlir/Interfaces/SideEffectInterfaceBase.td @@ -160,7 +160,8 @@ def PartialEffect : EffectRange<0>; // This class is the general base side effect class. This is used by derived // effect interfaces to define their effects. class SideEffect + Resource resourceReference, int effectStage, EffectRange range, + bits<1> isAsync> : OpVariableDecorator { /// The name of the base effects class. string baseEffectName = interface.baseEffectName; @@ -183,6 +184,9 @@ class SideEffect - : SideEffect; + EffectRange range, bits<1> async> + : SideEffect; // This class represents the trait for memory effects that may be placed on // operations. @@ -51,7 +51,7 @@ class MemoryEffects effects = []> // not any visible mutation or dereference. class MemAlloc - : MemoryEffect<"::mlir::MemoryEffects::Allocate", resource, stage, range>; + : MemoryEffect<"::mlir::MemoryEffects::Allocate", resource, stage, range, 0>; def MemAlloc : MemAlloc; class MemAllocAt : MemAlloc; @@ -61,7 +61,7 @@ class MemAllocAt // resource, and not any visible allocation, mutation or dereference. class MemFree - : MemoryEffect<"::mlir::MemoryEffects::Free", resource, stage, range>; + : MemoryEffect<"::mlir::MemoryEffects::Free", resource, stage, range, 0>; def MemFree : MemFree; class MemFreeAt : MemFree; @@ -70,21 +70,21 @@ class MemFreeAt // resource. A 'read' effect implies only dereferencing of the resource, and // not any visible mutation. class MemRead - : MemoryEffect<"::mlir::MemoryEffects::Read", resource, stage, range>; + EffectRange range = PartialEffect, bits<1> async = 0> + : MemoryEffect<"::mlir::MemoryEffects::Read", resource, stage, range, async>; def MemRead : MemRead; -class MemReadAt - : MemRead; +class MemReadAt async = 0> + : MemRead; // The following effect indicates that the operation writes to some // resource. A 'write' effect implies only mutating a resource, and not any // visible dereference or read. class MemWrite - : MemoryEffect<"::mlir::MemoryEffects::Write", resource, stage, range>; + EffectRange range = PartialEffect, bits<1> async = 0> + : MemoryEffect<"::mlir::MemoryEffects::Write", resource, stage, range, async>; def MemWrite : MemWrite; -class MemWriteAt - : MemWrite; +class MemWriteAt async = 0> + : MemWrite; //===----------------------------------------------------------------------===// // Effect Traits diff --git a/mlir/include/mlir/TableGen/SideEffects.h b/mlir/include/mlir/TableGen/SideEffects.h index 5a9a34d4e427c..cceb03eca61fe 100644 --- a/mlir/include/mlir/TableGen/SideEffects.h +++ b/mlir/include/mlir/TableGen/SideEffects.h @@ -41,6 +41,9 @@ class SideEffect : public Operator::VariableDecorator { // Return if this side effect act on every single value of resource. bool getEffectOnfullRegion() const; + // Return if the side effect occurs after op exit. + bool getAsynchronous() const; + static bool classof(const Operator::VariableDecorator *var); }; diff --git a/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp b/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp index 1adc381092bf3..b1d2f0d676874 100644 --- a/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp @@ -519,6 +519,11 @@ static bool haveConflictingEffects(ArrayRef beforeEffects, ArrayRef afterEffects) { for (const MemoryEffects::EffectInstance &before : beforeEffects) { + // Before may conflict with after, but since it is async, a BarrierOp cannot + // synchronize the effects. If the async field is set, it is presumed that + // some architecture-specific mechanism is needed to synchronize the effect. + if (before.getAsynchronous()) continue; + for (const MemoryEffects::EffectInstance &after : afterEffects) { // If cannot alias, definitely no conflict. if (!mayAlias(before, after)) diff --git a/mlir/lib/TableGen/SideEffects.cpp b/mlir/lib/TableGen/SideEffects.cpp index 55ad59d3d0d01..145f3adbdddd9 100644 --- a/mlir/lib/TableGen/SideEffects.cpp +++ b/mlir/lib/TableGen/SideEffects.cpp @@ -42,6 +42,10 @@ bool SideEffect::getEffectOnfullRegion() const { return def->getValueAsBit("effectOnFullRegion"); } +bool SideEffect::getAsynchronous() const { + return def->getValueAsBit("asynchronous"); +} + bool SideEffect::classof(const Operator::VariableDecorator *var) { return var->getDef().isSubClassOf("SideEffect"); } diff --git a/mlir/test/Dialect/GPU/barrier-elimination.mlir b/mlir/test/Dialect/GPU/barrier-elimination.mlir index 844dc7dd6ac00..8330a7853b118 100644 --- a/mlir/test/Dialect/GPU/barrier-elimination.mlir +++ b/mlir/test/Dialect/GPU/barrier-elimination.mlir @@ -182,3 +182,23 @@ attributes {__parallel_region_boundary_for_test} { %4 = memref.load %C[] : memref return %0, %1, %2, %3, %4 : f32, f32, f32, f32, f32 } + +// CHECK-LABEL: @async_copy +func.func @async_copy() -> () +attributes {__parallel_region_boundary_for_test} { + // CHECK: %[[A:.+]] = memref.alloc + // CHECK: %[[B:.+]] = memref.alloc + %A = memref.alloc() : memref + %B = memref.alloc() : memref> + gpu.barrier + // CHECK: %[[T:.+]] = nvgpu.device_async_copy %[[A]][], %[[B]][], 1 + %token = nvgpu.device_async_copy %A[], %B[], 1 : memref to memref> + // This needs to be erased because it can't synchronize the effects on %B. + gpu.barrier + // This does synchronize the effects on %B. + // CHECK-NEXT: nvgpu.device_async_wait %[[T]] + nvgpu.device_async_wait %token + // CHECK-NEXT: linalg.abs ins(%[[B]] : memref>) outs(%[[A]] : memref) + linalg.abs ins(%B: memref>) outs(%A: memref) + return +} diff --git a/mlir/test/lib/Dialect/Test/TestInterfaces.td b/mlir/test/lib/Dialect/Test/TestInterfaces.td index dea26b8dda62a..2fbefb59e85f9 100644 --- a/mlir/test/lib/Dialect/Test/TestInterfaces.td +++ b/mlir/test/lib/Dialect/Test/TestInterfaces.td @@ -128,7 +128,7 @@ def TestEffectOpInterface class TestEffect : SideEffect; + PartialEffect, 0>; class TestEffects effects = []> : SideEffectsTraitBase; diff --git a/mlir/test/mlir-tblgen/op-side-effects.td b/mlir/test/mlir-tblgen/op-side-effects.td index 09612db905899..cca5b1487cc9c 100644 --- a/mlir/test/mlir-tblgen/op-side-effects.td +++ b/mlir/test/mlir-tblgen/op-side-effects.td @@ -26,15 +26,15 @@ def SideEffectOpB : TEST_Op<"side_effect_op_b", // CHECK: void SideEffectOpA::getEffects // CHECK: for (::mlir::Value value : getODSOperands(0)) -// CHECK: effects.emplace_back(::mlir::MemoryEffects::Read::get(), value, 0, false, ::mlir::SideEffects::DefaultResource::get()); +// CHECK: effects.emplace_back(::mlir::MemoryEffects::Read::get(), value, 0, false, false, ::mlir::SideEffects::DefaultResource::get()); // CHECK: for (::mlir::Value value : getODSOperands(1)) -// CHECK: effects.emplace_back(::mlir::MemoryEffects::Write::get(), value, 1, true, ::mlir::SideEffects::DefaultResource::get()); -// CHECK: effects.emplace_back(::mlir::MemoryEffects::Read::get(), getSymbolAttr(), 0, false, ::mlir::SideEffects::DefaultResource::get()); -// CHECK: effects.emplace_back(::mlir::MemoryEffects::Write::get(), getFlatSymbolAttr(), 0, false, ::mlir::SideEffects::DefaultResource::get()); +// CHECK: effects.emplace_back(::mlir::MemoryEffects::Write::get(), value, 1, true, false, ::mlir::SideEffects::DefaultResource::get()); +// CHECK: effects.emplace_back(::mlir::MemoryEffects::Read::get(), getSymbolAttr(), 0, false, false, ::mlir::SideEffects::DefaultResource::get()); +// CHECK: effects.emplace_back(::mlir::MemoryEffects::Write::get(), getFlatSymbolAttr(), 0, false, false, ::mlir::SideEffects::DefaultResource::get()); // CHECK: if (auto symbolRef = getOptionalSymbolAttr()) -// CHECK: effects.emplace_back(::mlir::MemoryEffects::Read::get(), symbolRef, 0, false, ::mlir::SideEffects::DefaultResource::get()); +// CHECK: effects.emplace_back(::mlir::MemoryEffects::Read::get(), symbolRef, 0, false, false, ::mlir::SideEffects::DefaultResource::get()); // CHECK: for (::mlir::Value value : getODSResults(0)) -// CHECK: effects.emplace_back(::mlir::MemoryEffects::Allocate::get(), value, 0, false, CustomResource::get()); +// CHECK: effects.emplace_back(::mlir::MemoryEffects::Allocate::get(), value, 0, false, false, CustomResource::get()); // CHECK: void SideEffectOpB::getEffects -// CHECK: effects.emplace_back(::mlir::MemoryEffects::Write::get(), 0, false, CustomResource::get()); +// CHECK: effects.emplace_back(::mlir::MemoryEffects::Write::get(), 0, false, false, CustomResource::get()); diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 842964b853d08..71f06c292fe5c 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -3304,9 +3304,10 @@ void OpEmitter::genSideEffectInterfaceMethods() { // {1}: Optional value or symbol reference. // {2}: The side effect stage. // {3}: Does this side effect act on every single value of resource. - // {4}: The resource class. + // {4}: Is asynchronous + // {5}: The resource class. const char *addEffectCode = - " effects.emplace_back({0}::get(), {1}{2}, {3}, {4}::get());\n"; + " effects.emplace_back({0}::get(), {1}{2}, {3}, {4}, {5}::get());\n"; for (auto &it : interfaceEffects) { // Generate the 'getEffects' method. @@ -3325,10 +3326,11 @@ void OpEmitter::genSideEffectInterfaceMethods() { StringRef resource = location.effect.getResource(); int stage = (int)location.effect.getStage(); bool effectOnFullRegion = (int)location.effect.getEffectOnfullRegion(); + bool async = (int)location.effect.getAsynchronous(); if (location.kind == EffectKind::Static) { // A static instance has no attached value. body << llvm::formatv(addEffectCode, effect, "", stage, - effectOnFullRegion, resource) + effectOnFullRegion, async, resource) .str(); } else if (location.kind == EffectKind::Symbol) { // A symbol reference requires adding the proper attribute. @@ -3337,11 +3339,11 @@ void OpEmitter::genSideEffectInterfaceMethods() { if (attr->attr.isOptional()) { body << " if (auto symbolRef = " << argName << "Attr())\n " << llvm::formatv(addEffectCode, effect, "symbolRef, ", stage, - effectOnFullRegion, resource) + effectOnFullRegion, async, resource) .str(); } else { body << llvm::formatv(addEffectCode, effect, argName + "Attr(), ", - stage, effectOnFullRegion, resource) + stage, effectOnFullRegion, async, resource) .str(); } } else { @@ -3350,7 +3352,7 @@ void OpEmitter::genSideEffectInterfaceMethods() { << (location.kind == EffectKind::Operand ? "Operands" : "Results") << "(" << location.index << "))\n " << llvm::formatv(addEffectCode, effect, "value, ", stage, - effectOnFullRegion, resource) + effectOnFullRegion, async, resource) .str(); } }