@@ -3518,7 +3518,7 @@ def fir_BoxTotalElementsOp
3518
3518
3519
3519
def YieldOp : fir_Op<"yield",
3520
3520
[Pure, ReturnLike, Terminator,
3521
- ParentOneOf<["LocalitySpecifierOp"]>]> {
3521
+ ParentOneOf<["LocalitySpecifierOp", "DeclareReductionOp" ]>]> {
3522
3522
let summary = "loop yield and termination operation";
3523
3523
let description = [{
3524
3524
"fir.yield" yields SSA values from a fir dialect op region and
@@ -3662,6 +3662,103 @@ def fir_LocalitySpecifierOp : fir_Op<"local", [IsolatedFromAbove]> {
3662
3662
let hasRegionVerifier = 1;
3663
3663
}
3664
3664
3665
+ def fir_DeclareReductionOp : fir_Op<"declare_reduction", [IsolatedFromAbove,
3666
+ Symbol]> {
3667
+ let summary = "declares a reduction kind";
3668
+ let description = [{
3669
+ Note: this operation is adapted from omp::DeclareReductionOp. There is a lot
3670
+ of duplication at the moment. TODO: Combine both ops into one. See:
3671
+ https://discourse.llvm.org/t/dialect-for-data-locality-sharing-specifiers-clauses-in-openmp-openacc-and-do-concurrent/86108.
3672
+
3673
+ Declares a `do concurrent` reduction. This requires two mandatory and three
3674
+ optional regions.
3675
+
3676
+ 1. The optional alloc region specifies how to allocate the thread-local
3677
+ reduction value. This region should not contain control flow and all
3678
+ IR should be suitable for inlining straight into an entry block. In
3679
+ the common case this is expected to contain only allocas. It is
3680
+ expected to `fir.yield` the allocated value on all control paths.
3681
+ If allocation is conditional (e.g. only allocate if the mold is
3682
+ allocated), this should be done in the initializer region and this
3683
+ region not included. The alloc region is not used for by-value
3684
+ reductions (where allocation is implicit).
3685
+ 2. The initializer region specifies how to initialize the thread-local
3686
+ reduction value. This is usually the neutral element of the reduction.
3687
+ For convenience, the region has an argument that contains the value
3688
+ of the reduction accumulator at the start of the reduction. If an alloc
3689
+ region is specified, there is a second block argument containing the
3690
+ address of the allocated memory. The initializer region is expected to
3691
+ `fir.yield` the new value on all control flow paths.
3692
+ 3. The reduction region specifies how to combine two values into one, i.e.
3693
+ the reduction operator. It accepts the two values as arguments and is
3694
+ expected to `fir.yield` the combined value on all control flow paths.
3695
+ 4. The atomic reduction region is optional and specifies how two values
3696
+ can be combined atomically given local accumulator variables. It is
3697
+ expected to store the combined value in the first accumulator variable.
3698
+ 5. The cleanup region is optional and specifies how to clean up any memory
3699
+ allocated by the initializer region. The region has an argument that
3700
+ contains the value of the thread-local reduction accumulator. This will
3701
+ be executed after the reduction has completed.
3702
+
3703
+ Note that the MLIR type system does not allow for type-polymorphic
3704
+ reductions. Separate reduction declarations should be created for different
3705
+ element and accumulator types.
3706
+
3707
+ For initializer and reduction regions, the operand to `fir.yield` must
3708
+ match the parent operation's results.
3709
+ }];
3710
+
3711
+ let arguments = (ins SymbolNameAttr:$sym_name,
3712
+ TypeAttr:$type);
3713
+
3714
+ let regions = (region MaxSizedRegion<1>:$allocRegion,
3715
+ AnyRegion:$initializerRegion,
3716
+ AnyRegion:$reductionRegion,
3717
+ AnyRegion:$atomicReductionRegion,
3718
+ AnyRegion:$cleanupRegion);
3719
+
3720
+ let assemblyFormat = "$sym_name `:` $type attr-dict-with-keyword "
3721
+ "( `alloc` $allocRegion^ )? "
3722
+ "`init` $initializerRegion "
3723
+ "`combiner` $reductionRegion "
3724
+ "( `atomic` $atomicReductionRegion^ )? "
3725
+ "( `cleanup` $cleanupRegion^ )? ";
3726
+
3727
+ let extraClassDeclaration = [{
3728
+ mlir::BlockArgument getAllocMoldArg() {
3729
+ auto &region = getAllocRegion();
3730
+ return region.empty() ? nullptr : region.getArgument(0);
3731
+ }
3732
+ mlir::BlockArgument getInitializerMoldArg() {
3733
+ return getInitializerRegion().getArgument(0);
3734
+ }
3735
+ mlir::BlockArgument getInitializerAllocArg() {
3736
+ return getAllocRegion().empty() ?
3737
+ nullptr : getInitializerRegion().getArgument(1);
3738
+ }
3739
+ mlir::BlockArgument getReductionLhsArg() {
3740
+ return getReductionRegion().getArgument(0);
3741
+ }
3742
+ mlir::BlockArgument getReductionRhsArg() {
3743
+ return getReductionRegion().getArgument(1);
3744
+ }
3745
+ mlir::BlockArgument getAtomicReductionLhsArg() {
3746
+ auto &region = getAtomicReductionRegion();
3747
+ return region.empty() ? nullptr : region.getArgument(0);
3748
+ }
3749
+ mlir::BlockArgument getAtomicReductionRhsArg() {
3750
+ auto &region = getAtomicReductionRegion();
3751
+ return region.empty() ? nullptr : region.getArgument(1);
3752
+ }
3753
+ mlir::BlockArgument getCleanupAllocArg() {
3754
+ auto &region = getCleanupRegion();
3755
+ return region.empty() ? nullptr : region.getArgument(0);
3756
+ }
3757
+ }];
3758
+
3759
+ let hasRegionVerifier = 1;
3760
+ }
3761
+
3665
3762
def fir_DoConcurrentOp : fir_Op<"do_concurrent",
3666
3763
[SingleBlock, AutomaticAllocationScope]> {
3667
3764
let summary = "do concurrent loop wrapper";
@@ -3700,6 +3797,25 @@ def fir_LocalSpecifier {
3700
3797
);
3701
3798
}
3702
3799
3800
+ def fir_ReduceSpecifier {
3801
+ dag arguments = (ins
3802
+ Variadic<AnyType>:$reduce_vars,
3803
+ OptionalAttr<DenseBoolArrayAttr>:$reduce_byref,
3804
+
3805
+ // This introduces redundancy in how reductions are modelled. In particular,
3806
+ // a single reduction is represented by 2 attributes:
3807
+ //
3808
+ // 1. `$reduce_syms` which is a list of `DeclareReductionOp`s.
3809
+ // 2. `$reduce_attrs` which is an array of `fir::ReduceAttr` values.
3810
+ //
3811
+ // The first makes it easier to map `do concurrent` to parallelization models
3812
+ // (e.g. OpenMP and OpenACC) while the second makes it easier to map it to
3813
+ // nests of `fir.do_loop ... unordered` ops.
3814
+ OptionalAttr<SymbolRefArrayAttr>:$reduce_syms,
3815
+ OptionalAttr<ArrayAttr>:$reduce_attrs
3816
+ );
3817
+ }
3818
+
3703
3819
def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
3704
3820
[AttrSizedOperandSegments, DeclareOpInterfaceMethods<LoopLikeOpInterface,
3705
3821
["getLoopInductionVars"]>,
@@ -3709,7 +3825,7 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
3709
3825
let description = [{
3710
3826
An operation that models a Fortran `do concurrent` loop's header and block.
3711
3827
This is a single-region single-block terminator op that is expected to
3712
- terminate the region of a `omp .do_concurrent` wrapper op.
3828
+ terminate the region of a `fir.do_concurrent` wrapper op.
3713
3829
3714
3830
This op borrows from both `scf.parallel` and `fir.do_loop` ops. Similar to
3715
3831
`scf.parallel`, a loop nest takes 3 groups of SSA values as operands that
@@ -3747,8 +3863,6 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
3747
3863
- `lowerBound`: The group of SSA values for the nest's lower bounds.
3748
3864
- `upperBound`: The group of SSA values for the nest's upper bounds.
3749
3865
- `step`: The group of SSA values for the nest's steps.
3750
- - `reduceOperands`: The reduction SSA values, if any.
3751
- - `reduceAttrs`: Attributes to store reduction operations, if any.
3752
3866
- `loopAnnotation`: Loop metadata to be passed down the compiler pipeline to
3753
3867
LLVM.
3754
3868
}];
@@ -3757,12 +3871,12 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
3757
3871
Variadic<Index>:$lowerBound,
3758
3872
Variadic<Index>:$upperBound,
3759
3873
Variadic<Index>:$step,
3760
- Variadic<AnyType>:$reduceOperands,
3761
- OptionalAttr<ArrayAttr>:$reduceAttrs,
3762
3874
OptionalAttr<LoopAnnotationAttr>:$loopAnnotation
3763
3875
);
3764
3876
3765
- let arguments = !con(opArgs, fir_LocalSpecifier.arguments);
3877
+ let arguments = !con(opArgs,
3878
+ fir_LocalSpecifier.arguments,
3879
+ fir_ReduceSpecifier.arguments);
3766
3880
3767
3881
let regions = (region SizedRegion<1>:$region);
3768
3882
@@ -3783,12 +3897,18 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
3783
3897
getNumLocalOperands());
3784
3898
}
3785
3899
3900
+ mlir::Block::BlockArgListType getRegionReduceArgs() {
3901
+ return getBody()->getArguments().slice(getNumInductionVars()
3902
+ + getNumLocalOperands(),
3903
+ getNumReduceOperands());
3904
+ }
3905
+
3786
3906
/// Number of operands controlling the loop
3787
3907
unsigned getNumControlOperands() { return getLowerBound().size() * 3; }
3788
3908
3789
3909
// Get Number of reduction operands
3790
3910
unsigned getNumReduceOperands() {
3791
- return getReduceOperands ().size();
3911
+ return getReduceVars ().size();
3792
3912
}
3793
3913
3794
3914
mlir::Operation::operand_range getLocalOperands() {
0 commit comments