Skip to content

Commit c72dfdb

Browse files
Also reshard func return operands when the corresponding return operand and func result have different meshes,
unless both are fully replicated. PiperOrigin-RevId: 737622545
1 parent 6ccfb39 commit c72dfdb

File tree

2 files changed

+89
-49
lines changed

2 files changed

+89
-49
lines changed

shardy/dialect/sdy/transforms/export/insert_explicit_reshards.cc

+53-49
Original file line numberDiff line numberDiff line change
@@ -681,6 +681,52 @@ bool shouldReshard(TensorShardingAttr sourceSharding,
681681
return sourceSharding != targetSharding;
682682
}
683683

684+
// Inserts a `ReshardOp` before `op` (the func return) on every return operand
// whose sharding disagrees with the sharding declared on the corresponding
// result of `funcOp`, per `shouldReshard`.
void insertExplicitReshardsOnFuncReturn(Operation* op, func::FuncOp& funcOp,
                                        IRRewriter& rewriter) {
  rewriter.setInsertionPoint(op);
  for (const auto& [resultIndex, returnOperand] :
       llvm::enumerate(op->getOpOperands())) {
    Value returnValue = returnOperand.get();
    TensorShardingAttr resultSharding =
        getFuncResultSharding(funcOp, resultIndex);
    TensorShardingAttr returnSharding = getSharding(returnValue);
    if (!shouldReshard(returnSharding, resultSharding)) {
      continue;
    }
    // When a reshard is needed but the func-result sharding is empty, the
    // return operand's sharding is guaranteed to be nonempty; use its fully
    // closed form as the reshard target.
    TensorShardingAttr targetSharding =
        resultSharding
            ? resultSharding
            : TensorShardingAttr::getFullyClosedLike(returnSharding);
    returnOperand.set(rewriter.create<ReshardOp>(returnValue.getLoc(),
                                                 returnValue, targetSharding));
  }
}
704+
705+
// For each data-flow edge of `op`, reshards every edge source whose sharding
// disagrees with the (pre-propagation-transformed) sharding of the edge owner,
// inserting the `ReshardOp` right after the source value.
void insertExplicitReshardsOnDataFlowOp(ShardableDataFlowOpInterface& op,
                                        IRRewriter& rewriter,
                                        StringRef meshName) {
  for (Value edgeOwner : llvm::concat<Value>(op.getOpResultEdgeOwners(),
                                             op.getBlockArgumentEdgeOwners())) {
    TensorShardingAttr targetSharding = op.transformTargetSharding(
        edgeOwner, op.getEdgeOwnerSharding(edgeOwner),
        DataFlowShardingTransformType::kBeforeEdgePropagation);
    for (OpOperand* edgeSource : op.getEdgeSources(edgeOwner)) {
      Value sourceValue = edgeSource->get();
      TensorShardingAttr currentSharding =
          getOrCreateSharding(sourceValue, meshName, /*closedIfMissing=*/true);
      if (!shouldReshard(currentSharding, targetSharding)) {
        continue;
      }
      rewriter.setInsertionPointAfterValue(sourceValue);
      // An empty owner sharding with a required reshard implies the source
      // sharding is nonempty; fall back to its fully closed form.
      edgeSource->set(rewriter.create<ReshardOp>(
          sourceValue.getLoc(), sourceValue,
          targetSharding
              ? targetSharding
              : TensorShardingAttr::getFullyClosedLike(currentSharding)));
    }
  }
}
729+
684730
struct InsertExplicitReshardsPass
685731
: public impl::InsertExplicitReshardsPassBase<InsertExplicitReshardsPass> {
686732
using InsertExplicitReshardsPassBase::InsertExplicitReshardsPassBase;
@@ -689,10 +735,12 @@ struct InsertExplicitReshardsPass
689735
func::FuncOp funcOp = getOperation();
690736
IRRewriter rewriter(funcOp);
691737
SymbolTable symbolTable(funcOp->getParentOfType<ModuleOp>());
692-
// TODO(enver): Handle data flow ops.
738+
693739
funcOp.walk([&](Operation* op) {
694-
// TODO(enver): Check if data flow ops, data flow edge op, manual
695-
// computation op require extra check before creating sharding rule.
740+
if (isa<func::ReturnOp>(op)) {
741+
insertExplicitReshardsOnFuncReturn(op, funcOp, rewriter);
742+
return;
743+
}
696744

697745
std::optional<StringRef> meshName =
698746
getCommonMeshName(getShardings(op->getOperands()),
@@ -706,56 +754,12 @@ struct InsertExplicitReshardsPass
706754
return;
707755
}
708756

709-
if (isa<func::ReturnOp>(op)) {
710-
rewriter.setInsertionPoint(op);
711-
for (const auto& [index, opOperand] :
712-
llvm::enumerate(op->getOpOperands())) {
713-
Value operand = opOperand.get();
714-
TensorShardingAttr funcResultSharding =
715-
getFuncResultSharding(funcOp, index);
716-
TensorShardingAttr operandSharding =
717-
getOrCreateSharding(operand, *meshName, /*closedIfMissing=*/true);
718-
if (shouldReshard(operandSharding, funcResultSharding)) {
719-
// TODO(enver): Close all shardings and drop replicated axes before
720-
// this pass on the export pipeline.
721-
auto reshardOp = rewriter.create<ReshardOp>(
722-
operand.getLoc(), operand,
723-
funcResultSharding
724-
? funcResultSharding
725-
: TensorShardingAttr::getFullyClosedLike(operandSharding));
726-
opOperand.set(reshardOp);
727-
}
728-
}
729-
return;
730-
}
731-
732757
// TODO(enver): Prefer resharding the owner when multiple sources are
733758
// sharded in the same way.
734759
if (auto shardableDataFlowOp =
735760
dyn_cast<ShardableDataFlowOpInterface>(op)) {
736-
for (Value owner : llvm::concat<Value>(
737-
shardableDataFlowOp.getOpResultEdgeOwners(),
738-
shardableDataFlowOp.getBlockArgumentEdgeOwners())) {
739-
TensorShardingAttr ownerSharding =
740-
shardableDataFlowOp.transformTargetSharding(
741-
owner, shardableDataFlowOp.getEdgeOwnerSharding(owner),
742-
DataFlowShardingTransformType::kBeforeEdgePropagation);
743-
for (OpOperand* sourceOpOperand :
744-
shardableDataFlowOp.getEdgeSources(owner)) {
745-
Value source = sourceOpOperand->get();
746-
TensorShardingAttr sourceSharding = getOrCreateSharding(
747-
source, *meshName, /*closedIfMissing=*/true);
748-
if (shouldReshard(sourceSharding, ownerSharding)) {
749-
rewriter.setInsertionPointAfterValue(source);
750-
auto reshardOp = rewriter.create<ReshardOp>(
751-
source.getLoc(), source,
752-
ownerSharding
753-
? ownerSharding
754-
: TensorShardingAttr::getFullyClosedLike(sourceSharding));
755-
sourceOpOperand->set(reshardOp);
756-
}
757-
}
758-
}
761+
insertExplicitReshardsOnDataFlowOp(shardableDataFlowOp, rewriter,
762+
*meshName);
759763
return;
760764
}
761765

shardy/dialect/sdy/transforms/export/test/insert_explicit_reshards.mlir

+36
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// RUN: sdy_opt %s -sdy-insert-explicit-reshards | FileCheck %s
22

33
sdy.mesh @mesh = <["x"=4, "y"=2]>
4+
sdy.mesh @mesh_xt = <["x"=2, "t"=4]>
45
sdy.mesh @mesh_xyz = <["x"=4, "y"=2, "z"=4]>
56
sdy.mesh @mesh_xyzt = <["x"=4, "y"=4, "z"=4, "t"=8]>
67

@@ -19,6 +20,41 @@ func.func @funcop_result_sharding_does_not_match(%arg0: tensor<8x16xf32> {sdy.sh
1920
return %arg0 : tensor<8x16xf32>
2021
}
2122

23+
// No reshard is inserted when the return operand and the func result use
// different meshes but both are fully replicated.
// CHECK-LABEL: func @funcop_result_unsharded_but_different_meshes_between_return_and_func_result
func.func @funcop_result_unsharded_but_different_meshes_between_return_and_func_result(%arg0: tensor<8x16xf32> {sdy.sharding = #sdy.sharding<@mesh, [{}, {}]>}) -> (tensor<8x16xf32> {sdy.sharding = #sdy.sharding<@mesh_xt, [{}, {}]>}) {
  // CHECK-NOT: sdy.reshard
  return %arg0 : tensor<8x16xf32>
}
28+
29+
// A reshard is inserted even though the dimension shardings look identical,
// because the operand and result refer to different meshes (axis "x" has
// size 4 in @mesh but size 2 in @mesh_xt — presumably why this counts as a
// mismatch; confirm against `shouldReshard`).
// CHECK-LABEL: func @funcop_result_sharding_matches_but_different_meshes_between_return_and_func_result
func.func @funcop_result_sharding_matches_but_different_meshes_between_return_and_func_result(%arg0: tensor<8x16xf32> {sdy.sharding = #sdy.sharding<@mesh, [{"x"}, {}]>}) -> (tensor<8x16xf32> {sdy.sharding = #sdy.sharding<@mesh_xt, [{"x"}, {}]>}) {
  // CHECK: %[[RESHARD:.*]] = sdy.reshard %arg0 <@mesh_xt, [{"x"}, {}]> : tensor<8x16xf32>
  // CHECK: return %[[RESHARD]] : tensor<8x16xf32>
  return %arg0 : tensor<8x16xf32>
}
35+
36+
// Both the shardings and the meshes differ; the operand is resharded to the
// func result's sharding on the func result's mesh.
// CHECK-LABEL: func @funcop_result_sharding_does_not_match_different_meshes_between_return_and_func_result
func.func @funcop_result_sharding_does_not_match_different_meshes_between_return_and_func_result(%arg0: tensor<8x16xf32> {sdy.sharding = #sdy.sharding<@mesh, [{"x"}, {}]>}) -> (tensor<8x16xf32> {sdy.sharding = #sdy.sharding<@mesh_xt, [{}, {"t"}]>}) {
  // CHECK: %[[RESHARD:.*]] = sdy.reshard %arg0 <@mesh_xt, [{}, {"t"}]> : tensor<8x16xf32>
  // CHECK: return %[[RESHARD]] : tensor<8x16xf32>
  return %arg0 : tensor<8x16xf32>
}
42+
43+
// Each return operand is resharded independently to its own result's mesh and
// sharding when they disagree; the two results use different meshes.
// CHECK-LABEL: func @funcop_result_sharding_does_not_match_different_meshes_between_return_and_func_result_multiple_results
func.func @funcop_result_sharding_does_not_match_different_meshes_between_return_and_func_result_multiple_results(%arg0: tensor<8x32xf32> {sdy.sharding = #sdy.sharding<@mesh, [{}, {"y"}]>}, %arg1: tensor<32x16xf32> {sdy.sharding = #sdy.sharding<@mesh_xt, [{"t"}, {}]>}) -> (tensor<8x32xf32> {sdy.sharding = #sdy.sharding<@mesh_xt, [{"x"}, {}]>}, tensor<32x16xf32> {sdy.sharding = #sdy.sharding<@mesh, [{}, {"x"}]>}) {
  // CHECK: %[[RESHARD1:.*]] = sdy.reshard %arg0 <@mesh_xt, [{"x"}, {}]> : tensor<8x32xf32>
  // CHECK: %[[RESHARD2:.*]] = sdy.reshard %arg1 <@mesh, [{}, {"x"}]> : tensor<32x16xf32>
  // CHECK: return %[[RESHARD1]], %[[RESHARD2]] : tensor<8x32xf32>, tensor<32x16xf32>
  return %arg0, %arg1 : tensor<8x32xf32>, tensor<32x16xf32>
}
50+
51+
// A reshard is inserted even when the sub-axis shardings appear to describe
// equivalent device assignments over the two meshes — any mesh difference
// (other than both being fully replicated) triggers a reshard.
// CHECK-LABEL: func @funcop_result_identical_sharding_but_different_meshes_between_return_and_func_result
func.func @funcop_result_identical_sharding_but_different_meshes_between_return_and_func_result(%arg0: tensor<8x16xf32> {sdy.sharding = #sdy.sharding<@mesh, [{"x":(1)2}, {"y"}]>}) -> (tensor<8x16xf32> {sdy.sharding = #sdy.sharding<@mesh_xt, [{"x"}, {"t":(2)2}]>}) {
  // CHECK: %[[RESHARD:.*]] = sdy.reshard %arg0 <@mesh_xt, [{"x"}, {"t":(2)2}]> : tensor<8x16xf32>
  // CHECK: return %[[RESHARD]] : tensor<8x16xf32>
  return %arg0 : tensor<8x16xf32>
}
57+
2258
// CHECK-LABEL: func @funcop_result_sharding_does_not_match_funcop_result_empty
2359
func.func @funcop_result_sharding_does_not_match_funcop_result_empty(%arg0: tensor<8x16xf32> {sdy.sharding = #sdy.sharding<@mesh, [{"x"}, {}]>}) -> tensor<8x16xf32> {
2460
// CHECK: %[[RESHARD:.*]] = sdy.reshard %arg0 <@mesh, [{}, {}]> : tensor<8x16xf32>

0 commit comments

Comments
 (0)