Skip to content

Commit 4533c31

Browse files
Refactor to unify the insert-explicit-reshards-on-op flow between the default/minimal and full versions.
This prepares for further refactorings that unify other parts of the flow. PiperOrigin-RevId: 811354695
1 parent 17e1e18 commit 4533c31

3 files changed

Lines changed: 105 additions & 93 deletions

File tree

shardy/dialect/sdy/transforms/export/explicit_reshards_util.cc

Lines changed: 4 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,6 @@ limitations under the License.
4545
namespace mlir {
4646
namespace sdy {
4747

48-
namespace {
49-
50-
// Returns true iff any tensor factor sharding has non-empty overflow axes.
5148
bool hasOverflowAxes(const ShardingProjection& shardingProjection) {
5249
for (const TensorFactorShardings& tensorFactorSharding :
5350
llvm::concat<const TensorFactorShardings>(
@@ -62,6 +59,7 @@ bool hasOverflowAxes(const ShardingProjection& shardingProjection) {
6259
return false;
6360
}
6461

62+
namespace {
6563
bool hasShardedPermutationFactors(
6664
const TensorFactorShardings& tensorFactorSharding,
6765
OpShardingRuleAttr shardingRule) {
@@ -157,44 +155,8 @@ bool shouldReshardToCommonMesh(TensorShardingAttr sharding, const Mesh& mesh,
157155
sharding.getMesh(symbolTable).getDeviceIds() !=
158156
mesh.attr().getDeviceIds();
159157
}
158+
} // namespace
160159

161-
// Insert explicit reshards for operands and results that change by
162-
// the given `shardingProjection` for a given `op`. The reshards are inserted
163-
// only to make the given operation compatible.
164-
//
165-
// For example,
166-
//
167-
// ```mlir
168-
// %arg0: tensor<8x32xf32> { sdy.sharding = @mesh, [{}, {"y"}]>}
169-
// %arg1: tensor<32x16xf32> { sdy.sharding = <@mesh, [{"y"}, {"x"}]>}
170-
// %0 = stablehlo.dot %arg0, %arg1 { sdy.sharding = <@mesh, [{"x"}, {}]>,
171-
// sdy.sharding_rule = <([i, k], [k, j])->([i, j])> }
172-
// %1 = stablehlo.negate %0 {sdy.sharding = <@mesh, [{"x"}, {}]>
173-
// return %1
174-
// ```
175-
//
176-
// after a call on the stablehlo.dot operation, by the sharding projection,
177-
// i: {}, j: {}, k: {"y"}, the module becomes:
178-
//
179-
// ```mlir
180-
// %arg0: tensor<8x32xf32> { sdy.sharding = @mesh, [{}, {"y"}]>}
181-
// %arg1: tensor<32x16xf32> { sdy.sharding = <@mesh, [{"y"}, {"x"}]>}
182-
// %0 = stablehlo.reshard %arg1 {sdy.sharding = <@mesh, [{"y"}, {}]>}
183-
// %1 = stablehlo.dot %arg0, %0 { sdy.sharding = <@mesh, [{}, {}]>,
184-
// sdy.sharding_rule = <([i, k], [k, j])->([i, j])> }
185-
// %2 = stablehlo.reshard %1 {sdy.sharding = <@mesh, [{"x"}, {}]>}
186-
// %3 = stablehlo.negate %2 {sdy.sharding = <@mesh, [{"x"}, {}]>
187-
// return %3
188-
// ```
189-
//
190-
// In the above example, note that the operand and result shardings for
191-
// stablehlo.negate op remained unchanged.
192-
//
193-
// Assumes factor shardings do not have overflow axes.
194-
// TODO(enver): Handle the case when some factor shardings have overflow axes.
195-
//
196-
// Assumes all tensor shardings have the same mesh as `mesh` on axes but may be
197-
// different on device order.
198160
void insertExplicitReshards(Operation* op,
199161
ArrayRef<TensorShardingAttr> inShardings,
200162
ArrayRef<TensorShardingAttr> outShardings,
@@ -223,6 +185,7 @@ void insertExplicitReshards(Operation* op,
223185
}
224186
}
225187

188+
namespace {
226189
struct FactorAxesPair {
227190
constexpr static int64_t kEmptyFactorIndex = -1;
228191
constexpr static int64_t kTombstoneFactorIndex = -2;
@@ -793,6 +756,7 @@ void distributeAxisRefsToBatchingFactors(
793756
}
794757
}
795758
}
759+
} // namespace
796760

797761
// Assumes there are no overflow axes.
798762
//
@@ -855,8 +819,6 @@ SmallVector<int64_t> getTensorSizes(Operation* op) {
855819
return tensorSizes;
856820
}
857821

858-
// Returns reduction axes that are the union of all axes on reduction factors.
859-
// The result axes are not necessarilly canonicalized.
860822
SmallVector<AxisRefAttr> getReductionAxes(const AxesPerFactor& axesPerFactor,
861823
OpShardingRuleAttr shardingRule) {
862824
SmallVector<AxisRefAttr> reductionAxes;
@@ -865,7 +827,6 @@ SmallVector<AxisRefAttr> getReductionAxes(const AxesPerFactor& axesPerFactor,
865827
}
866828
return reductionAxes;
867829
}
868-
} // namespace
869830

870831
TensorShardingAttr insertAllReduceIfUnreducedToReplicated(
871832
OpOperand& use, TensorShardingAttr sourceSharding,
@@ -952,41 +913,5 @@ void insertAllReducesForReductionFactors(Operation* op,
952913
}
953914
}
954915

955-
SmallVector<AxisRefAttr> insertExplicitReshardsOnOp(
956-
Operation* op, ArrayRef<TensorShardingAttr> inShardings,
957-
ArrayRef<TensorShardingAttr> outShardings, IRRewriter& rewriter,
958-
const SymbolTable& symbolTable, OpShardingRuleAttr shardingRule,
959-
const Mesh& mesh) {
960-
ShardingProjection shardingProjection = ShardingProjection::build(
961-
inShardings, outShardings, shardingRule, mesh.attr(),
962-
/*closedIfMissing=*/true);
963-
964-
UpdateTensorShardings updateTensorShardings(shardingRule.getNumOperands(),
965-
shardingRule.getNumResults());
966-
967-
// Return without inserting reshards if any factor sharding has overflow
968-
// axes. This case is not handled yet.
969-
// TODO(b/446833985): Handle the case when factor shardings have overflow
970-
// axes.
971-
if (hasOverflowAxes(shardingProjection)) {
972-
return {};
973-
}
974-
975-
AxesPerFactor commonAxesPerFactor =
976-
findCommonAxes(inShardings, outShardings, shardingProjection,
977-
shardingRule, getTensorSizes(op), symbolTable, mesh);
978-
for (const auto& [index, axes] : llvm::enumerate(commonAxesPerFactor)) {
979-
// TODO(enver): Add unit tests to test overflow axes are cleared after
980-
// handling the case that some factors have overflow axes.
981-
updateTensorShardings |=
982-
shardingProjection.updateSharding(index, axes, /*overflowAxes=*/{});
983-
}
984-
insertExplicitReshards(op, inShardings, outShardings, shardingProjection,
985-
updateTensorShardings, rewriter, shardingRule,
986-
symbolTable, mesh);
987-
988-
return getReductionAxes(commonAxesPerFactor, shardingRule);
989-
}
990-
991916
} // namespace sdy
992917
} // namespace mlir

shardy/dialect/sdy/transforms/export/explicit_reshards_util.h

Lines changed: 66 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,63 @@ ArrayRef<AxisRefAttr> getUnreducedAxes(TensorShardingAttr sharding);
7878
// empty axes.
7979
ArrayRef<AxisRefAttr> getUnreducedAxes(Value value);
8080

81+
// Returns a concatenated array of operand and result tensor sizes.
82+
SmallVector<int64_t> getTensorSizes(Operation* op);
83+
84+
// Returns reduction axes that are the union of all axes on reduction factors.
85+
// The result axes are not necessarilly canonicalized.
86+
SmallVector<AxisRefAttr> getReductionAxes(const AxesPerFactor& axesPerFactor,
87+
OpShardingRuleAttr shardingRule);
88+
89+
// Returns true iff any tensor factor sharding has non-empty overflow axes.
90+
bool hasOverflowAxes(const ShardingProjection& shardingProjection);
91+
92+
// Insert explicit reshards for operands and results that change by
93+
// the given `shardingProjection` for a given `op`. The reshards are inserted
94+
// only to make the given operation compatible.
95+
//
96+
// For example,
97+
//
98+
// ```mlir
99+
// %arg0: tensor<8x32xf32> { sdy.sharding = @mesh, [{}, {"y"}]>}
100+
// %arg1: tensor<32x16xf32> { sdy.sharding = <@mesh, [{"y"}, {"x"}]>}
101+
// %0 = stablehlo.dot %arg0, %arg1 { sdy.sharding = <@mesh, [{"x"}, {}]>,
102+
// sdy.sharding_rule = <([i, k], [k, j])->([i, j])> }
103+
// %1 = stablehlo.negate %0 {sdy.sharding = <@mesh, [{"x"}, {}]>
104+
// return %1
105+
// ```
106+
//
107+
// after a call on the stablehlo.dot operation, by the sharding projection,
108+
// i: {}, j: {}, k: {"y"}, the module becomes:
109+
//
110+
// ```mlir
111+
// %arg0: tensor<8x32xf32> { sdy.sharding = @mesh, [{}, {"y"}]>}
112+
// %arg1: tensor<32x16xf32> { sdy.sharding = <@mesh, [{"y"}, {"x"}]>}
113+
// %0 = stablehlo.reshard %arg1 {sdy.sharding = <@mesh, [{"y"}, {}]>}
114+
// %1 = stablehlo.dot %arg0, %0 { sdy.sharding = <@mesh, [{}, {}]>,
115+
// sdy.sharding_rule = <([i, k], [k, j])->([i, j])> }
116+
// %2 = stablehlo.reshard %1 {sdy.sharding = <@mesh, [{"x"}, {}]>}
117+
// %3 = stablehlo.negate %2 {sdy.sharding = <@mesh, [{"x"}, {}]>
118+
// return %3
119+
// ```
120+
//
121+
// In the above example, note that the operand and result shardings for
122+
// stablehlo.negate op remained unchanged.
123+
//
124+
// Assumes factor shardings do not have overflow axes.
125+
// TODO(enver): Handle the case when some factor shardings have overflow axes.
126+
//
127+
// Assumes all tensor shardings have the same mesh as `mesh` on axes but may be
128+
// different on device order.
129+
void insertExplicitReshards(Operation* op,
130+
ArrayRef<TensorShardingAttr> inShardings,
131+
ArrayRef<TensorShardingAttr> outShardings,
132+
const ShardingProjection& shardingProjection,
133+
UpdateTensorShardings updateTensorShardings,
134+
IRRewriter& rewriter,
135+
OpShardingRuleAttr shardingRule,
136+
const SymbolTable& symbolTable, const Mesh& mesh);
137+
81138
// Inserts an `sdy.all-reduce` for each result of `op` if `reductionAxes`
82139
// is non-empty. Assume the followings:
83140
// - All op results have the same unreduced axes.
@@ -87,7 +144,7 @@ void insertAllReducesForReductionFactors(Operation* op,
87144
const Mesh& mesh,
88145
IRRewriter& rewriter);
89146

90-
// Inserts explicit reshards on the operands and results of `op` such that the
147+
// Finds common factor axes on the operands and results of `op` so that the
91148
// sharding of `op` is compatible with its sharding rule.
92149
//
93150
// Refer to the documentation of `InsertExplicitReshardsPass` for more details.
@@ -96,14 +153,15 @@ void insertAllReducesForReductionFactors(Operation* op,
96153
// - All op results have the same unreduced axes.
97154
// - If the op has no results, none of the operands has unreduced axes.
98155
// - Operand and result meshes are the same ignoring device id order.
156+
// - There are no overflow axes.
99157
//
100-
// Returns the union of axes along all the reduction factors which may not be
101-
// canonicalized.
102-
SmallVector<AxisRefAttr> insertExplicitReshardsOnOp(
103-
Operation* op, ArrayRef<TensorShardingAttr> inShardings,
104-
ArrayRef<TensorShardingAttr> outShardings, IRRewriter& rewriter,
105-
const SymbolTable& symbolTable, OpShardingRuleAttr shardingRule,
106-
const Mesh& mesh);
158+
// Guarantees to return a non-empty AxesPerFactor.
159+
AxesPerFactor findCommonAxes(ArrayRef<TensorShardingAttr> inShardings,
160+
ArrayRef<TensorShardingAttr> outShardings,
161+
const ShardingProjection& shardingProjection,
162+
OpShardingRuleAttr shardingRule,
163+
ArrayRef<int64_t> tensorSizes,
164+
const SymbolTable& symbolTable, const Mesh& mesh);
107165

108166
} // namespace sdy
109167
} // namespace mlir

shardy/dialect/sdy/transforms/export/insert_explicit_reshards.cc

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -391,21 +391,53 @@ bool isOnFullVersion(Operation* op, const bool enableFullVersion) {
391391
return false;
392392
}
393393

394+
// Inserts explicit reshards on the operands and results of `op` such that the
395+
// sharding of `op` is compatible with its sharding rule.
396+
//
397+
// Refer to the documentation of `InsertExplicitReshardsPass` for more details.
398+
//
394399
// Assume the followings:
395400
// - All op results have the same unreduced axes.
396401
// - If the op has no results, none of the operands has unreduced axes.
402+
// - Operand and result meshes are the same ignoring device id order.
397403
//
398-
// Returns the union of common reducation axes which may not be canonicalized.
404+
// Returns the union of axes along all the reduction factors which may not be
405+
// canonicalized.
399406
SmallVector<AxisRefAttr> processOp(Operation* op,
400407
ArrayRef<TensorShardingAttr> inShardings,
401408
ArrayRef<TensorShardingAttr> outShardings,
402409
IRRewriter& rewriter,
403410
const SymbolTable& symbolTable,
404411
OpShardingRuleAttr shardingRule,
405412
const Mesh& mesh, const bool onFullVersion) {
413+
ShardingProjection shardingProjection = ShardingProjection::build(
414+
inShardings, outShardings, shardingRule, mesh.attr(),
415+
/*closedIfMissing=*/true);
416+
406417
if (onFullVersion) {
407-
return insertExplicitReshardsOnOp(op, inShardings, outShardings, rewriter,
408-
symbolTable, shardingRule, mesh);
418+
// Return without inserting reshards if any factor sharding has overflow
419+
// axes. This case is not handled yet.
420+
// TODO(b/446833985): Handle the case when factor shardings have overflow
421+
// axes.
422+
if (hasOverflowAxes(shardingProjection)) {
423+
return {};
424+
}
425+
AxesPerFactor commonAxesPerFactor =
426+
findCommonAxes(inShardings, outShardings, shardingProjection,
427+
shardingRule, getTensorSizes(op), symbolTable, mesh);
428+
UpdateTensorShardings updateTensorShardings(shardingRule.getNumOperands(),
429+
shardingRule.getNumResults());
430+
for (const auto& [index, axes] : llvm::enumerate(commonAxesPerFactor)) {
431+
// TODO(enver): Add unit tests to test overflow axes are cleared after
432+
// handling the case that some factors have overflow axes.
433+
updateTensorShardings |=
434+
shardingProjection.updateSharding(index, axes, /*overflowAxes=*/{});
435+
}
436+
insertExplicitReshards(op, inShardings, outShardings, shardingProjection,
437+
updateTensorShardings, rewriter, shardingRule,
438+
symbolTable, mesh);
439+
440+
return getReductionAxes(commonAxesPerFactor, shardingRule);
409441
}
410442

411443
TypeSwitch<Operation*>(op)
@@ -422,9 +454,6 @@ SmallVector<AxisRefAttr> processOp(Operation* op,
422454
return {};
423455
}
424456

425-
ShardingProjection shardingProjection = ShardingProjection::build(
426-
inShardings, outShardings, shardingRule, mesh.attr(),
427-
/*closedIfMissing=*/true);
428457
// TODO(enver): Factor out finding common axes per factor. Share logic with
429458
// getCompatibleFactorShardings.
430459
SmallVector<AxisRefAttr> reductionAxes;

0 commit comments

Comments
 (0)