Skip to content

Commit 0626767

Browse files
Refactor to unify insert explicit reshards on op flows for default/minimal and full version.
It prepares for further refactorings to unify parts of the flow. PiperOrigin-RevId: 811354695
1 parent d944a51 commit 0626767

3 files changed

Lines changed: 123 additions & 113 deletions

File tree

shardy/dialect/sdy/transforms/export/explicit_reshards_util.cc

Lines changed: 27 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -158,71 +158,6 @@ bool shouldReshardToCommonMesh(TensorShardingAttr sharding, const Mesh& mesh,
158158
mesh.attr().getDeviceIds();
159159
}
160160

161-
// Insert explicit reshards for operands and results that change by
162-
// the given `shardingProjection` for a given `op`. The reshards are inserted
163-
// only to make the given operation compatible.
164-
//
165-
// For example,
166-
//
167-
// ```mlir
168-
// %arg0: tensor<8x32xf32> { sdy.sharding = <@mesh, [{}, {"y"}]>}
169-
// %arg1: tensor<32x16xf32> { sdy.sharding = <@mesh, [{"y"}, {"x"}]>}
170-
// %0 = stablehlo.dot %arg0, %arg1 { sdy.sharding = <@mesh, [{"x"}, {}]>,
171-
// sdy.sharding_rule = <([i, k], [k, j])->([i, j])> }
172-
// %1 = stablehlo.negate %0 {sdy.sharding = <@mesh, [{"x"}, {}]>
173-
// return %1
174-
// ```
175-
//
176-
// after a call on the stablehlo.dot operation, by the sharding projection,
177-
// i: {}, j: {}, k: {"y"}, the module becomes:
178-
//
179-
// ```mlir
180-
// %arg0: tensor<8x32xf32> { sdy.sharding = <@mesh, [{}, {"y"}]>}
181-
// %arg1: tensor<32x16xf32> { sdy.sharding = <@mesh, [{"y"}, {"x"}]>}
182-
// %0 = stablehlo.reshard %arg1 {sdy.sharding = <@mesh, [{"y"}, {}]>}
183-
// %1 = stablehlo.dot %arg0, %0 { sdy.sharding = <@mesh, [{}, {}]>,
184-
// sdy.sharding_rule = <([i, k], [k, j])->([i, j])> }
185-
// %2 = stablehlo.reshard %1 {sdy.sharding = <@mesh, [{"x"}, {}]>}
186-
// %3 = stablehlo.negate %2 {sdy.sharding = <@mesh, [{"x"}, {}]>
187-
// return %3
188-
// ```
189-
//
190-
// In the above example, note that the operand and result shardings for
191-
// stablehlo.negate op remained unchanged.
192-
//
193-
// Assumes factor shardings do not have overflow axes.
194-
// TODO(enver): Handle the case when some factor shardings have overflow axes.
195-
//
196-
// Assumes all tensor shardings have the same mesh as `mesh` on axes but may be
197-
// different on device order.
198-
void insertExplicitReshards(Operation* op,
199-
ArrayRef<TensorShardingAttr> inShardings,
200-
ArrayRef<TensorShardingAttr> outShardings,
201-
const ShardingProjection& shardingProjection,
202-
UpdateTensorShardings updateTensorShardings,
203-
IRRewriter& rewriter,
204-
OpShardingRuleAttr shardingRule,
205-
const SymbolTable& symbolTable, const Mesh& mesh) {
206-
rewriter.setInsertionPoint(op);
207-
for (const auto& [operandIndex, operandSharding] :
208-
llvm::enumerate(inShardings)) {
209-
if (updateTensorShardings.updateOperands.test(operandIndex) ||
210-
shouldReshardToCommonMesh(operandSharding, mesh, symbolTable)) {
211-
insertExplicitReshardsOnOperand(op, operandIndex, shardingProjection,
212-
shardingRule, mesh, rewriter);
213-
}
214-
}
215-
rewriter.setInsertionPointAfter(op);
216-
for (const auto& [resultIndex, resultSharding] :
217-
llvm::enumerate(outShardings)) {
218-
if (updateTensorShardings.updateResults.test(resultIndex) ||
219-
shouldReshardToCommonMesh(resultSharding, mesh, symbolTable)) {
220-
insertExplicitReshardsOnResult(op, resultIndex, shardingProjection,
221-
shardingRule, mesh, rewriter);
222-
}
223-
}
224-
}
225-
226161
struct FactorAxesPair {
227162
constexpr static int64_t kEmptyFactorIndex = -1;
228163
constexpr static int64_t kTombstoneFactorIndex = -2;
@@ -796,6 +731,7 @@ void distributeAxisRefsToBatchingFactors(
796731
}
797732
}
798733
}
734+
} // namespace
799735

800736
AxesPerFactor findCommonAxes(ArrayRef<TensorShardingAttr> inShardings,
801737
ArrayRef<TensorShardingAttr> outShardings,
@@ -861,8 +797,6 @@ SmallVector<int64_t> getTensorSizes(Operation* op) {
861797
return tensorSizes;
862798
}
863799

864-
// Returns reduction axes that are the union of all axes on reduction factors.
865-
// The result axes are not necessarily canonicalized.
866800
SmallVector<AxisRefAttr> getReductionAxes(const AxesPerFactor& axesPerFactor,
867801
OpShardingRuleAttr shardingRule) {
868802
SmallVector<AxisRefAttr> reductionAxes;
@@ -871,7 +805,6 @@ SmallVector<AxisRefAttr> getReductionAxes(const AxesPerFactor& axesPerFactor,
871805
}
872806
return reductionAxes;
873807
}
874-
} // namespace
875808

876809
TensorShardingAttr insertAllReduceIfUnreducedToReplicated(
877810
OpOperand& use, TensorShardingAttr sourceSharding,
@@ -958,36 +891,32 @@ void insertAllReducesForReductionFactors(Operation* op,
958891
}
959892
}
960893

961-
SmallVector<AxisRefAttr> insertExplicitReshardsOnOp(
962-
Operation* op, ArrayRef<TensorShardingAttr> inShardings,
963-
ArrayRef<TensorShardingAttr> outShardings, IRRewriter& rewriter,
964-
const SymbolTable& symbolTable, OpShardingRuleAttr shardingRule,
965-
const Mesh& mesh) {
966-
ShardingProjection shardingProjection = ShardingProjection::build(
967-
inShardings, outShardings, shardingRule, mesh.attr(),
968-
/*closedIfMissing=*/true);
969-
970-
UpdateTensorShardings updateTensorShardings(shardingRule.getNumOperands(),
971-
shardingRule.getNumResults());
972-
AxesPerFactor commonAxesPerFactor =
973-
findCommonAxes(inShardings, outShardings, shardingProjection,
974-
shardingRule, getTensorSizes(op), symbolTable, mesh);
975-
// TODO(b/446833985): Return common axes factors also when the sharding
976-
// projection have overflow axes.
977-
if (commonAxesPerFactor.empty()) {
978-
return {};
979-
}
980-
for (const auto& [index, axes] : llvm::enumerate(commonAxesPerFactor)) {
981-
// TODO(enver): Add unit tests to test overflow axes are cleared after
982-
// handling the case that some factors have overflow axes.
983-
updateTensorShardings |=
984-
shardingProjection.updateSharding(index, axes, /*overflowAxes=*/{});
985-
}
986-
insertExplicitReshards(op, inShardings, outShardings, shardingProjection,
987-
updateTensorShardings, rewriter, shardingRule,
988-
symbolTable, mesh);
989-
990-
return getReductionAxes(commonAxesPerFactor, shardingRule);
894+
void insertExplicitReshards(Operation* op,
895+
ArrayRef<TensorShardingAttr> inShardings,
896+
ArrayRef<TensorShardingAttr> outShardings,
897+
const ShardingProjection& shardingProjection,
898+
UpdateTensorShardings updateTensorShardings,
899+
IRRewriter& rewriter,
900+
OpShardingRuleAttr shardingRule,
901+
const SymbolTable& symbolTable, const Mesh& mesh) {
902+
rewriter.setInsertionPoint(op);
903+
for (const auto& [operandIndex, operandSharding] :
904+
llvm::enumerate(inShardings)) {
905+
if (updateTensorShardings.updateOperands.test(operandIndex) ||
906+
shouldReshardToCommonMesh(operandSharding, mesh, symbolTable)) {
907+
insertExplicitReshardsOnOperand(op, operandIndex, shardingProjection,
908+
shardingRule, mesh, rewriter);
909+
}
910+
}
911+
rewriter.setInsertionPointAfter(op);
912+
for (const auto& [resultIndex, resultSharding] :
913+
llvm::enumerate(outShardings)) {
914+
if (updateTensorShardings.updateResults.test(resultIndex) ||
915+
shouldReshardToCommonMesh(resultSharding, mesh, symbolTable)) {
916+
insertExplicitReshardsOnResult(op, resultIndex, shardingProjection,
917+
shardingRule, mesh, rewriter);
918+
}
919+
}
991920
}
992921

993922
} // namespace sdy

shardy/dialect/sdy/transforms/export/explicit_reshards_util.h

Lines changed: 62 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,14 @@ ArrayRef<AxisRefAttr> getUnreducedAxes(TensorShardingAttr sharding);
7878
// empty axes.
7979
ArrayRef<AxisRefAttr> getUnreducedAxes(Value value);
8080

81+
// Returns a concatenated array of operand and result tensor sizes.
82+
SmallVector<int64_t> getTensorSizes(Operation* op);
83+
84+
// Returns reduction axes that are the union of all axes on reduction factors.
85+
// The result axes are not necessarily canonicalized.
86+
SmallVector<AxisRefAttr> getReductionAxes(const AxesPerFactor& axesPerFactor,
87+
OpShardingRuleAttr shardingRule);
88+
8189
// Inserts an `sdy.all-reduce` for each result of `op` if `reductionAxes`
8290
// is non-empty. Assume the followings:
8391
// - All op results have the same unreduced axes.
@@ -87,7 +95,7 @@ void insertAllReducesForReductionFactors(Operation* op,
8795
const Mesh& mesh,
8896
IRRewriter& rewriter);
8997

90-
// Inserts explicit reshards on the operands and results of `op` such that the
98+
// Finds common factor axes on the operands and results of `op` so that the
9199
// sharding of `op` is compatible with its sharding rule.
92100
//
93101
// Refer to the documentation of `InsertExplicitReshardsPass` for more details.
@@ -97,14 +105,59 @@ void insertAllReducesForReductionFactors(Operation* op,
97105
// - If the op has no results, none of the operands has unreduced axes.
98106
// - Operand and result meshes are the same ignoring device id order.
99107
//
100-
// Returns the union of axes along all the reduction factors which may not be
101-
// canonicalized.
102-
SmallVector<AxisRefAttr> insertExplicitReshardsOnOp(
103-
Operation* op, ArrayRef<TensorShardingAttr> inShardings,
104-
ArrayRef<TensorShardingAttr> outShardings, IRRewriter& rewriter,
105-
const SymbolTable& symbolTable, OpShardingRuleAttr shardingRule,
106-
const Mesh& mesh);
107-
108+
// Returns the common axes per factor.
109+
AxesPerFactor findCommonAxes(ArrayRef<TensorShardingAttr> inShardings,
110+
ArrayRef<TensorShardingAttr> outShardings,
111+
const ShardingProjection& shardingProjection,
112+
OpShardingRuleAttr shardingRule,
113+
ArrayRef<int64_t> tensorSizes,
114+
const SymbolTable& symbolTable, const Mesh& mesh);
115+
116+
// Insert explicit reshards for operands and results that change by
117+
// the given `shardingProjection` for a given `op`. The reshards are inserted
118+
// only to make the given operation compatible.
119+
//
120+
// For example,
121+
//
122+
// ```mlir
123+
// %arg0: tensor<8x32xf32> { sdy.sharding = <@mesh, [{}, {"y"}]>}
124+
// %arg1: tensor<32x16xf32> { sdy.sharding = <@mesh, [{"y"}, {"x"}]>}
125+
// %0 = stablehlo.dot %arg0, %arg1 { sdy.sharding = <@mesh, [{"x"}, {}]>,
126+
// sdy.sharding_rule = <([i, k], [k, j])->([i, j])> }
127+
// %1 = stablehlo.negate %0 {sdy.sharding = <@mesh, [{"x"}, {}]>
128+
// return %1
129+
// ```
130+
//
131+
// after a call on the stablehlo.dot operation, by the sharding projection,
132+
// i: {}, j: {}, k: {"y"}, the module becomes:
133+
//
134+
// ```mlir
135+
// %arg0: tensor<8x32xf32> { sdy.sharding = <@mesh, [{}, {"y"}]>}
136+
// %arg1: tensor<32x16xf32> { sdy.sharding = <@mesh, [{"y"}, {"x"}]>}
137+
// %0 = stablehlo.reshard %arg1 {sdy.sharding = <@mesh, [{"y"}, {}]>}
138+
// %1 = stablehlo.dot %arg0, %0 { sdy.sharding = <@mesh, [{}, {}]>,
139+
// sdy.sharding_rule = <([i, k], [k, j])->([i, j])> }
140+
// %2 = stablehlo.reshard %1 {sdy.sharding = <@mesh, [{"x"}, {}]>}
141+
// %3 = stablehlo.negate %2 {sdy.sharding = <@mesh, [{"x"}, {}]>
142+
// return %3
143+
// ```
144+
//
145+
// In the above example, note that the operand and result shardings for
146+
// stablehlo.negate op remained unchanged.
147+
//
148+
// Assumes factor shardings do not have overflow axes.
149+
// TODO(enver): Handle the case when some factor shardings have overflow axes.
150+
//
151+
// Assumes all tensor shardings have the same mesh as `mesh` on axes but may be
152+
// different on device order.
153+
void insertExplicitReshards(Operation* op,
154+
ArrayRef<TensorShardingAttr> inShardings,
155+
ArrayRef<TensorShardingAttr> outShardings,
156+
const ShardingProjection& shardingProjection,
157+
UpdateTensorShardings updateTensorShardings,
158+
IRRewriter& rewriter,
159+
OpShardingRuleAttr shardingRule,
160+
const SymbolTable& symbolTable, const Mesh& mesh);
108161
} // namespace sdy
109162
} // namespace mlir
110163

shardy/dialect/sdy/transforms/export/insert_explicit_reshards.cc

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -391,21 +391,52 @@ bool isOnFullVersion(Operation* op, const bool enableFullVersion) {
391391
return false;
392392
}
393393

394+
// Inserts explicit reshards on the operands and results of `op` such that the
395+
// sharding of `op` is compatible with its sharding rule.
396+
//
397+
// Refer to the documentation of `InsertExplicitReshardsPass` for more details.
398+
//
394399
// Assume the followings:
395400
// - All op results have the same unreduced axes.
396401
// - If the op has no results, none of the operands has unreduced axes.
402+
// - Operand and result meshes are the same ignoring device id order.
397403
//
398-
// Returns the union of common reduction axes which may not be canonicalized.
404+
// Returns the union of axes along all the reduction factors which may not be
405+
// canonicalized.
399406
SmallVector<AxisRefAttr> processOp(Operation* op,
400407
ArrayRef<TensorShardingAttr> inShardings,
401408
ArrayRef<TensorShardingAttr> outShardings,
402409
IRRewriter& rewriter,
403410
const SymbolTable& symbolTable,
404411
OpShardingRuleAttr shardingRule,
405412
const Mesh& mesh, const bool onFullVersion) {
413+
ShardingProjection shardingProjection = ShardingProjection::build(
414+
inShardings, outShardings, shardingRule, mesh.attr(),
415+
/*closedIfMissing=*/true);
416+
406417
if (onFullVersion) {
407-
return insertExplicitReshardsOnOp(op, inShardings, outShardings, rewriter,
408-
symbolTable, shardingRule, mesh);
418+
AxesPerFactor commonAxesPerFactor =
419+
findCommonAxes(inShardings, outShardings, shardingProjection,
420+
shardingRule, getTensorSizes(op), symbolTable, mesh);
421+
// TODO(b/446833985): Return common axes factors also when the sharding
422+
// projection have overflow axes.
423+
if (commonAxesPerFactor.empty()) {
424+
return {};
425+
}
426+
427+
UpdateTensorShardings updateTensorShardings(shardingRule.getNumOperands(),
428+
shardingRule.getNumResults());
429+
for (const auto& [index, axes] : llvm::enumerate(commonAxesPerFactor)) {
430+
// TODO(enver): Add unit tests to test overflow axes are cleared after
431+
// handling the case that some factors have overflow axes.
432+
updateTensorShardings |=
433+
shardingProjection.updateSharding(index, axes, /*overflowAxes=*/{});
434+
}
435+
insertExplicitReshards(op, inShardings, outShardings, shardingProjection,
436+
updateTensorShardings, rewriter, shardingRule,
437+
symbolTable, mesh);
438+
439+
return getReductionAxes(commonAxesPerFactor, shardingRule);
409440
}
410441

411442
TypeSwitch<Operation*>(op)
@@ -422,9 +453,6 @@ SmallVector<AxisRefAttr> processOp(Operation* op,
422453
return {};
423454
}
424455

425-
ShardingProjection shardingProjection = ShardingProjection::build(
426-
inShardings, outShardings, shardingRule, mesh.attr(),
427-
/*closedIfMissing=*/true);
428456
// TODO(enver): Factor out finding common axes per factor. Share logic with
429457
// getCompatibleFactorShardings.
430458
SmallVector<AxisRefAttr> reductionAxes;

0 commit comments

Comments
 (0)