Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions shardy/dialect/sdy/ir/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1108,5 +1108,11 @@ FuncOp cloneFuncRecursively(FuncOp funcOp, SymbolTable& symbolTable) {
return clonedFuncOp;
}

// Returns the `FuncOp` named `funcSymName` in `symbolTable`. Dies if no such
// function exists.
FuncOp getFuncOpOrDie(StringRef funcSymName, const SymbolTable& symbolTable) {
  FuncOp funcOp = symbolTable.lookup<FuncOp>(funcSymName);
  // SDY_CHECK keeps this consistent with the identical lookup-failure check in
  // import_func_calls.cc ("Failed to lookup function: ...").
  SDY_CHECK(funcOp) << "Failed to lookup function: " << funcSymName.str();
  return funcOp;
}

} // namespace sdy
} // namespace mlir
5 changes: 5 additions & 0 deletions shardy/dialect/sdy/ir/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,11 @@ Operation* getCommonSupportedReductionOp(stablehlo::ScatterOp scatter);
mlir::func::FuncOp cloneFuncRecursively(func::FuncOp funcOp,
SymbolTable& symbolTable);

// Returns the `FuncOp` named `funcSymName`. Dies if the function does not
// exist in the `symbolTable`.
func::FuncOp getFuncOpOrDie(StringRef funcSymName,
const SymbolTable& symbolTable);

} // namespace sdy
} // namespace mlir

Expand Down
1 change: 1 addition & 0 deletions shardy/dialect/sdy/transforms/import/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ cc_library(
"add_data_flow_edges.cc",
"apply_sharding_constraints.cc",
"constant_or_scalar_splitter.cc",
"equilise_call_and_func_result_shardings.cc",
"import_func_calls.cc",
"import_pipeline.cc",
"inline_meshes.cc",
Expand Down
23 changes: 16 additions & 7 deletions shardy/dialect/sdy/transforms/import/add_data_flow_edges.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/PatternMatch.h"
Expand All @@ -30,6 +31,9 @@ namespace sdy {

namespace {

using func::CallOp;
using func::FuncOp;

struct AddDataFlowEdgesPass
: public impl::AddDataFlowEdgesPassBase<AddDataFlowEdgesPass> {
using AddDataFlowEdgesPassBase::AddDataFlowEdgesPassBase;
Expand All @@ -51,20 +55,25 @@ struct AddDataFlowEdgesPass
}

void runOnOperation() final {
func::FuncOp funcOp = getOperation();
IRRewriter rewriter(funcOp);
ModuleOp moduleOp = getOperation();
SymbolTable symbolTable(moduleOp);
IRRewriter rewriter(moduleOp);

funcOp.walk([&](ShardableDataFlowOpInterface op) {
moduleOp.walk([&](ShardableDataFlowOpInterface op) {
// Add the data flow edges for result owners and block argument owners.
addDataFlowEdges(op.getBlockArgumentEdgeOwners(), rewriter);
addDataFlowEdges(op.getOpResultEdgeOwners(), rewriter);
});

llvm::SmallDenseSet<StringRef> funcNames;
if (enableNativeNonFlatSupport) {
// TODO(enver): Do not create data flow edge if the func has no callers,
// such as the entry function.
addDataFlowEdges(funcOp.getArguments(), rewriter);
funcOp.walk([&](func::CallOp callOp) {
moduleOp.walk([&](CallOp callOp) {
addDataFlowEdges(callOp.getResults(), rewriter);
StringRef funcName = callOp.getCallee();
if (auto [_, inserted] = funcNames.insert(funcName); inserted) {
FuncOp funcOp = getFuncOpOrDie(funcName, symbolTable);
addDataFlowEdges(funcOp.getArguments(), rewriter);
}
});
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/* Copyright 2026 The OpenXLA Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <iterator>

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "shardy/dialect/sdy/ir/dialect.h"
#include "shardy/dialect/sdy/ir/utils.h"
#include "shardy/dialect/sdy/transforms/import/passes.h" // IWYU pragma: keep

namespace mlir {
namespace sdy {

#define GEN_PASS_DEF_EQUILISECALLANDFUNCRESULTSHARDINGSPASS
#include "shardy/dialect/sdy/transforms/import/passes.h.inc"

namespace {

using func::CallOp;
using func::FuncOp;

// Copies a callee's result shardings onto `func.call` ops that carry no result
// shardings of their own. A call op that already has a
// `TensorShardingPerValueAttr` is left untouched, even if every individual
// sharding in it is empty.
struct EquiliseCallAndFuncResultShardingsPass
    : public impl::EquiliseCallAndFuncResultShardingsPassBase<
          EquiliseCallAndFuncResultShardingsPass> {
  using EquiliseCallAndFuncResultShardingsPassBase::
      EquiliseCallAndFuncResultShardingsPassBase;

  void runOnOperation() override {
    ModuleOp module = getOperation();
    SymbolTable symbolTable(module);

    module.walk([&](CallOp callOp) {
      // Resolve the callee unconditionally so that a missing callee dies even
      // when the call already carries shardings (same as the original order).
      FuncOp callee = getFuncOpOrDie(callOp.getCallee(), symbolTable);
      if (getShardingPerValue(callOp)) {
        // Existing call result shardings win; nothing to do.
        return;
      }
      TensorShardingPerValueAttr calleeResultShardings =
          getFuncResultShardings(callee, symbolTable);
      if (calleeResultShardings) {
        setShardings(callOp, calleeResultShardings);
      }
    });
  }
};

} // namespace

} // namespace sdy
} // namespace mlir
6 changes: 1 addition & 5 deletions shardy/dialect/sdy/transforms/import/import_func_calls.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,17 +63,13 @@ void importCallOp(
SDY_CHECK(funcOp) << "Failed to lookup function: " << calleeName.str();

rewriter.setInsertionPoint(callOp);
TensorShardingPerValueAttr callOpResultShardings =
getShardingPerValue(callOp);
auto namedCompOp = NamedComputationOp::create(
rewriter, callOp->getLoc(), callOp->getResultTypes(),
getOriginalFuncName(funcOp), callOp.getOperands(),
/*inShardings=*/getFuncArgShardings(funcOp, symbolTable),
// TODO(b/439018088): Take func result shardings if call op result
// shardings are empty.
/*outShardings=*/
callOpResultShardings ? callOpResultShardings
: getFuncResultShardings(funcOp, symbolTable));
/*outShardings=*/getShardingPerValue(callOp));
namedCompOp->setAttrs(namedCompAttrs);

Region& namedCompRegion = namedCompOp.getRegion();
Expand Down
3 changes: 2 additions & 1 deletion shardy/dialect/sdy/transforms/import/import_pipeline.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,10 @@ void addImportPipeline(OpPassManager& pm, int& dumpIndex,
// of the propagation itself.
pm.addPass(mlir::sdy::createSaveModuleOpPass(
options.dumpDirectory, "before_propagation", dumpIndex++));
pm.addPass(createEquiliseCallAndFuncResultShardingsPass());
pm.addPass(createImportFuncCallsPass());

pm.addNestedPass<func::FuncOp>(createAddDataFlowEdgesPass(
pm.addPass(createAddDataFlowEdgesPass(
AddDataFlowEdgesPassOptions{options.enableNativeNonFlatSupport}));
pm.addPass(
createApplyShardingConstraintsPass(ApplyShardingConstraintsPassOptions{
Expand Down
13 changes: 12 additions & 1 deletion shardy/dialect/sdy/transforms/import/passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,17 @@ def ImportFuncCallsPass : Pass<"sdy-import-func-calls", "ModuleOp"> {
let dependentDialects = ["mlir::sdy::SdyDialect"];
}

def EquiliseCallAndFuncResultShardingsPass : Pass<"sdy-equilise-call-and-func-result-shardings", "ModuleOp"> {
let summary = "Set call result shardings as the func result shardings, if empty.";
let description = [{
Creates a pass to equilise call and func result shardings if the call
result shardings are empty. Notably, it keeps call result sharding if the
call already has result shardings, even if all individual result shardings
are empty.
}];
let dependentDialects = ["mlir::sdy::SdyDialect"];
}

def LiftInlinedMeshesPass : Pass<"sdy-lift-inlined-meshes", "ModuleOp"> {
let summary = "Lifts inlined `MeshAttr`s in shardings as symbol `MeshOp`s.";
let description = [{
Expand All @@ -54,7 +65,7 @@ def InlineMeshesPass : Pass<"sdy-inline-meshes", "ModuleOp"> {
let dependentDialects = ["mlir::sdy::SdyDialect"];
}

def AddDataFlowEdgesPass : Pass<"sdy-add-data-flow-edges", "func::FuncOp"> {
def AddDataFlowEdgesPass : Pass<"sdy-add-data-flow-edges", "ModuleOp"> {
let summary = "Inserts `DataFlowEdgeOp` for every data-flow edge.";
let description = [{
Inserts `DataFlowEdgeOp` for every value that is the owner of a data-flow
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ func.func @bar(%arg0: tensor<8xf32>) -> tensor<8xf32> {

// CHECK-LABEL: @simple_call_graph_on_func_with_single_argument(%arg0: tensor<8xf32>)
func.func @simple_call_graph_on_func_with_single_argument(%arg0: tensor<8xf32>) -> tensor<8xf32> {
// CHECK: %[[CALL:.*]] = call @bar(%1) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK: %[[CALL:.*]] = call @bar(%0) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK-NEXT: %[[EDGE:.*]] = sdy.data_flow_edge %[[CALL]] : tensor<8xf32>
// CHECK-NEXT: %[[ABS:.*]] = stablehlo.abs %[[EDGE]] : tensor<8xf32>
%0 = stablehlo.abs %arg0 : tensor<8xf32>
Expand All @@ -31,7 +31,7 @@ func.func @bar(%arg0: tensor<8xf32>) -> tensor<8xf32> {

// CHECK-LABEL: @simple_call_graph_on_func_multiple_users_on_func_result(%arg0: tensor<8xf32>)
func.func @simple_call_graph_on_func_multiple_users_on_func_result(%arg0: tensor<8xf32>) -> tensor<8xf32> {
// CHECK: %[[CALL:.*]] = call @bar(%1) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK: %[[CALL:.*]] = call @bar(%0) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK-NEXT: %[[EDGE:.*]] = sdy.data_flow_edge %[[CALL]] : tensor<8xf32>
// CHECK-NEXT: %[[ABS:.*]] = stablehlo.abs %[[EDGE]] : tensor<8xf32>
// CHECK-NEXT: %[[NEGATE:.*]] = stablehlo.negate %[[EDGE]] : tensor<8xf32>
Expand All @@ -56,7 +56,7 @@ func.func @bar(%arg0: tensor<8xf32>) ->(tensor<8xf32>, tensor<8xf32>) {

// CHECK-LABEL: @simple_call_graph_on_func_with_multiple_results(%arg0: tensor<8xf32>)
func.func @simple_call_graph_on_func_with_multiple_results(%arg0: tensor<8xf32>) -> tensor<8xf32> {
// CHECK: %[[CALL:.*]]:2 = call @bar(%1) : (tensor<8xf32>) -> (tensor<8xf32>, tensor<8xf32>)
// CHECK: %[[CALL:.*]]:2 = call @bar(%0) : (tensor<8xf32>) -> (tensor<8xf32>, tensor<8xf32>)
// CHECK-NEXT: %[[EDGE0:.*]] = sdy.data_flow_edge %[[CALL]]#0 : tensor<8xf32>
// CHECK-NEXT: %[[EDGE1:.*]] = sdy.data_flow_edge %[[CALL]]#1 : tensor<8xf32>
// CHECK-NEXT: %[[ADD:.*]] = stablehlo.add %[[EDGE0]], %[[EDGE1]] : tensor<8xf32>
Expand All @@ -83,7 +83,7 @@ func.func @bar(%arg0: tensor<8xf32> {sdy.sharding = #sdy.sharding<@mesh, [{"a"}]

// CHECK-LABEL: @simple_call_graph_on_func_with_sharded_argument(%arg0: tensor<8xf32>)
func.func @simple_call_graph_on_func_with_sharded_argument(%arg0: tensor<8xf32>) -> tensor<8xf32> {
// CHECK: %[[CALL:.*]] = call @bar(%1) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK: %[[CALL:.*]] = call @bar(%0) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK-NEXT: %[[EDGE:.*]] = sdy.data_flow_edge %[[CALL]] : tensor<8xf32>
// CHECK-NEXT: %[[ABS:.*]] = stablehlo.abs %[[EDGE]] : tensor<8xf32>
%0 = stablehlo.abs %arg0 : tensor<8xf32>
Expand All @@ -104,7 +104,7 @@ func.func @bar(%arg0: tensor<8xf32>) -> tensor<8xf32> {

// CHECK-LABEL: @multiple_calls_on_same_func(%arg0: tensor<8xf32>)
func.func @multiple_calls_on_same_func(%arg0: tensor<8xf32>) -> tensor<8xf32> {
// CHECK: %[[CALL0:.*]] = call @bar(%1) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK: %[[CALL0:.*]] = call @bar(%0) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK-NEXT: %[[EDGE0:.*]] = sdy.data_flow_edge %[[CALL0]] : tensor<8xf32>
// CHECK-NEXT: %[[ABS0:.*]] = stablehlo.abs %[[EDGE0]] : tensor<8xf32>
// CHECK-NEXT: %[[CALL1:.*]] = call @bar(%[[ABS0]]) : (tensor<8xf32>) -> tensor<8xf32>
Expand All @@ -131,7 +131,7 @@ func.func @bar(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<8xf32> {

// CHECK-LABEL: @simple_call_graph_on_func_with_multiple_argument(%arg0: tensor<8xf32>)
func.func @simple_call_graph_on_func_with_multiple_argument(%arg0: tensor<8xf32>) -> tensor<8xf32> {
// CHECK: %[[CALL:.*]] = call @bar(%1, %2) : (tensor<8xf32>, tensor<8xf32>) -> tensor<8xf32>
// CHECK: %[[CALL:.*]] = call @bar(%0, %1) : (tensor<8xf32>, tensor<8xf32>) -> tensor<8xf32>
// CHECK-NEXT: %[[EDGE:.*]] = sdy.data_flow_edge %[[CALL]] : tensor<8xf32>
// CHECK-NEXT: %[[ABS:.*]] = stablehlo.abs %[[EDGE]] : tensor<8xf32>
%0 = stablehlo.abs %arg0 : tensor<8xf32>
Expand All @@ -154,7 +154,7 @@ func.func @bar(%arg0: tensor<8xf32>, %arg1: tensor<8xf32>) -> tensor<8xf32> {

// CHECK-LABEL: @simple_call_graph_on_func_with_multiple_argument_same_operand(%arg0: tensor<8xf32>)
func.func @simple_call_graph_on_func_with_multiple_argument_same_operand(%arg0: tensor<8xf32>) -> tensor<8xf32> {
// CHECK: %[[CALL:.*]] = call @bar(%1, %1) : (tensor<8xf32>, tensor<8xf32>) -> tensor<8xf32>
// CHECK: %[[CALL:.*]] = call @bar(%0, %0) : (tensor<8xf32>, tensor<8xf32>) -> tensor<8xf32>
// CHECK-NEXT: %[[EDGE:.*]] = sdy.data_flow_edge %[[CALL]] : tensor<8xf32>
// CHECK-NEXT: %[[ABS:.*]] = stablehlo.abs %[[EDGE]] : tensor<8xf32>
%0 = stablehlo.abs %arg0 : tensor<8xf32>
Expand Down Expand Up @@ -188,7 +188,7 @@ func.func @bar(%arg0: tensor<8xf32>) -> tensor<8xf32> {

// CHECK-LABEL: @simple_chain_call_graph(%arg0: tensor<8xf32>)
func.func @simple_chain_call_graph(%arg0: tensor<8xf32>) -> tensor<8xf32> {
// CHECK: %[[CALL:.*]] = call @bar(%1) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK: %[[CALL:.*]] = call @bar(%0) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK-NEXT: %[[EDGE:.*]] = sdy.data_flow_edge %[[CALL]] : tensor<8xf32>
// CHECK-NEXT: %[[ABS:.*]] = stablehlo.abs %[[EDGE]] : tensor<8xf32>
%0 = stablehlo.abs %arg0 : tensor<8xf32>
Expand Down Expand Up @@ -222,8 +222,7 @@ func.func @bar(%arg0: tensor<8xf32>) -> tensor<8xf32> {

// CHECK-LABEL: @simple_non_flat_call_graph(%arg0: tensor<8xf32>)
func.func @simple_non_flat_call_graph(%arg0: tensor<8xf32>) -> tensor<8xf32> {
// CHECK-NEXT: %[[EDGE0:.*]] = sdy.data_flow_edge %arg0 : tensor<8xf32>
// CHECK-NEXT: %[[NEGATE:.*]] = stablehlo.negate %[[EDGE0]] : tensor<8xf32>
// CHECK-NEXT: %[[NEGATE:.*]] = stablehlo.negate %arg0 : tensor<8xf32>
// CHECK-NEXT: %[[CALL0:.*]] = call @bar(%[[NEGATE]]) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK-NEXT: %[[EDGE1:.*]] = sdy.data_flow_edge %[[CALL0]] : tensor<8xf32>
// CHECK-NEXT: %[[ABS0:.*]] = stablehlo.abs %[[EDGE1]] : tensor<8xf32>
Expand Down Expand Up @@ -264,7 +263,7 @@ func.func @bar(%arg0: tensor<8xf32>) -> tensor<8xf32> {

// CHECK-LABEL: @simple_non_flat_call_graph_one_after_the_other(%arg0: tensor<8xf32>)
func.func @simple_non_flat_call_graph_one_after_the_other(%arg0: tensor<8xf32>) -> tensor<8xf32> {
// CHECK: %[[CALL0:.*]] = call @bar(%1) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK: %[[CALL0:.*]] = call @bar(%0) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK-NEXT: %[[EDGE0:.*]] = sdy.data_flow_edge %[[CALL0]] : tensor<8xf32>
// CHECK-NEXT: %[[CALL1:.*]] = call @foo(%[[EDGE0]]) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK-NEXT: %[[EDGE1:.*]] = sdy.data_flow_edge %[[CALL1]] : tensor<8xf32>
Expand Down Expand Up @@ -301,7 +300,7 @@ func.func @bar(%arg0: tensor<8xf32>) -> tensor<8xf32> {

// CHECK-LABEL: @call_on_same_func_twice_input_of_one_is_output_of_the_other(%arg0: tensor<8xf32>)
func.func @call_on_same_func_twice_input_of_one_is_output_of_the_other(%arg0: tensor<8xf32>) -> tensor<8xf32> {
// CHECK: %[[CALL0:.*]] = call @bar(%1) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK: %[[CALL0:.*]] = call @bar(%0) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK-NEXT: %[[EDGE0:.*]] = sdy.data_flow_edge %[[CALL0]] : tensor<8xf32>
// CHECK-NEXT: %[[CALL1:.*]] = call @bar(%[[EDGE0]]) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK-NEXT: %[[EDGE1:.*]] = sdy.data_flow_edge %[[CALL1]] : tensor<8xf32>
Expand All @@ -325,7 +324,7 @@ func.func @bar(%arg0: tensor<8xf32>) -> tensor<8xf32> {

// CHECK-LABEL: @simple_call_graph_argument_is_input_to_call(%arg0: tensor<8xf32>)
func.func @simple_call_graph_argument_is_input_to_call(%arg0: tensor<8xf32>) -> tensor<8xf32> {
// CHECK: %[[CALL:.*]] = call @bar(%0) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK: %[[CALL:.*]] = call @bar(%arg0) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK-NEXT: %[[EDGE:.*]] = sdy.data_flow_edge %[[CALL]] : tensor<8xf32>
// CHECK-NEXT: %[[ABS:.*]] = stablehlo.abs %[[EDGE]]
%0 = call @bar(%arg0) : (tensor<8xf32>) -> (tensor<8xf32>)
Expand All @@ -345,7 +344,7 @@ func.func @bar(%arg0: tensor<8xf32>) -> tensor<8xf32> {

// CHECK-LABEL: @simple_call_graph_result_is_the_output_of_call(%arg0: tensor<8xf32>)
func.func @simple_call_graph_result_is_the_output_of_call(%arg0: tensor<8xf32>) -> tensor<8xf32> {
// CHECK: %[[CALL:.*]] = call @bar(%1) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK: %[[CALL:.*]] = call @bar(%0) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK-NEXT: %[[EDGE:.*]] = sdy.data_flow_edge %[[CALL]] : tensor<8xf32>
// CHECK-NEXT: return %[[EDGE]]
%0 = stablehlo.abs %arg0 : tensor<8xf32>
Expand All @@ -366,10 +365,33 @@ func.func @bar(%arg0: tensor<8xf32>) -> tensor<8xf32> {

// CHECK-LABEL: @simple_call_graph_entry_contains_call_only(%arg0: tensor<8xf32>)
func.func @simple_call_graph_entry_contains_call_only(%arg0: tensor<8xf32>) -> tensor<8xf32> {
// The diff render kept both the stale `call @bar(%0)` CHECK line and its
// replacement; two consecutive CHECK: directives for one op break FileCheck,
// so only the updated expectation (operand stays `%arg0`, i.e. no edge is
// inserted for the entry function's arguments) is kept.
// CHECK: %[[CALL:.*]] = call @bar(%arg0) : (tensor<8xf32>) -> tensor<8xf32>
// CHECK-NEXT: %[[EDGE:.*]] = sdy.data_flow_edge %[[CALL]] : tensor<8xf32>
// CHECK-NEXT: return %[[EDGE]]
%0 = call @bar(%arg0) : (tensor<8xf32>) -> (tensor<8xf32>)
return %0 : tensor<8xf32>
}

// -----

// CHECK-LABEL: func private @top_k_gt_f32_comparator
// Comparator referenced by the PartialReduce custom call below via
// `called_computations`; it is not a `func.call` callee, and the CHECK-NEXT
// lines pin its body as unchanged (no data flow edges inserted into it).
func.func private @top_k_gt_f32_comparator(%arg0: tensor<f32>, %arg1: tensor<f32>, %arg2: tensor<i32>, %arg3: tensor<i32>) -> tensor<i1> {
// CHECK-NEXT: %0 = stablehlo.compare
// CHECK-NEXT: return %0
%0 = stablehlo.compare GT, %arg0, %arg1 : (tensor<f32>, tensor<f32>) -> tensor<i1>
return %0 : tensor<i1>
}

// CHECK-LABEL: func @custom_call_partial_reduce
// The pass's callee walk only visits `func.call` ops, so a function reached
// through a custom call's `called_computations` must be handled gracefully.
// NOTE(review): there are no CHECK lines on the custom call's results —
// presumably this test only guards against the pass dying on such a
// reference; confirm intent.
func.func @custom_call_partial_reduce(%arg0: tensor<16x4xf32>, %arg1: tensor<16x4xf32>, %arg2: tensor<f32>, %arg3: tensor<i32>) -> (tensor<16x2xf32>, tensor<16x2xf32>) {
%0:2 = stablehlo.custom_call @PartialReduce(%arg0, %arg1, %arg2, %arg3) {
mhlo.backend_config = {
aggregate_to_topk = true,
recall_target = 0.9 : f32,
reduction_dim = 1 : i64,
reduction_input_size_override = -1 : i64,
top_k = 2 : i64},
called_computations = [@top_k_gt_f32_comparator]} :
(tensor<16x4xf32>, tensor<16x4xf32>, tensor<f32>, tensor<i32>) -> (tensor<16x2xf32>, tensor<16x2xf32>)
return %0#0, %0#1 : tensor<16x2xf32>, tensor<16x2xf32>
}
Loading
Loading