Skip to content

Commit 27f0295

Browse files
ezhulenev authored and Google-ML-Automation committed
PR #40917: [xla:gpu] Print HLO/Executable fingerprints when loading from AOT result
Imported from GitHub PR #40917

This helps with debugging non-determinism at scale by making sure we actually load and execute the same thing!

Copybara import of the project:

-- fdcddf6 by Eugene Zhulenev <ezhulenev@openxla.org>:

[xla:gpu] Print HLO/Executable fingerprints when loading from AOT result

Merging this change closes #40917

FUTURE_COPYBARA_INTEGRATE_REVIEW=#40917 from ezhulenev:fingerpint-aot-result-when-load fdcddf6
PiperOrigin-RevId: 900705039
1 parent bf6c157 commit 27f0295

File tree

4 files changed

+78
-34
lines changed

4 files changed

+78
-34
lines changed

xla/service/gpu/BUILD

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2186,6 +2186,8 @@ cc_library(
21862186
":gpu_executable",
21872187
":gpu_executable_proto_cc",
21882188
"//xla:debug_options_flags",
2189+
"//xla:printer",
2190+
"//xla:xla_proto_cc",
21892191
"//xla/hlo/ir:hlo",
21902192
"//xla/pjrt:compiled_memory_stats",
21912193
"//xla/service:buffer_assignment",
@@ -2195,20 +2197,22 @@ cc_library(
21952197
"//xla/stream_executor:kernel_symbol_registry",
21962198
"//xla/stream_executor:platform",
21972199
"//xla/stream_executor/abi:executable_abi_version",
2198-
"//xla/tsl/platform:errors",
2200+
"//xla/tsl/lib/strings:proto_serialization",
2201+
"//xla/tsl/platform:logging",
21992202
"//xla/tsl/platform:status_macros",
2200-
"//xla/tsl/platform:statusor",
22012203
"//xla/util/split_proto:split_gpu_executable_writer",
22022204
"//xla/util/split_proto:split_proto_reader",
22032205
"@com_google_absl//absl/base:nullability",
22042206
"@com_google_absl//absl/functional:overload",
22052207
"@com_google_absl//absl/memory",
22062208
"@com_google_absl//absl/status",
22072209
"@com_google_absl//absl/status:statusor",
2210+
"@com_google_absl//absl/strings:str_format",
22082211
"@com_google_absl//absl/strings:string_view",
22092212
"@com_google_protobuf//:arena",
22102213
"@riegeli//riegeli/bytes:reader",
22112214
"@riegeli//riegeli/bytes:string_writer",
2215+
"@tsl//tsl/platform:fingerprint",
22122216
],
22132217
)
22142218

xla/service/gpu/gpu_aot_compilation_result.cc

Lines changed: 49 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -24,34 +24,55 @@ limitations under the License.
2424
#include "absl/functional/overload.h"
2525
#include "absl/memory/memory.h"
2626
#include "absl/status/statusor.h"
27+
#include "absl/strings/str_format.h"
2728
#include "absl/strings/string_view.h"
2829
#include "xla/tsl/platform/status_macros.h"
2930
#include "google/protobuf/arena.h"
3031
#include "riegeli/bytes/string_writer.h"
3132
#include "xla/debug_options_flags.h"
3233
#include "xla/hlo/ir/hlo_module.h"
34+
#include "xla/hlo/ir/hlo_print_options.h"
3335
#include "xla/pjrt/compiled_memory_stats.h"
36+
#include "xla/printer.h"
3437
#include "xla/service/buffer_assignment.h"
3538
#include "xla/service/executable.h"
3639
#include "xla/service/gpu/gpu_executable.h"
3740
#include "xla/service/gpu/gpu_executable.pb.h"
3841
#include "xla/stream_executor/device_description.h"
3942
#include "xla/stream_executor/kernel_symbol_registry.h"
4043
#include "xla/stream_executor/platform.h"
41-
#include "xla/tsl/platform/errors.h"
42-
#include "xla/tsl/platform/statusor.h"
44+
#include "xla/tsl/lib/strings/proto_serialization.h"
45+
#include "xla/tsl/platform/logging.h"
4346
#include "xla/util/split_proto/split_gpu_executable_writer.h"
4447
#include "xla/util/split_proto/split_proto_reader.h"
48+
#include "xla/xla.pb.h"
49+
#include "tsl/platform/fingerprint.h"
4550

4651
namespace xla::gpu {
4752

53+
static absl::StatusOr<std::pair<std::unique_ptr<HloModule>, tsl::Fprint128>>
54+
ParseHloModuleAndFingerprint(const HloModuleProtoWithConfig& proto) {
55+
ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
56+
HloModule::CreateFromProtoWithConfig(proto));
57+
HighwayHashPrinter printer;
58+
module->Print(&printer, HloPrintOptions::Canonical()
59+
.set_print_backend_config(true)
60+
.set_sort_backend_config(true));
61+
return std::make_pair(std::move(module), printer.ToFingerprint128());
62+
}
63+
4864
absl::StatusOr<std::unique_ptr<GpuAotCompilationResult>>
4965
GpuAotCompilationResult::FromProto(GpuExecutableProto executable_proto) {
50-
TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
51-
HloModule::CreateFromProtoWithConfig(
52-
executable_proto.hlo_module_with_config()));
66+
tsl::Fprint128 executable_fingerprint = {
67+
tsl::DeterministicProtoHash64(executable_proto),
68+
tsl::DeterministicProtoHash64(executable_proto, /*seed=*/1)};
69+
ASSIGN_OR_RETURN(
70+
auto module_and_fingerprint,
71+
ParseHloModuleAndFingerprint(executable_proto.hlo_module_with_config()));
72+
auto& [module, hlo_fingerprint] = module_and_fingerprint;
5373
return absl::WrapUnique(new GpuAotCompilationResult(
54-
std::move(executable_proto), std::move(module)));
74+
std::move(executable_proto), std::move(module), hlo_fingerprint,
75+
executable_fingerprint));
5576
}
5677

5778
absl::StatusOr<std::unique_ptr<GpuAotCompilationResult>>
@@ -61,20 +82,24 @@ GpuAotCompilationResult::FromSerialized(
6182
GpuExecutableProto* executable_proto =
6283
google::protobuf::Arena::Create<GpuExecutableProto>(arena.get());
6384

64-
TF_RETURN_IF_ERROR(ReadSplitProto(std::move(reader), *executable_proto));
85+
RETURN_IF_ERROR(ReadSplitProto(std::move(reader), *executable_proto));
6586

66-
TF_ASSIGN_OR_RETURN(std::unique_ptr<HloModule> module,
67-
HloModule::CreateFromProtoWithConfig(
68-
executable_proto->hlo_module_with_config()));
69-
return absl::WrapUnique(
70-
new GpuAotCompilationResult(internal::ArenaAllocatedGpuExecutableProto(
71-
std::move(arena), executable_proto),
72-
std::move(module)));
87+
tsl::Fprint128 executable_fingerprint = {
88+
tsl::DeterministicProtoHash64(*executable_proto),
89+
tsl::DeterministicProtoHash64(*executable_proto, /*seed=*/1)};
90+
ASSIGN_OR_RETURN(
91+
auto module_and_fingerprint,
92+
ParseHloModuleAndFingerprint(executable_proto->hlo_module_with_config()));
93+
auto& [module, hlo_fingerprint] = module_and_fingerprint;
94+
return absl::WrapUnique(new GpuAotCompilationResult(
95+
internal::ArenaAllocatedGpuExecutableProto(std::move(arena),
96+
executable_proto),
97+
std::move(module), hlo_fingerprint, executable_fingerprint));
7398
}
7499

75100
absl::StatusOr<std::string> GpuAotCompilationResult::SerializeAsString() const {
76101
std::string serialized;
77-
TF_RETURN_IF_ERROR(WriteSplitGpuExecutable(
102+
RETURN_IF_ERROR(WriteSplitGpuExecutable(
78103
GetExecutableProto(),
79104
std::make_unique<riegeli::StringWriter<>>(&serialized)));
80105
return serialized;
@@ -89,6 +114,15 @@ GpuAotCompilationResult::LoadExecutable(
89114
stream_executor::KernelSymbolRegistry::GetGlobalInstance();
90115
return registry.FindSymbol(symbol_name, platform_id);
91116
};
117+
118+
VLOG(1) << absl::StrFormat(
119+
"GpuAotCompilationResult::LoadExecutable: module=%s "
120+
"num_instructions=%d hlo_fingerprint=%016x%016x "
121+
"executable_fingerprint=%016x%016x",
122+
hlo_module_->name(), hlo_module_->instruction_count(),
123+
hlo_fingerprint_.low64, hlo_fingerprint_.high64,
124+
executable_fingerprint_.low64, executable_fingerprint_.high64);
125+
92126
return GpuExecutable::FromProto(GetExecutableProto(), device_description,
93127
platform_id->ToName(),
94128
GetDebugOptionsFromFlags(), symbol_resolver);

xla/service/gpu/gpu_aot_compilation_result.h

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ limitations under the License.
3434
#include "xla/stream_executor/abi/executable_abi_version.h"
3535
#include "xla/stream_executor/device_description.h"
3636
#include "xla/stream_executor/platform.h"
37+
#include "tsl/platform/fingerprint.h"
3738

3839
namespace xla::gpu {
3940

@@ -98,13 +99,18 @@ class GpuAotCompilationResult : public CompiledModule {
9899
std::variant<internal::ArenaAllocatedGpuExecutableProto,
99100
GpuExecutableProto>
100101
gpu_executable_proto,
101-
std::shared_ptr<HloModule> hlo_module)
102+
std::shared_ptr<HloModule> hlo_module, tsl::Fprint128 hlo_fingerprint,
103+
tsl::Fprint128 executable_fingerprint)
102104
: gpu_executable_proto_(std::move(gpu_executable_proto)),
103-
hlo_module_(std::move(hlo_module)) {}
105+
hlo_module_(std::move(hlo_module)),
106+
hlo_fingerprint_(hlo_fingerprint),
107+
executable_fingerprint_(executable_fingerprint) {}
104108

105109
std::variant<internal::ArenaAllocatedGpuExecutableProto, GpuExecutableProto>
106110
gpu_executable_proto_;
107111
std::shared_ptr<HloModule> hlo_module_;
112+
tsl::Fprint128 hlo_fingerprint_;
113+
tsl::Fprint128 executable_fingerprint_;
108114
};
109115

110116
} // namespace xla::gpu

xla/service/gpu/gpu_aot_compilation_result_test.cc

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -188,15 +188,15 @@ class GpuAotCompilationResultTest : public ::testing::Test {
188188
};
189189

190190
TEST_F(GpuAotCompilationResultTest, CreateAndSerialize) {
191-
TF_ASSERT_OK_AND_ASSIGN(GpuExecutableProto reference_executable,
192-
CreateGpuExecutableProto());
191+
ASSERT_OK_AND_ASSIGN(GpuExecutableProto reference_executable,
192+
CreateGpuExecutableProto());
193193

194-
TF_ASSERT_OK_AND_ASSIGN(
194+
ASSERT_OK_AND_ASSIGN(
195195
std::unique_ptr<GpuAotCompilationResult> result,
196196
GpuAotCompilationResult::FromProto(reference_executable));
197197

198-
TF_ASSERT_OK_AND_ASSIGN(std::string serialized_result,
199-
result->SerializeAsString());
198+
ASSERT_OK_AND_ASSIGN(std::string serialized_result,
199+
result->SerializeAsString());
200200

201201
GpuExecutableProto deserialized_executable;
202202
ASSERT_OK(ReadSplitProto(
@@ -214,14 +214,14 @@ TEST_F(GpuAotCompilationResultTest, CreateAndSerialize) {
214214
}
215215

216216
TEST_F(GpuAotCompilationResultTest, LoadExecutable) {
217-
TF_ASSERT_OK_AND_ASSIGN(GpuExecutableProto reference_executable,
218-
CreateGpuExecutableProto());
219-
TF_ASSERT_OK_AND_ASSIGN(
217+
ASSERT_OK_AND_ASSIGN(GpuExecutableProto reference_executable,
218+
CreateGpuExecutableProto());
219+
ASSERT_OK_AND_ASSIGN(
220220
std::unique_ptr<GpuAotCompilationResult> result,
221221
GpuAotCompilationResult::FromProto(reference_executable));
222222

223223
{
224-
TF_ASSERT_OK_AND_ASSIGN(
224+
ASSERT_OK_AND_ASSIGN(
225225
stream_executor::ExecutableAbiVersion executable_abi_version,
226226
result->GetExecutableAbiVersion());
227227
EXPECT_EQ(executable_abi_version.platform_name(), "CUDA");
@@ -233,12 +233,12 @@ TEST_F(GpuAotCompilationResultTest, LoadExecutable) {
233233

234234
EnsureCudaSymbolIsRegistered();
235235

236-
TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Executable> executable,
237-
std::move(*result).LoadExecutable(
238-
platform_.id(), GetDeviceDescription()));
236+
ASSERT_OK_AND_ASSIGN(std::unique_ptr<Executable> executable,
237+
std::move(*result).LoadExecutable(
238+
platform_.id(), GetDeviceDescription()));
239239

240240
{
241-
TF_ASSERT_OK_AND_ASSIGN(
241+
ASSERT_OK_AND_ASSIGN(
242242
stream_executor::ExecutableAbiVersion executable_abi_version,
243243
executable->GetExecutableAbiVersion());
244244
EXPECT_EQ(executable_abi_version.platform_name(), "CUDA");
@@ -251,8 +251,8 @@ TEST_F(GpuAotCompilationResultTest, LoadExecutable) {
251251
auto* gpu_executable = dynamic_cast<GpuExecutable*>(executable.get());
252252
ASSERT_NE(gpu_executable, nullptr) << "Executable is not a GpuExecutable.";
253253

254-
TF_ASSERT_OK_AND_ASSIGN(GpuExecutableProto executable_proto,
255-
gpu_executable->ToProto());
254+
ASSERT_OK_AND_ASSIGN(GpuExecutableProto executable_proto,
255+
gpu_executable->ToProto());
256256
// HLO module is re-created from proto, and will have a new ID, so we clear
257257
// it for comparison purposes.
258258
executable_proto.mutable_hlo_module_with_config()

0 commit comments

Comments
 (0)