Skip to content

Commit 86c07af

Browse files
matthiaskramm and copybara-github
authored and committed
hlo_proto_to_memory_visualization_utils: Rely on buffer_assignment to compute unpadded sizes and indefinite allocations.
PiperOrigin-RevId: 896635419
1 parent e0617f3 commit 86c07af

File tree

3 files changed

+51
-25
lines changed

3 files changed

+51
-25
lines changed

xprof/convert/BUILD

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1594,6 +1594,7 @@ cc_library(
15941594
"@com_google_absl//absl/container:flat_hash_map",
15951595
"@com_google_absl//absl/container:flat_hash_set",
15961596
"@com_google_absl//absl/log",
1597+
"@com_google_absl//absl/log:check",
15971598
"@com_google_absl//absl/status",
15981599
"@com_google_absl//absl/status:statusor",
15991600
"@com_google_absl//absl/strings",

xprof/convert/hlo_proto_to_memory_visualization_utils.cc

Lines changed: 16 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ limitations under the License.
3030

3131
#include "absl/container/flat_hash_map.h"
3232
#include "absl/container/flat_hash_set.h"
33+
#include "absl/log/check.h"
3334
#include "absl/log/log.h"
3435
#include "absl/status/status.h"
3536
#include "absl/status/statusor.h"
@@ -95,18 +96,6 @@ std::string ShapeDescription(const Shape& shape) {
9596
return ShapeUtil::HumanStringWithLayout(shape);
9697
}
9798

98-
// A wrapper around ShapeUtil::ByteSizeOf that clears out the layout/padding,
99-
// since that is considered in the ByteSizeOf calculation.
100-
int64_t ShapeUnpaddedSize(Shape shape) {
101-
// Ensure the layout has no padding by making it the default layout.
102-
LayoutUtil::SetToDefaultLayout(&shape);
103-
// Note: we make a simplifying assumption here that a "minimal" size for a
104-
// tuple member would be the size of a `void*` -- there may be even fancier
105-
// ways of doing things, but this should give a good enough approximation of
106-
// what a minimal tuple size is.
107-
return ShapeUtil::ByteSizeOf(shape, /*pointer_size=*/sizeof(void*));
108-
}
109-
11099
class BufferAllocationStruct {
111100
public:
112101
explicit BufferAllocationStruct(const BufferAllocationProto& proto)
@@ -157,19 +146,21 @@ class BufferAllocationStruct {
157146
struct LogicalBufferStruct {
158147
LogicalBufferStruct(const LogicalBufferProto& p,
159148
const BufferAllocationStruct& b,
160-
const ::xla::HloInstructionProto& i, uint64_t offset)
149+
const ::xla::HloInstructionProto& i, uint64_t offset,
150+
int64_t unpadded_size)
161151
: proto(p),
162152
buffer_allocation(b),
163153
hlo_instruction(i),
164154
offset(offset),
165155
shape(ResolveShapeIndex(hlo_instruction.shape(),
166-
proto.defined_at().shape_index())) {}
156+
proto.defined_at().shape_index())),
157+
unpadded_size_(unpadded_size) {}
167158

168159
absl::string_view instruction_name() const { return hlo_instruction.name(); }
169160

170161
int64_t color() const { return proto.color(); }
171162
size_t size() const { return proto.size(); }
172-
size_t unpadded_size() const { return ShapeUnpaddedSize(shape); }
163+
size_t unpadded_size() const { return unpadded_size_; }
173164

174165
// reference counting related
175166
int64_t inc() {
@@ -217,6 +208,7 @@ struct LogicalBufferStruct {
217208
xla::Shape shape;
218209
int64_t ref_count = 0;
219210
LogicalBufferStruct* canonical_buffer = nullptr;
211+
int64_t unpadded_size_;
220212
};
221213

222214
// A wrapper of HLO BufferAssignment, with lookup maps for logical buffers and
@@ -312,6 +304,11 @@ class HloProtoBufferWrapper {
312304
id_to_logical_buffer_proto[logical_buffer.id()] = &logical_buffer;
313305
}
314306

307+
absl::StatusOr<absl::flat_hash_map<int64_t, int64_t>>
308+
logical_buffer_unpadded_sizes = ComputeLogicalBufferUnpaddedSizes(
309+
hlo_proto_.hlo_module(), hlo_proto_.buffer_assignment());
310+
CHECK_OK(logical_buffer_unpadded_sizes);
311+
315312
for (const auto& buffer_allocation :
316313
hlo_proto_.buffer_assignment().buffer_allocations()) {
317314
auto& buffer_allocation_s =
@@ -333,7 +330,8 @@ class HloProtoBufferWrapper {
333330
const auto* instruction = unique_id_to_hlo.at(inst_id);
334331
id_to_logical_buffer_[id] = std::make_unique<LogicalBufferStruct>(
335332
*logical_buffer, *buffer_allocation_s, *instruction,
336-
assigned.offset());
333+
assigned.offset(),
334+
logical_buffer_unpadded_sizes->at(logical_buffer->id()));
337335
}
338336
}
339337

@@ -514,7 +512,6 @@ void NoteSpecialAllocations(const HloProtoBufferWrapper& wrapper,
514512
int64_t entry_parameters_bytes = 0;
515513
int64_t non_reusable_bytes = 0;
516514
int64_t maybe_live_out_bytes = 0;
517-
int64_t indefinite_buffer_allocation_bytes = 0;
518515
for (const auto* buffer_allocation_struct :
519516
wrapper.GetBufferAllocations(memory_color)) {
520517
const auto& buffer_allocation = buffer_allocation_struct->proto();
@@ -533,7 +530,6 @@ void NoteSpecialAllocations(const HloProtoBufferWrapper& wrapper,
533530
maybe_live_out_bytes += buffer_allocation.size();
534531
}
535532
if (buffer_allocation_struct->IsIndefinite()) {
536-
indefinite_buffer_allocation_bytes += buffer_allocation.size();
537533
Convert(buffer_allocation, wrapper, result->add_indefinite_lifetimes());
538534
}
539535
}
@@ -546,7 +542,8 @@ void NoteSpecialAllocations(const HloProtoBufferWrapper& wrapper,
546542
BytesToMiB(xla::ComputeTotalAllocationBytes(
547543
wrapper.GetHloProto().buffer_assignment(), memory_color)));
548544
result->set_indefinite_buffer_allocation_mib(
549-
BytesToMiB(indefinite_buffer_allocation_bytes));
545+
BytesToMiB(xla::ComputeIndefiniteAllocationsInBytes(
546+
wrapper.GetHloProto().buffer_assignment(), memory_color)));
550547
}
551548

552549
// Memory usage statistics collected from heap simulator trace.

xprof/convert/hlo_proto_to_memory_visualization_utils_test.cc

Lines changed: 34 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -28,8 +28,8 @@ namespace tensorflow {
2828
namespace profiler {
2929
namespace {
3030

31-
// 1 buffer allocation of 1MB
32-
// 2 logical buffers, each is 0.5MB
31+
// 2 buffer allocations of 1MB, one of which is indefinite (constant).
32+
// 3 logical buffers
3333
static constexpr char kHLOBase[] = R"pb(
3434
hlo_module {
3535
name: "test_module"
@@ -46,6 +46,11 @@ static constexpr char kHLOBase[] = R"pb(
4646
id: 1
4747
shape { tuple_shapes { element_type: U64 } }
4848
}
49+
instructions {
50+
name: "constant.1"
51+
id: 2
52+
shape { tuple_shapes { element_type: U64 } }
53+
}
4954
}
5055
}
5156
buffer_assignment {
@@ -56,6 +61,13 @@ static constexpr char kHLOBase[] = R"pb(
5661
assigned { logical_buffer_id: 1 offset: 0 size: 524288 }
5762
assigned { logical_buffer_id: 2 offset: 524288 size: 524288 }
5863
}
64+
buffer_allocations {
65+
index: 1
66+
size: 1048576
67+
color: 0
68+
is_constant: true
69+
assigned { logical_buffer_id: 3 offset: 0 size: 1048576 }
70+
}
5971
logical_buffers {
6072
id: 1
6173
size: 524288
@@ -68,6 +80,12 @@ static constexpr char kHLOBase[] = R"pb(
6880
color: 0
6981
defined_at { instruction_id: 1 shape_index: 0 }
7082
}
83+
logical_buffers {
84+
id: 3
85+
size: 1048576
86+
color: 0
87+
defined_at { instruction_id: 2 shape_index: 0 }
88+
}
7189
heap_simulator_traces { %s }
7290
}
7391
)pb";
@@ -86,8 +104,13 @@ TEST(MemoryViewerTest, TestHeapSimulatorTraceShareWith_1) {
86104
TF_ASSERT_OK_AND_ASSIGN(
87105
PreprocessResult preprocess_result,
88106
ConvertHloProtoToPreprocessResult(hlo_proto, {.small_buffer_size = 0}));
89-
EXPECT_EQ(preprocess_result.peak_heap_mib(), 0.5);
90-
EXPECT_EQ(preprocess_result.total_buffer_allocation_mib(), 1);
107+
EXPECT_EQ(preprocess_result.peak_heap_mib(), 1.5);
108+
// [Peak unpadded heap] = [peak of unpadded heap-simulated buffer sizes] +
109+
// [padded size of indefinite buffers]. In this case, the computations are on
110+
// a single U64 (8 bytes), and we have 1MB of indefinite buffers.
111+
EXPECT_EQ(preprocess_result.peak_unpadded_heap_mib(), 8.0 / (1 << 20) + 1);
112+
EXPECT_EQ(preprocess_result.total_buffer_allocation_mib(), 2);
113+
EXPECT_EQ(preprocess_result.indefinite_buffer_allocation_mib(), 1);
91114
}
92115

93116
TEST(MemoryViewerTest, TestHeapSimulatorTraceShareWith_2) {
@@ -105,8 +128,13 @@ TEST(MemoryViewerTest, TestHeapSimulatorTraceShareWith_2) {
105128
ASSERT_TRUE(ParseTextFormatFromString(hlo_string, &hlo_proto).ok());
106129
TF_ASSERT_OK_AND_ASSIGN(PreprocessResult preprocess_result,
107130
ConvertHloProtoToPreprocessResult(hlo_proto, option));
108-
EXPECT_EQ(preprocess_result.peak_heap_mib(), 0.5);
109-
EXPECT_EQ(preprocess_result.total_buffer_allocation_mib(), 1);
131+
EXPECT_EQ(preprocess_result.peak_heap_mib(), 1.5);
132+
// [Peak unpadded heap] = [peak of unpadded heap-simulated buffer sizes] +
133+
// [padded size of indefinite buffers]. In this case, the computations are on
134+
// a single U64 (8 bytes), and we have 1MB of indefinite buffers.
135+
EXPECT_EQ(preprocess_result.peak_unpadded_heap_mib(), 8.0 / (1 << 20) + 1);
136+
EXPECT_EQ(preprocess_result.total_buffer_allocation_mib(), 2);
137+
EXPECT_EQ(preprocess_result.indefinite_buffer_allocation_mib(), 1);
110138
EXPECT_FALSE(preprocess_result.allocation_timeline().empty());
111139
}
112140

0 commit comments

Comments
 (0)