Skip to content

Commit d6676da

Browse files
pytorchbotlucylq
authored andcommitted
[executorch][flat_tensor] flat tensor header (#7869)
Pull Request resolved: #7764 Parse a FlatTensor header. The FlatTensor header contains: - flatbuffer offset - flatbuffer size - segment base offset - segment data size (total size of all segments) Mostly taken from extended_header.cpp/.h ghstack-source-id: 262588577 Differential Revision: [D67064570](https://our.internmc.facebook.com/intern/diff/D67064570/) Co-authored-by: lucylq <[email protected]>
1 parent 33f6a08 commit d6676da

File tree

8 files changed

+312
-0
lines changed

8 files changed

+312
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/extension/flat_tensor/serialize/flat_tensor_header.h>
10+
11+
#include <cinttypes>
12+
#include <cstring>
13+
14+
#include <executorch/runtime/core/error.h>
15+
#include <executorch/runtime/core/result.h>
16+
17+
#pragma clang diagnostic ignored "-Wdeprecated"
18+
19+
namespace executorch {
20+
using runtime::Error;
21+
using runtime::Result;
22+
namespace extension {
23+
namespace {
24+
25+
/// The expected location of the header length field relative to the beginning
26+
/// of the header.
27+
static constexpr size_t kHeaderLengthOffset = FlatTensorHeader::kMagicSize;
28+
29+
/// The expected location of the flatbuffer_offset field relative to the
30+
/// beginning of the header.
31+
static constexpr size_t kHeaderFlatbufferStartOffset =
32+
kHeaderLengthOffset + sizeof(uint32_t);
33+
34+
/// The expected location of the flatbuffer_size field relative to the beginning
35+
/// of the header.
36+
static constexpr size_t kHeaderFlatbufferSizeOffset =
37+
kHeaderFlatbufferStartOffset + sizeof(uint64_t);
38+
39+
/// The expected location of the segment_base_offset field relative to the
40+
/// beginning of the header.
41+
static constexpr size_t kHeaderSegmentBaseOffsetOffset =
42+
kHeaderFlatbufferSizeOffset + sizeof(uint64_t);
43+
44+
/// The expected location of the segment_data_size field relative to the
45+
/// beginning of the header.
46+
static constexpr size_t kHeaderSegmentDataSizeOffset =
47+
kHeaderSegmentBaseOffsetOffset + sizeof(uint64_t);
48+
49+
/**
50+
* The size of the header that covers the fields known of by this version of
51+
* the code. It's ok for a header to be larger as long as the fields stay in
52+
* the same place, but this code will ignore any new fields.
53+
*/
54+
static constexpr size_t kMinimumHeaderLength =
55+
kHeaderSegmentDataSizeOffset + sizeof(uint64_t);
56+
57+
/// Interprets the 4 bytes at `data` as a little-endian uint32_t.
///
/// The value is assembled one byte at a time, so the result does not depend on
/// the host's byte order or on the alignment of `data`.
///
/// @param[in] data Pointer to at least 4 readable bytes.
/// @returns The decoded value, with data[0] as the least-significant byte.
uint32_t GetUInt32LE(const uint8_t* data) {
  return static_cast<uint32_t>(data[0]) |
      (static_cast<uint32_t>(data[1]) << 8) |
      (static_cast<uint32_t>(data[2]) << 16) |
      (static_cast<uint32_t>(data[3]) << 24);
}
62+
63+
/// Interprets the 8 bytes at `data` as a little-endian uint64_t.
///
/// The value is assembled one byte at a time, so the result does not depend on
/// the host's byte order or on the alignment of `data`.
///
/// @param[in] data Pointer to at least 8 readable bytes.
/// @returns The decoded value, with data[0] as the least-significant byte.
uint64_t GetUInt64LE(const uint8_t* data) {
  return static_cast<uint64_t>(data[0]) |
      (static_cast<uint64_t>(data[1]) << 8) |
      (static_cast<uint64_t>(data[2]) << 16) |
      (static_cast<uint64_t>(data[3]) << 24) |
      (static_cast<uint64_t>(data[4]) << 32) |
      (static_cast<uint64_t>(data[5]) << 40) |
      (static_cast<uint64_t>(data[6]) << 48) |
      (static_cast<uint64_t>(data[7]) << 56);
}
70+
71+
} // namespace
72+
73+
/**
 * Looks for a FlatTensor header at the start of `data` and decodes it.
 *
 * @param[in] data Head of the serialized FlatTensor data.
 * @param[in] size Number of readable bytes at `data`.
 *
 * @returns The decoded header on success. Otherwise:
 *   - Error::InvalidArgument if `data` is null or `size` is smaller than
 *     kNumHeadBytes.
 *   - Error::NotFound if the data does not begin with the magic bytes.
 *   - Error::InvalidExternalData if the stored header length is smaller than
 *     the length of the fields this code knows about.
 */
/* static */ Result<FlatTensorHeader> FlatTensorHeader::Parse(
    const void* data,
    size_t size) {
  // A null buffer can never contain a header; reject it before it is read.
  if (data == nullptr || size < FlatTensorHeader::kNumHeadBytes) {
    return Error::InvalidArgument;
  }
  const uint8_t* header = static_cast<const uint8_t*>(data);

  // Check magic bytes.
  if (std::memcmp(
          header, FlatTensorHeader::kMagic, FlatTensorHeader::kMagicSize) !=
      0) {
    return Error::NotFound;
  }

  // Check header length. A longer header is accepted; only the fields known
  // to this code are read, and any trailing fields are ignored.
  const uint32_t header_length = GetUInt32LE(header + kHeaderLengthOffset);
  if (header_length < kMinimumHeaderLength) {
    ET_LOG(
        Error,
        "FlatTensor header length %" PRIu32 " < %zu",
        header_length,
        kMinimumHeaderLength);
    return Error::InvalidExternalData;
  }

  // The header is present and apparently valid.
  return FlatTensorHeader{
      /*flatbuffer_offset=*/GetUInt64LE(header + kHeaderFlatbufferStartOffset),
      /*flatbuffer_size=*/GetUInt64LE(header + kHeaderFlatbufferSizeOffset),
      /*segment_base_offset=*/
      GetUInt64LE(header + kHeaderSegmentBaseOffsetOffset),
      /*segment_data_size=*/GetUInt64LE(header + kHeaderSegmentDataSizeOffset),
  };
}
108+
109+
// Define storage for the static.
// The initializer lives on the in-class declaration; this namespace-scope
// definition only provides the array's storage.
// NOTE(review): presumably kept for pre-C++17 builds, where odr-using kMagic
// (e.g. passing it to memcmp) needs an out-of-class definition -- confirm
// against the supported toolchains.
// @lint-ignore CLANGTIDY facebook-hte-CArray
constexpr char FlatTensorHeader::kMagic[kMagicSize];
112+
113+
} // namespace extension
114+
} // namespace executorch
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once
10+
11+
#include <executorch/runtime/core/result.h>
12+
13+
namespace executorch {
14+
namespace extension {
15+
16+
/**
 * A FlatTensor header found at the beginning of a flat_tensor-serialized blob.
 *
 * Use Parse() to decode an instance from the head of the serialized data.
 */
struct FlatTensorHeader {
  /**
   * To find the header, callers should provide at least this many bytes of the
   * head of the serialized FlatTensor data.
   */
  static constexpr size_t kNumHeadBytes = 64;

  /**
   * The magic bytes that identify the header. This should be in sync with
   * the magic in executorch/extension/flat_tensor/serialize/serialize.py
   *
   * This is the canonical definition of the expected value. If the header
   * layout ever changes in a compatibility-breaking way, increment the digits
   * in the magic. Doing so, however, will prevent older binaries from
   * recognizing the presence of the header. The compatibility-preserving way
   * to make changes is to increase the header's length field and add new
   * fields at the end.
   *
   * kMagic holds exactly kMagicSize characters and has no NUL terminator, so
   * compare it by length rather than as a C string.
   */
  static constexpr size_t kMagicSize = 4;
  // @lint-ignore CLANGTIDY facebook-hte-CArray
  static constexpr char kMagic[kMagicSize] = {'F', 'H', '0', '1'};

  /**
   * Look for and parse a FlatTensorHeader in the provided data.
   *
   * @param[in] data The contents of the beginning of the serialized binary
   *     FlatTensor data, starting at offset 0 (i.e., the head of the file).
   * @param[in] size Length of `data` in bytes. Must be >= kNumHeadBytes or this
   *     call will fail.
   *
   * @returns a FlatTensorHeader if the header was found and is valid. Returns
   *     Error::InvalidArgument if `size` is smaller than kNumHeadBytes,
   *     Error::NotFound if the data does not start with kMagic, or
   *     Error::InvalidExternalData if the header's length field is too small
   *     to cover the fields below.
   */
  static runtime::Result<FlatTensorHeader> Parse(const void* data, size_t size);

  /// Offset of the FlatTensor flatbuffer in the serialized binary.
  uint64_t flatbuffer_offset;

  /// Size of the flatbuffer in bytes.
  uint64_t flatbuffer_size;

  /// The offset in bytes of the first segment.
  uint64_t segment_base_offset;

  /// Size of all the segment data, in bytes.
  uint64_t segment_data_size;
};
67+
68+
} // namespace extension
69+
} // namespace executorch

extension/flat_tensor/serialize/serialize.py

+2
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ class FlatTensorConfig:
9393
class FlatTensorHeader:
9494
# Class constants.
9595
# The magic bytes that should be at the beginning of the header.
96+
# This should be in sync with the magic in
97+
# executorch/extension/flat_tensor/serialize/flat_tensor_header.h
9698
EXPECTED_MAGIC: ClassVar[bytes] = b"FH01"
9799
EXPECTED_LENGTH: ClassVar[int] = (
98100
# Header magic

extension/flat_tensor/serialize/targets.bzl

+8
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,11 @@ def define_common_targets():
3434
},
3535
exported_external_deps = ["flatbuffers-api"],
3636
)
37+
38+
runtime.cxx_library(
39+
name = "flat_tensor_header",
40+
srcs = ["flat_tensor_header.cpp"],
41+
exported_headers = ["flat_tensor_header.h"],
42+
visibility = ["//executorch/..."],
43+
exported_deps = ["//executorch/runtime/core:core"],
44+
)

extension/flat_tensor/test/TARGETS

+6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
1+
# Any targets that should be shared between fbcode and xplat must be defined in
2+
# targets.bzl. This file can contain fbcode-only targets.
3+
14
load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")
5+
load(":targets.bzl", "define_common_targets")
26

37
oncall("executorch")
48

9+
define_common_targets()
10+
511
python_unittest(
612
name = "serialize",
713
srcs = [
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/extension/flat_tensor/serialize/flat_tensor_header.h>

#include <cstdint>
#include <cstring>
#include <vector>

#include <gtest/gtest.h>

#include <executorch/runtime/core/result.h>
#include <executorch/runtime/platform/runtime.h>
15+
16+
using namespace ::testing;
17+
using executorch::extension::FlatTensorHeader;
18+
using executorch::runtime::Error;
19+
using executorch::runtime::Result;
20+
21+
/// Fixture shared by the FlatTensorHeader tests; it only performs runtime
/// initialization.
class FlatTensorHeaderTest : public ::testing::Test {
 protected:
  /// gtest hook that runs before each test body.
  void SetUp() override {
    // Since these tests cause ET_LOG to be called, the PAL must be initialized
    // first.
    executorch::runtime::runtime_init();
  }
};
29+
30+
/**
 * An example, valid flat_tensor header.
 *
 * This data is intentionally fragile. If the header layout or magic changes,
 * this test data must change too. The layout of the header is a contract, not
 * an implementation detail.
 *
 * Every byte of each 64-bit field is distinct, so a decoder that reads the
 * wrong bytes or assembles them in the wrong order yields a different value.
 */
// clang-format off
// @lint-ignore CLANGTIDY facebook-hte-CArray
constexpr char kExampleHeaderData[] = {
  // Magic bytes
  'F', 'H', '0', '1',
  // uint32_t header size (little endian)
  0x28, 0x00, 0x00, 0x00,
  // uint64_t flatbuffer_offset
  0x71, 0x61, 0x51, 0x41, 0x31, 0x21, 0x11, 0x01,
  // uint64_t flatbuffer_size
  0x72, 0x62, 0x52, 0x42, 0x32, 0x22, 0x12, 0x02,
  // uint64_t segment_base_offset
  0x73, 0x63, 0x53, 0x43, 0x33, 0x23, 0x13, 0x03,
  // uint64_t segment_data_size
  0x74, 0x64, 0x54, 0x44, 0x34, 0x24, 0x14, 0x04,
};
// clang-format on

// The header size encoded in bytes 4..7 above must describe the array itself.
static_assert(
    sizeof(kExampleHeaderData) == 0x28,
    "Encoded header size must match the length of the example data");

/// Expected decoded values of the 64-bit fields in kExampleHeaderData.
constexpr uint64_t kExampleFlatbufferOffset = 0x0111213141516171;
constexpr uint64_t kExampleFlatbufferSize = 0x0212223242526272;
constexpr uint64_t kExampleSegmentBaseOffset = 0x0313233343536373;
constexpr uint64_t kExampleSegmentDataSize = 0x0414243444546474;
58+
59+
/**
60+
* Returns fake serialized FlatTensor data that contains kExampleHeaderData at
61+
* the expected offset.
62+
*/
63+
std::vector<uint8_t> CreateExampleFlatTensorHeader() {
64+
// Allocate memory representing the FlatTensor header.
65+
std::vector<uint8_t> ret(FlatTensorHeader::kNumHeadBytes);
66+
// Write non-zeros into it to make it more obvious if we read outside the
67+
// header.
68+
memset(ret.data(), 0x55, ret.size());
69+
// Copy the example header into the right offset.
70+
memcpy(
71+
ret.data(),
72+
kExampleHeaderData,
73+
sizeof(kExampleHeaderData));
74+
return ret;
75+
}
76+
77+
TEST_F(FlatTensorHeaderTest, ValidHeaderParsesCorrectly) {
  // Head-of-file bytes that begin with the example header.
  const std::vector<uint8_t> blob = CreateExampleFlatTensorHeader();

  Result<FlatTensorHeader> parsed =
      FlatTensorHeader::Parse(blob.data(), blob.size());

  // Parsing has to succeed before any field can be inspected.
  ASSERT_EQ(parsed.error(), Error::Ok);

  // Each byte of these fields is unique, so matching values show both that the
  // little-endian decoding is right and that the expected header bytes were
  // the ones read.
  EXPECT_EQ(parsed->flatbuffer_offset, kExampleFlatbufferOffset);
  EXPECT_EQ(parsed->flatbuffer_size, kExampleFlatbufferSize);
  EXPECT_EQ(parsed->segment_base_offset, kExampleSegmentBaseOffset);
  EXPECT_EQ(parsed->segment_data_size, kExampleSegmentDataSize);
}
+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
2+
3+
def define_common_targets():
    """Declares the build targets common to fbcode and xplat.

    Both the TARGETS and BUCK files that sit next to this targets.bzl are
    expected to call this function.
    """

    # Unit test for the FlatTensor header parser.
    runtime.cxx_test(
        name = "flat_tensor_header_test",
        srcs = ["flat_tensor_header_test.cpp"],
        deps = ["//executorch/extension/flat_tensor/serialize:flat_tensor_header"],
    )

runtime/core/error.h

+3
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ enum class Error : error_code_t {
7979
/// Error caused by the contents of a program.
8080
InvalidProgram = 0x23,
8181

82+
/// Error caused by the contents of external data.
83+
InvalidExternalData = 0x24,
84+
8285
/*
8386
* Delegate errors.
8487
*/

0 commit comments

Comments
 (0)