Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[executorch][flat_tensor] flat tensor header #7764

Merged
merged 6 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 114 additions & 0 deletions extension/flat_tensor/serialize/flat_tensor_header.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/extension/flat_tensor/serialize/flat_tensor_header.h>

#include <cinttypes>
#include <cstring>

#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/result.h>

#pragma clang diagnostic ignored "-Wdeprecated"

namespace executorch {
using runtime::Error;
using runtime::Result;
namespace extension {
namespace {

// Serialized header layout, as byte offsets from the start of the header:
//   [magic: kMagicSize bytes][header length: uint32_t]
//   [flatbuffer_offset: uint64_t][flatbuffer_size: uint64_t]
//   [segment_base_offset: uint64_t][segment_data_size: uint64_t]
// Each offset below is derived from the previous field's offset plus that
// field's size, so the constants stay consistent if a field is ever resized.

/// The expected location of the header length field relative to the beginning
/// of the header.
static constexpr size_t kHeaderLengthOffset = FlatTensorHeader::kMagicSize;

/// The expected location of the flatbuffer_offset field relative to the
/// beginning of the header.
static constexpr size_t kHeaderFlatbufferStartOffset =
    kHeaderLengthOffset + sizeof(uint32_t);

/// The expected location of the flatbuffer_size field relative to the beginning
/// of the header.
static constexpr size_t kHeaderFlatbufferSizeOffset =
    kHeaderFlatbufferStartOffset + sizeof(uint64_t);

/// The expected location of the segment_base_offset field relative to the
/// beginning of the header.
static constexpr size_t kHeaderSegmentBaseOffsetOffset =
    kHeaderFlatbufferSizeOffset + sizeof(uint64_t);

/// The expected location of the segment_data_size field relative to the
/// beginning of the header.
static constexpr size_t kHeaderSegmentDataSizeOffset =
    kHeaderSegmentBaseOffsetOffset + sizeof(uint64_t);

/**
 * The size of the header that covers the fields known to this version of
 * the code. It's ok for a header to be larger as long as the fields stay in
 * the same place, but this code will ignore any new fields.
 */
static constexpr size_t kMinimumHeaderLength =
    kHeaderSegmentDataSizeOffset + sizeof(uint64_t);

// Parse() only verifies that the caller supplied kNumHeadBytes of data before
// reading every field above, so all known fields must fit in that many bytes.
static_assert(
    kMinimumHeaderLength <= FlatTensorHeader::kNumHeadBytes,
    "All known header fields must fit within kNumHeadBytes");

/// Decodes the four consecutive bytes starting at `data` into a uint32_t,
/// treating data[0] as the least-significant byte (little-endian order).
uint32_t GetUInt32LE(const uint8_t* data) {
  uint32_t value = 0;
  // Fold from the most-significant byte down so that each shift makes room
  // for the next lower byte.
  for (int index = 3; index >= 0; --index) {
    value = (value << 8) | static_cast<uint32_t>(data[index]);
  }
  return value;
}

/// Decodes the eight consecutive bytes starting at `data` into a uint64_t,
/// treating data[0] as the least-significant byte (little-endian order).
uint64_t GetUInt64LE(const uint8_t* data) {
  uint64_t value = 0;
  // Fold from the most-significant byte down so that each shift makes room
  // for the next lower byte.
  for (int index = 7; index >= 0; --index) {
    value = (value << 8) | static_cast<uint64_t>(data[index]);
  }
  return value;
}

} // namespace

/* static */ Result<FlatTensorHeader> FlatTensorHeader::Parse(
    const void* data,
    size_t size) {
  // Refuse to look at anything shorter than the fixed-size head of the file.
  if (size < FlatTensorHeader::kNumHeadBytes) {
    return Error::InvalidArgument;
  }
  const uint8_t* const bytes = static_cast<const uint8_t*>(data);

  // The header is only considered present when it starts with the magic.
  const bool magic_matches =
      std::memcmp(
          bytes, FlatTensorHeader::kMagic, FlatTensorHeader::kMagicSize) == 0;
  if (!magic_matches) {
    return Error::NotFound;
  }

  // A header that declares itself shorter than the fields read below cannot
  // be valid. Longer headers are accepted; their extra fields are ignored.
  const uint32_t declared_length = GetUInt32LE(bytes + kHeaderLengthOffset);
  if (declared_length < kMinimumHeaderLength) {
    ET_LOG(
        Error,
        "FlatTensor header length %" PRIu32 " < %zu",
        declared_length,
        kMinimumHeaderLength);
    return Error::InvalidExternalData;
  }

  // Decode each little-endian field from its fixed offset within the header.
  const uint64_t flatbuffer_start =
      GetUInt64LE(bytes + kHeaderFlatbufferStartOffset);
  const uint64_t flatbuffer_length =
      GetUInt64LE(bytes + kHeaderFlatbufferSizeOffset);
  const uint64_t segment_base =
      GetUInt64LE(bytes + kHeaderSegmentBaseOffsetOffset);
  const uint64_t segment_length =
      GetUInt64LE(bytes + kHeaderSegmentDataSizeOffset);

  return FlatTensorHeader{
      /*flatbuffer_offset=*/flatbuffer_start,
      /*flatbuffer_size=*/flatbuffer_length,
      /*segment_base_offset=*/segment_base,
      /*segment_data_size=*/segment_length,
  };
}

// Define storage for the static.
// NOTE(review): under C++17 a constexpr static data member is implicitly
// inline, which makes this out-of-class redeclaration redundant (and
// deprecated) there. It presumably remains for pre-C++17 builds, with the
// "-Wdeprecated" pragma near the top of this file silencing the resulting
// warning -- confirm before removing either one.
// @lint-ignore CLANGTIDY facebook-hte-CArray
constexpr char FlatTensorHeader::kMagic[kMagicSize];

} // namespace extension
} // namespace executorch
69 changes: 69 additions & 0 deletions extension/flat_tensor/serialize/flat_tensor_header.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <executorch/runtime/core/result.h>

namespace executorch {
namespace extension {

/**
* A FlatTensor header found at the beginning of a flat_tensor-serialized blob.
*
* The uint64_t fields of this struct are filled in by Parse(), which decodes
* each of them from the serialized bytes as a little-endian integer.
*/
struct FlatTensorHeader {
/**
* To find the header, callers should provide at least this many bytes of the
* head of the serialized FlatTensor data.
*/
static constexpr size_t kNumHeadBytes = 64;

/**
* The magic bytes that identify the header. This should be in sync with
* the magic in executorch/extension/flat_tensor/serialize/serialize.py
*
* This is the canonical definition of the expected value. If the header
* layout ever changes in a compatibility-breaking way, increment the digits
* in the magic. But, doing so will prevent older binaries from recognizing
* the presence of the header. The compatibility-preserving way to make
* changes is to increase the header's length field and add new fields at the
* end.
*/
static constexpr size_t kMagicSize = 4;
// @lint-ignore CLANGTIDY facebook-hte-CArray
static constexpr char kMagic[kMagicSize] = {'F', 'H', '0', '1'};

/**
* Look for and parse a FlatTensorHeader in the provided data.
*
* @param[in] data The contents of the beginning of the serialized binary
* FlatTensor data, starting at offset 0 (i.e., the head of the file).
* @param[in] size Length of `data` in bytes. Must be >= kNumHeadBytes or this
* call will fail.
*
* @returns a FlatTensorHeader if the header was found and is valid. Returns
* an error if size was too short, if the header was not found, or if the
* header appeared to be corrupt. Specifically:
* - Error::InvalidArgument if `size` is less than kNumHeadBytes.
* - Error::NotFound if `data` does not begin with kMagic.
* - Error::InvalidExternalData if the header's length field is smaller than
*   the fields this version of the code reads.
*/
static runtime::Result<FlatTensorHeader> Parse(const void* data, size_t size);

/// Offset of the FlatTensor flatbuffer in the serialized binary.
uint64_t flatbuffer_offset;

/// Size of the flatbuffer in bytes.
uint64_t flatbuffer_size;

/// The offset in bytes of the first segment.
uint64_t segment_base_offset;

/// Size of all the segment data, in bytes.
uint64_t segment_data_size;
};

} // namespace extension
} // namespace executorch
2 changes: 2 additions & 0 deletions extension/flat_tensor/serialize/serialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ class FlatTensorConfig:
class FlatTensorHeader:
# Class constants.
# The magic bytes that should be at the beginning of the header.
# This should be in sync with the magic in
# executorch/extension/flat_tensor/serialize/flat_tensor_header.h
EXPECTED_MAGIC: ClassVar[bytes] = b"FH01"
EXPECTED_LENGTH: ClassVar[int] = (
# Header magic
Expand Down
8 changes: 8 additions & 0 deletions extension/flat_tensor/serialize/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,11 @@ def define_common_targets():
},
exported_external_deps = ["flatbuffers-api"],
)

runtime.cxx_library(
name = "flat_tensor_header",
srcs = ["flat_tensor_header.cpp"],
exported_headers = ["flat_tensor_header.h"],
visibility = ["//executorch/..."],
exported_deps = ["//executorch/runtime/core:core"],
)
6 changes: 6 additions & 0 deletions extension/flat_tensor/test/TARGETS
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
# Any targets that should be shared between fbcode and xplat must be defined in
# targets.bzl. This file can contain fbcode-only targets.

load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")
load(":targets.bzl", "define_common_targets")

oncall("executorch")

define_common_targets()

python_unittest(
name = "serialize",
srcs = [
Expand Down
92 changes: 92 additions & 0 deletions extension/flat_tensor/test/flat_tensor_header_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/extension/flat_tensor/serialize/flat_tensor_header.h>

#include <gtest/gtest.h>

#include <executorch/runtime/core/result.h>
#include <executorch/runtime/platform/runtime.h>

using namespace ::testing;
using executorch::extension::FlatTensorHeader;
using executorch::runtime::Error;
using executorch::runtime::Result;

/// Test fixture for the FlatTensorHeader tests; its SetUp() initializes the
/// ExecuTorch runtime before each test body runs.
class FlatTensorHeaderTest : public ::testing::Test {
protected:
void SetUp() override {
// Since these tests cause ET_LOG to be called, the PAL must be initialized
// first.
executorch::runtime::runtime_init();
}
};

/**
* An example, valid flat_tensor header.
*
* This data is intentionally fragile. If the header layout or magic changes,
* this test data must change too. The layout of the header is a contract, not
* an implementation detail.
*
* Every byte within each uint64_t field is distinct, so a decoder that reads
* the wrong bytes or the wrong byte order produces a different value.
*/
// clang-format off
// @lint-ignore CLANGTIDY facebook-hte-CArray
constexpr char kExampleHeaderData[] = {
// Magic bytes
'F', 'H', '0', '1',
// uint32_t header size (little endian)
// 0x28 == 40 == 4 magic bytes + 4 length bytes + four 8-byte fields.
0x28, 0x00, 0x00, 0x00,
// uint64_t flatbuffer_offset (little endian)
0x71, 0x61, 0x51, 0x41, 0x31, 0x21, 0x11, 0x01,
// uint64_t flatbuffer_size (little endian)
0x72, 0x62, 0x52, 0x42, 0x32, 0x22, 0x12, 0x02,
// uint64_t segment_base_offset (little endian)
0x73, 0x63, 0x53, 0x43, 0x33, 0x23, 0x13, 0x03,
// uint64_t segment_data_size (little endian)
0x74, 0x64, 0x54, 0x44, 0x34, 0x24, 0x14, 0x04,
};

// The field values a correct parse of kExampleHeaderData must yield: each is
// the little-endian decoding of the matching 8-byte row in that array.
constexpr uint64_t kExampleFlatbufferOffset = 0x0111213141516171;
constexpr uint64_t kExampleFlatbufferSize = 0x0212223242526272;
constexpr uint64_t kExampleSegmentBaseOffset = 0x0313233343536373;
constexpr uint64_t kExampleSegmentDataSize = 0x0414243444546474;

/**
 * Builds a buffer of FlatTensorHeader::kNumHeadBytes bytes that starts with
 * kExampleHeaderData, standing in for the head of a serialized FlatTensor
 * file.
 */
std::vector<uint8_t> CreateExampleFlatTensorHeader() {
  // Pre-fill every byte with a non-zero sentinel so that a read past the end
  // of the example header is easier to notice than it would be with zeros.
  std::vector<uint8_t> buffer(FlatTensorHeader::kNumHeadBytes, 0x55);
  // The example header sits at offset 0; the sentinel bytes remain after it.
  memcpy(buffer.data(), kExampleHeaderData, sizeof(kExampleHeaderData));
  return buffer;
}

TEST_F(FlatTensorHeaderTest, ValidHeaderParsesCorrectly) {
  // A head-sized buffer that begins with the canonical example header.
  std::vector<uint8_t> blob = CreateExampleFlatTensorHeader();

  Result<FlatTensorHeader> parsed =
      FlatTensorHeader::Parse(blob.data(), blob.size());

  // Parsing has to succeed before any field can be inspected.
  ASSERT_EQ(parsed.error(), Error::Ok);

  // Every byte of each example field is distinct, so matching values show
  // both that the right bytes were read and that they were combined in
  // little-endian order.
  EXPECT_EQ(parsed->flatbuffer_offset, kExampleFlatbufferOffset);
  EXPECT_EQ(parsed->flatbuffer_size, kExampleFlatbufferSize);
  EXPECT_EQ(parsed->segment_base_offset, kExampleSegmentBaseOffset);
  EXPECT_EQ(parsed->segment_data_size, kExampleSegmentDataSize);
}
18 changes: 18 additions & 0 deletions extension/flat_tensor/test/targets.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

def define_common_targets():
    """Declares the flat_tensor test targets common to fbcode and xplat.

    Both the TARGETS and BUCK files located next to this targets.bzl file are
    expected to call this function.
    """

    # C++ unit test covering the FlatTensorHeader parser.
    runtime.cxx_test(
        name = "flat_tensor_header_test",
        srcs = ["flat_tensor_header_test.cpp"],
        deps = ["//executorch/extension/flat_tensor/serialize:flat_tensor_header"],
    )
3 changes: 3 additions & 0 deletions runtime/core/error.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ enum class Error : error_code_t {
/// Error caused by the contents of a program.
InvalidProgram = 0x23,

/// Error caused by the contents of external data.
InvalidExternalData = 0x24,

/*
* Delegate errors.
*/
Expand Down
Loading