Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/consensus_types/phase0.zig
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pub const Checkpoint = ssz.FixedContainerType(struct {
root: p.Root,
});

pub const Validator = ssz.FixedContainerType(struct {
pub const Validator = ssz.StructContainerType(struct {
pubkey: p.BLSPubkey,
withdrawal_credentials: p.Root,
effective_balance: p.Gwei,
Expand Down
146 changes: 136 additions & 10 deletions src/persistent_merkle_tree/Node.zig
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ const Depth = @import("hashing").Depth;
const Gindex = @import("gindex.zig").Gindex;

hash: [32]u8,
// `left` and `right` are child node IDs for leaf/zero/regular branch nodes.
// For branch-struct nodes, `left` stores the high pointer bits and `right`
// stores the low pointer bits of the packed BranchStructRef pointer.
left: Id,
right: Id,
state: State,
Expand All @@ -34,8 +37,8 @@ pub const Error = error{
///
/// `[1, next_free]`
///
/// If the high bit is not set, the next two bits determine the `node_type`
/// The following 29 bits are used for the `ref_count`.
/// If the high bit is not set, the next 3 bits determine the `node_type`
/// The following 28 bits are used for the `ref_count`.
///
/// `[0, node_type, ref_count]`
pub const State = enum(u32) {
Expand All @@ -45,14 +48,16 @@ pub const State = enum(u32) {

pub const max_next_free = 0x7FFFFFFF;

// four types of nodes
const node_type = 0x60000000;
// five types of nodes, use 3 bits
const node_type = 0x70000000;
pub const zero: State = @enumFromInt(0x00000000);
pub const leaf: State = @enumFromInt(0x20000000);
pub const branch_lazy: State = @enumFromInt(0x40000000);
pub const branch_computed: State = @enumFromInt(0x60000000);
pub const leaf: State = @enumFromInt(0x10000000);
pub const branch_lazy: State = @enumFromInt(0x20000000);
pub const branch_computed: State = @enumFromInt(0x30000000);
pub const branch_struct_lazy: State = @enumFromInt(0x40000000);
pub const branch_struct_computed: State = @enumFromInt(0x50000000);

pub const max_ref_count = 0x1FFFFFFF;
pub const max_ref_count = 0x0FFFFFFF;

pub inline fn isFree(node: State) bool {
return @intFromEnum(node) & @intFromEnum(free) != 0;
Expand All @@ -75,7 +80,8 @@ pub const State = enum(u32) {
}

/// True only for regular branch nodes (lazy or computed).
/// Branch-struct nodes are a distinct node type and are excluded here;
/// test for them with isBranchStruct() instead.
pub inline fn isBranch(node: State) bool {
    const masked = @intFromEnum(node) & node_type;
    return masked == @intFromEnum(branch_lazy) or masked == @intFromEnum(branch_computed);
}

pub inline fn isBranchLazy(node: State) bool {
Expand All @@ -86,10 +92,27 @@ pub const State = enum(u32) {
return @intFromEnum(node) & node_type == @intFromEnum(branch_computed);
}

/// True for branch-struct nodes, whether still lazy or already computed.
pub inline fn isBranchStruct(node: State) bool {
    return switch (@intFromEnum(node) & node_type) {
        @intFromEnum(branch_struct_lazy), @intFromEnum(branch_struct_computed) => true,
        else => false,
    };
}

/// True for branch-struct nodes whose root hash has not been computed yet.
pub inline fn isBranchStructLazy(node: State) bool {
    return @intFromEnum(node) & node_type == @intFromEnum(branch_struct_lazy);
}

/// True for branch-struct nodes whose root hash is already cached.
pub inline fn isBranchStructComputed(node: State) bool {
    const masked = @intFromEnum(node) & node_type;
    return masked == @intFromEnum(branch_struct_computed);
}

/// Marks a branch node's hash as computed by OR-ing in the computed
/// node-type bits. Intended for nodes currently in branch_lazy state.
pub inline fn setBranchComputed(node: *State) void {
    const updated_bits = @intFromEnum(node.*) | @intFromEnum(branch_computed);
    node.* = @enumFromInt(updated_bits);
}

/// Marks a branch-struct node's hash as computed by OR-ing in the computed
/// node-type bits. Intended for nodes currently in branch_struct_lazy state.
pub inline fn setBranchStructComputed(node: *State) void {
    const updated_bits = @intFromEnum(node.*) | @intFromEnum(branch_struct_computed);
    node.* = @enumFromInt(updated_bits);
}

/// Returns the state ready for use as a freshly allocated node's state.
/// Callers pass a bare node-type constant (e.g. `State.branch_struct_lazy`)
/// whose ref-count bits are already zero, so the value passes through
/// unchanged; the function exists to make that intent explicit at call sites.
pub inline fn initRefCount(node: State) State {
return node;
}
Expand Down Expand Up @@ -123,6 +146,15 @@ pub const Pool = struct {
nodes: std.MultiArrayList(Node).Slice,
next_free_node: Id,

/// Type-erased vtable wrapping an arbitrary struct behind a node.
/// Built by createBranchStruct(), which packs a pointer to this struct
/// into the node's left/right fields; freed again during unref().
pub const BranchStructRef = struct {
// Type-erased pointer to the pool-owned clone of the wrapped struct.
ptr: *anyopaque,
// Writes the 32-byte root hash of the wrapped struct into `out`.
get_root: *const fn (ptr: *const anyopaque, out: *[32]u8) void,
// Proof generation may need a traversable tree even though branch-struct
// nodes are normally opaque to left/right navigation.
to_tree: *const fn (ptr: *const anyopaque, pool: *Pool) Error!Id,
// Releases the wrapped struct's resources; invoked when the node is freed.
deinit: *const fn (ptr: *anyopaque, allocator: Allocator) void,
};

pub const free_bit: u32 = 0x80000000;
pub const max_ref_count: u32 = 0x7FFFFFFF;

Expand Down Expand Up @@ -246,6 +278,53 @@ pub const Pool = struct {
return node_id;
}

/// The pool allocates and owns a clone of `ptr`; the caller retains ownership of its data.
/// Creates a branch-struct node wrapping a clone of `ptr`.
///
/// The pool allocates and owns the clone (via `T.init`); the caller retains
/// ownership of its own data. `T` must provide `init`, `deinit`, `getRoot`,
/// and `toTree`; they are captured in a type-erased BranchStructRef vtable.
/// Note: `ptr` is a non-optional Zig pointer and therefore cannot be null.
pub fn createBranchStruct(self: *Pool, comptime T: type, ptr: *const T) Error!Id {
    const cloned = try T.init(self.allocator, ptr);
    errdefer @constCast(cloned).deinit(self.allocator);

    const branch_struct_ref = try self.allocator.create(BranchStructRef);
    errdefer self.allocator.destroy(branch_struct_ref);

    branch_struct_ref.* = .{
        .ptr = @ptrCast(@constCast(cloned)),
        .get_root = struct {
            fn call(ptr_erased: *const anyopaque, out: *[32]u8) void {
                const typed_ptr: *const T = @ptrCast(@alignCast(ptr_erased));
                typed_ptr.getRoot(out);
            }
        }.call,
        .to_tree = struct {
            fn call(ptr_erased: *const anyopaque, pool: *Pool) Error!Id {
                const typed_ptr: *const T = @ptrCast(@alignCast(ptr_erased));
                return try typed_ptr.toTree(pool);
            }
        }.call,
        .deinit = struct {
            fn call(ptr_erased: *anyopaque, allocator: Allocator) void {
                const typed_ptr: *T = @ptrCast(@alignCast(ptr_erased));
                typed_ptr.deinit(allocator);
            }
        }.call,
    };

    const node_id = try self.create();
    const ptr_usize = @intFromPtr(branch_struct_ref);
    // branch-struct nodes do not store child node IDs in left/right. Instead we
    // pack the BranchStructRef pointer into those fields and rely on noChild()
    // to keep generic traversal from treating them as normal branches.
    const right_ptr_value: u32 = @intCast(ptr_usize & 0xFFFFFFFF);
    self.nodes.items(.right)[@intFromEnum(node_id)] = @enumFromInt(right_ptr_value);
    if (comptime @sizeOf(usize) == 8) {
        const left_ptr_value: u32 = @intCast(ptr_usize >> 32);
        self.nodes.items(.left)[@intFromEnum(node_id)] = @enumFromInt(left_ptr_value);
    } else {
        // 32-bit targets: the whole pointer fits in `right`; zero the high half.
        self.nodes.items(.left)[@intFromEnum(node_id)] = @enumFromInt(0);
    }
    self.nodes.items(.state)[@intFromEnum(node_id)] = State.branch_struct_lazy.initRefCount();
    return node_id;
}

/// Allocates nodes into the pool.
///
/// All nodes are allocated with refcount=0.
Expand Down Expand Up @@ -333,6 +412,39 @@ pub const Pool = struct {
_ = try states[@intFromEnum(node_id)].incRefCount();
}

/// Returns a typed, read-only view of the struct wrapped by a branch-struct
/// node. Fails with Error.InvalidNode for any other node kind.
/// The returned pointer is owned by the pool.
pub fn getStructPtr(self: *Pool, node_id: Id, comptime T: type) Error!*const T {
    const node_state = self.nodes.items(.state)[@intFromEnum(node_id)];
    if (!node_state.isBranchStruct()) return Error.InvalidNode;
    const ref = self.getBranchStructRefUnsafe(node_id);
    return @ptrCast(@alignCast(ref.ptr));
}

/// Decodes the BranchStructRef pointer packed into a node's left/right fields.
/// This reverses the packing performed in createBranchStruct(). "Unsafe":
/// callers must ensure node_id is a branch-struct node before decoding,
/// otherwise arbitrary child IDs are reinterpreted as a pointer.
pub fn getBranchStructRefUnsafe(self: *Pool, node_id: Id) *BranchStructRef {
    const idx = @intFromEnum(node_id);
    const hi_bits: u32 = @intFromEnum(self.nodes.items(.left)[idx]);
    const lo_bits: u32 = @intFromEnum(self.nodes.items(.right)[idx]);
    // On 64-bit targets the pointer spans both fields; on 32-bit it fits in `right`.
    const packed_ptr: usize = if (comptime @sizeOf(usize) == 8)
        @as(u64, hi_bits) << 32 | @as(u64, lo_bits)
    else
        lo_bits;
    return @ptrFromInt(packed_ptr);
}

/// Builds a plain, traversable tree from a branch-struct node's wrapped data
/// and returns its root. Fails with Error.InvalidNode for other node kinds.
/// Materialization is only for specialized flows such as proof generation;
/// generic tree navigation should keep branch-struct nodes opaque.
pub fn materializeBranchStruct(self: *Pool, node_id: Id) Error!Id {
    const node_state = self.nodes.items(.state)[@intFromEnum(node_id)];
    if (!node_state.isBranchStruct()) return Error.InvalidNode;
    const ref = self.getBranchStructRefUnsafe(node_id);
    return try ref.to_tree(ref.ptr, self);
}

pub fn unref(self: *Pool, node_id: Id) void {
const states = self.nodes.items(.state);
const lefts = self.nodes.items(.left);
Expand Down Expand Up @@ -381,6 +493,13 @@ pub const Pool = struct {
} else {
current = null;
}

if (states[@intFromEnum(id)].isBranchStruct()) {
const struct_ref = self.getBranchStructRefUnsafe(id);
struct_ref.deinit(struct_ref.ptr, self.allocator);
self.allocator.destroy(struct_ref);
}

// Return the node to the free list
states[@intFromEnum(id)] = State.initNextFree(self.next_free_node);
self.next_free_node = id;
Expand All @@ -396,7 +515,8 @@ pub const Id = enum(u32) {

/// Returns true if navigation to the child node is not possible.
/// Leaves have no children; node 0 is the zero/sentinel node; and
/// branch-struct nodes keep packed pointer bits in left/right, not child IDs.
pub inline fn noChild(node_id: Id, state: State) bool {
    return state.isLeaf() or state.isBranchStruct() or @intFromEnum(node_id) == 0;
}

/// Returns the root hash of the tree, computing any lazy branches as needed.
Expand All @@ -410,6 +530,12 @@ pub const Id = enum(u32) {
hashOne(hash, left, right);
state.setBranchComputed();
}

if (state.isBranchStructLazy()) {
const struct_ref = pool.getBranchStructRefUnsafe(node_id);
struct_ref.get_root(struct_ref.ptr, hash);
state.setBranchStructComputed();
}
return hash;
}

Expand Down
52 changes: 47 additions & 5 deletions src/persistent_merkle_tree/proof.zig
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ pub const Error = error{
InvalidGindex,
/// Witness list length does not match the gindex path length.
InvalidWitnessLength,
/// Single-proof traversal encountered a branch-struct node after already materializing one.
NestedBranchStruct,
};

pub const ProofType = enum {
Expand Down Expand Up @@ -53,6 +55,24 @@ pub const SingleProof = struct {
}
};

/// Proof traversal needs real left/right child nodes. For a branch-struct node,
/// materialize a temporary plain tree and keep its root alive until proof creation finishes.
/// Proof traversal needs real left/right child nodes. For a branch-struct
/// node, materialize a temporary plain tree and record its root in
/// `temporary_roots` so it stays alive until proof creation finishes.
/// Any other node kind is returned unchanged.
fn materializeIfBranchStruct(
    allocator: Allocator,
    pool: *Node.Pool,
    node_id: Node.Id,
    temporary_roots: *std.ArrayListUnmanaged(Node.Id),
) (Node.Error || Error)!Node.Id {
    const state = node_id.getState(pool);
    if (!state.isBranchStruct()) return node_id;

    const tree_root = try pool.materializeBranchStruct(node_id);
    // If tracking fails, release the freshly materialized tree before bailing.
    errdefer pool.unref(tree_root);
    temporary_roots.append(allocator, tree_root) catch return error.OutOfMemory;
    return tree_root;
}

/// Produces a single Merkle proof for the node at `gindex`.
pub fn createSingleProof(
allocator: Allocator,
Expand All @@ -67,6 +87,9 @@ pub fn createSingleProof(
const path_len = gindex.pathLen();
var witnesses = try allocator.alloc([32]u8, path_len);
errdefer allocator.free(witnesses);
// there should not be more than 1 branch-struct node on the path
var materialized_root: ?Node.Id = null;
defer if (materialized_root) |temp_root| pool.unref(temp_root);

if (path_len == 0) {
return SingleProof{
Expand All @@ -80,6 +103,15 @@ pub fn createSingleProof(

for (0..path_len) |depth_idx| {
const witness_index = path_len - 1 - depth_idx;
if (node_id.getState(pool).isBranchStruct()) {
if (materialized_root) |_| {
// Single proofs only support one branch-struct hop. If the
// materialized subtree would require another one, fail fast.
return error.NestedBranchStruct;
}
materialized_root = try pool.materializeBranchStruct(node_id);
node_id = materialized_root.?;
}

if (path.left()) {
const right_id = try node_id.getRight(pool);
Expand Down Expand Up @@ -420,6 +452,7 @@ fn nodeToCompactMultiProof(
node_id: Node.Id,
bitlist: []const bool,
bit_index: usize,
temporary_roots: *std.ArrayListUnmanaged(Node.Id),
) (Node.Error || Error)![][32]u8 {
// If bit is 1, this node is a leaf in the proof
if (bitlist[bit_index]) {
Expand All @@ -428,13 +461,15 @@ fn nodeToCompactMultiProof(
return leaves;
}

const current = try materializeIfBranchStruct(allocator, pool, node_id, temporary_roots);

// Otherwise, recurse into children
const left_id = try node_id.getLeft(pool);
const left = try nodeToCompactMultiProof(allocator, pool, left_id, bitlist, bit_index + 1);
const left_id = try current.getLeft(pool);
const left = try nodeToCompactMultiProof(allocator, pool, left_id, bitlist, bit_index + 1, temporary_roots);
defer allocator.free(left);

const right_id = try node_id.getRight(pool);
const right = try nodeToCompactMultiProof(allocator, pool, right_id, bitlist, bit_index + left.len * 2);
const right_id = try current.getRight(pool);
const right = try nodeToCompactMultiProof(allocator, pool, right_id, bitlist, bit_index + left.len * 2, temporary_roots);
defer allocator.free(right);

const result = try allocator.alloc([32]u8, left.len + right.len);
Expand All @@ -452,8 +487,15 @@ pub fn createCompactMultiProof(
) (Node.Error || Error)![][32]u8 {
const bitlist = try descriptorToBitlist(allocator, descriptor);
defer allocator.free(bitlist);
var temporary_roots = std.ArrayListUnmanaged(Node.Id){};
defer {
for (temporary_roots.items) |temp_root| {
pool.unref(temp_root);
}
temporary_roots.deinit(allocator);
}

return nodeToCompactMultiProof(allocator, pool, root, bitlist, 0);
return nodeToCompactMultiProof(allocator, pool, root, bitlist, 0, &temporary_roots);
}

/// Pointer to track position in bitlist and leaves during reconstruction
Expand Down
2 changes: 2 additions & 0 deletions src/ssz/root.zig
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ pub const VariableVectorType = types.VariableVectorType;

pub const FixedContainerType = types.FixedContainerType;
pub const VariableContainerType = types.VariableContainerType;
pub const StructContainerType = types.StructContainerType;

pub const getPathGindex = types.getPathGindex;

Expand All @@ -42,6 +43,7 @@ pub const HasherData = hasher.HasherData;

const tree_view = @import("tree_view/root.zig");
pub const ContainerTreeView = tree_view.ContainerTreeView;
pub const StructContainerTreeView = tree_view.StructContainerTreeView;
pub const ArrayBasicTreeView = tree_view.ArrayBasicTreeView;
pub const ArrayCompositeTreeView = tree_view.ArrayCompositeTreeView;
pub const ListBasicTreeView = tree_view.ListBasicTreeView;
Expand Down
Loading
Loading