Skip to content

Commit fcee571

Browse files
TomNicholas and claude authored
Spec structure improvements (#1984)
Mostly moves content around to make the `format` section more focused on the actual format definitions. Also adds hooks in the flatbuffers files that we can use to inline sections in the docs (following @ianhi's approach in a previous PR). cc @ianhi

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c3f8540 commit fcee571

File tree

6 files changed

+135
-66
lines changed

6 files changed

+135
-66
lines changed

icechunk-format/flatbuffers/common.fbs

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,20 @@
11
namespace generated;
22

3+
// --8<-- [start:object_id_12]
34
// used for SnapshotIds, ChunkIds, etc
45
struct ObjectId12 {
56
bytes:[uint8:12];
67
}
8+
// --8<-- [end:object_id_12]
79

10+
// --8<-- [start:object_id_8]
811
// used for NodeIds
912
struct ObjectId8 {
1013
bytes:[uint8:8];
1114
}
15+
// --8<-- [end:object_id_8]
1216

17+
// --8<-- [start:metadata_item]
1318
// a single key-value of snapshot metadata
1419
table MetadataItem {
1520
// the name of the attribute
@@ -19,3 +24,4 @@ table MetadataItem {
1924
// TODO: better serialization format
2025
value: [uint8] (required);
2126
}
27+
// --8<-- [end:metadata_item]

icechunk-format/flatbuffers/manifest.fbs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ include "common.fbs";
22

33
namespace generated;
44

5+
// --8<-- [start:chunk_ref]
56
// We don't use unions and datastructures for the different types of refs
67
// If we do that, the manifest grows in size a lot, because of the extra
78
// offsets needed. This makes the code more complex because we need to
@@ -44,7 +45,9 @@ table ChunkRef {
4445
// Introduced in spec version 2
4546
extra: [uint8];
4647
}
48+
// --8<-- [end:chunk_ref]
4749

50+
// --8<-- [start:array_manifest]
4851
table ArrayManifest {
4952
// the id of the node the chunk refs belong to
5053
node_id: ObjectId8 (required);
@@ -57,7 +60,9 @@ table ArrayManifest {
5760
// Introduced in spec version 2
5861
extra: [uint8];
5962
}
63+
// --8<-- [end:array_manifest]
6064

65+
// --8<-- [start:manifest_table]
6166
table Manifest {
6267
// the manifest id
6368
id: ObjectId12 (required);
@@ -79,5 +84,6 @@ table Manifest {
7984
// Introduced in spec version 2
8085
extra: [uint8];
8186
}
87+
// --8<-- [end:manifest_table]
8288

8389
root_type Manifest;

icechunk-format/flatbuffers/repo.fbs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@ include "common.fbs";
22

33
namespace generated;
44

5+
// --8<-- [start:ref]
56
table Ref {
67
name: string (required);
78
snapshot_index: uint32;
89
}
10+
// --8<-- [end:ref]
911

12+
// --8<-- [start:snapshot_info]
1013
table SnapshotInfo {
1114
id: ObjectId12 (required);
1215
// -1 means no parent
@@ -16,15 +19,21 @@ table SnapshotInfo {
1619
message: string (required);
1720
metadata: [MetadataItem];
1821
}
22+
// --8<-- [end:snapshot_info]
1923

24+
// --8<-- [start:repo_availability]
2025
enum RepoAvailability : ubyte { Online = 0, ReadOnly, Offline }
26+
// --8<-- [end:repo_availability]
2127

28+
// --8<-- [start:repo_status]
2229
table RepoStatus {
2330
availability: RepoAvailability;
2431
set_at: uint64;
2532
limited_availability_reason: string;
2633
}
34+
// --8<-- [end:repo_status]
2735

36+
// --8<-- [start:update_types]
2837
table RepoInitializedUpdate {
2938
}
3039
table RepoMigratedUpdate {
@@ -78,7 +87,9 @@ table FeatureFlagChangedUpdate {
7887
new_value: bool;
7988
is_set: bool;
8089
}
90+
// --8<-- [end:update_types]
8191

92+
// --8<-- [start:update_type_union]
8293
union UpdateType {
8394
RepoInitializedUpdate,
8495
RepoMigratedUpdate,
@@ -97,7 +108,9 @@ union UpdateType {
97108
FeatureFlagChangedUpdate,
98109
RepoStatusChangedUpdate,
99110
}
111+
// --8<-- [end:update_type_union]
100112

113+
// --8<-- [start:update]
101114
table Update {
102115
update_type: UpdateType (required);
103116

@@ -107,7 +120,9 @@ table Update {
107120
// on updates the repo object is backed up
108121
backup_path: string;
109122
}
123+
// --8<-- [end:update]
110124

125+
// --8<-- [start:repo_table]
111126
table Repo {
112127

113128
spec_version: uint8;
@@ -145,5 +160,6 @@ table Repo {
145160
// Introduced in spec version 2
146161
extra: [uint8];
147162
}
163+
// --8<-- [end:repo_table]
148164

149165
root_type Repo;

icechunk-format/flatbuffers/snapshot.fbs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ include "common.fbs";
22

33
namespace generated;
44

5+
// --8<-- [start:manifest_file_info]
56
// a pointer to a manifest file
67
struct ManifestFileInfo {
78
// id of the object in the repo's object store
@@ -13,7 +14,9 @@ struct ManifestFileInfo {
1314
// number of chunk refs in the manifest
1415
num_chunk_refs: uint32;
1516
}
17+
// --8<-- [end:manifest_file_info]
1618

19+
// --8<-- [start:manifest_file_info_v2]
1720
// table version of ManifestFileInfo, allowing optional extra data
1821
// introduced in spec V2
1922
table ManifestFileInfoV2 {
@@ -29,7 +32,9 @@ table ManifestFileInfoV2 {
2932
// Reserved for future use. Opaque byte vector for extensibility.
3033
extra: [uint8];
3134
}
35+
// --8<-- [end:manifest_file_info_v2]
3236

37+
// --8<-- [start:chunk_index_range]
3338
// A range of chunk indexes
3439
struct ChunkIndexRange {
3540
// inclusive
@@ -38,7 +43,9 @@ struct ChunkIndexRange {
3843
// exclusive
3944
to: uint32;
4045
}
46+
// --8<-- [end:chunk_index_range]
4147

48+
// --8<-- [start:manifest_ref]
4249
// a pointer to a manifest
4350
table ManifestRef {
4451
// id of the object in the repo's object store
@@ -47,7 +54,9 @@ table ManifestRef {
4754
// one element per dimension of the array, same order as in metadata
4855
extents: [ChunkIndexRange] (required);
4956
}
57+
// --8<-- [end:manifest_ref]
5058

59+
// --8<-- [start:dimension_shape]
5160
// the shape of the array along a given dimension
5261
struct DimensionShape {
5362
array_length: uint64;
@@ -60,20 +69,28 @@ struct DimensionShape {
6069
// It was only ever used to estimate `num_chunks`.
6170
chunk_length: uint64;
6271
}
72+
// --8<-- [end:dimension_shape]
6373

74+
// --8<-- [start:dimension_shape_v2]
6475
table DimensionShapeV2 {
6576
array_length: uint64;
6677
num_chunks: uint32;
6778
}
79+
// --8<-- [end:dimension_shape_v2]
6880

81+
// --8<-- [start:dimension_name]
6982
table DimensionName {
7083
// optional
7184
name: string;
7285
}
86+
// --8<-- [end:dimension_name]
7387

88+
// --8<-- [start:group_node_data]
7489
// a marker for a group node
7590
table GroupNodeData {}
91+
// --8<-- [end:group_node_data]
7692

93+
// --8<-- [start:array_node_data]
7794
// data for an array node
7895
table ArrayNodeData {
7996
shape: [DimensionShape] (required);
@@ -90,12 +107,15 @@ table ArrayNodeData {
90107
// ultimately determines whether this is a V2 snapshot.
91108
shape_v2: [DimensionShapeV2];
92109
}
110+
// --8<-- [end:array_node_data]
93111

112+
// --8<-- [start:node_data]
94113
// the node contents, that can be either a group or an array
95114
union NodeData {
96115
Array :ArrayNodeData,
97116
Group :GroupNodeData,
98117
}
118+
// --8<-- [end:node_data]
99119

100120
// --8<-- [start:node_snapshot]
101121
// a node

icechunk-format/flatbuffers/transaction_log.fbs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@ include "common.fbs";
22

33
namespace generated;
44

5+
// --8<-- [start:chunk_indices]
56
table ChunkIndices {
67
coords: [uint32] (required);
78
}
9+
// --8<-- [end:chunk_indices]
810

11+
// --8<-- [start:array_updated_chunks]
912
table ArrayUpdatedChunks {
1013
// the node id of the array to which the chunks belong to
1114
node_id: ObjectId8 (required);
@@ -14,9 +17,13 @@ table ArrayUpdatedChunks {
1417
// sorted in ascending lexicographical order
1518
chunks: [ChunkIndices] (required);
1619
}
20+
// --8<-- [end:array_updated_chunks]
1721

22+
// --8<-- [start:node_type]
1823
enum NodeType: ubyte { Group = 0, Array }
24+
// --8<-- [end:node_type]
1925

26+
// --8<-- [start:move_operation]
2027
// moves are fully collapsed: there is only one from/to pair,
2128
// and overlapping moves (from -> to, to -> new_to) get merged
2229
// (from -> new_to in this example).
@@ -28,7 +35,9 @@ table MoveOperation {
2835
node_id: ObjectId8;
2936
node_type: NodeType;
3037
}
38+
// --8<-- [end:move_operation]
3139

40+
// --8<-- [start:transaction_log_table]
3241
table TransactionLog {
3342
// id of the transaction log file,
3443
// it will be the same as the corresponding snapshot
@@ -70,5 +79,6 @@ table TransactionLog {
7079
// Introduced in spec version 2
7180
extra: [uint8];
7281
}
82+
// --8<-- [end:transaction_log_table]
7383

7484
root_type TransactionLog;

0 commit comments

Comments
 (0)