Skip to content

Commit c3c0df4

Browse files
committed
feat: ADR parsing
1 parent 1c62383 commit c3c0df4

File tree

11 files changed

+215
-172
lines changed

11 files changed

+215
-172
lines changed

src/CommonDataFormat.jl

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,10 @@ export Majority, CompressionType, DataType
77

88
include("enums.jl")
99
include("parsing.jl")
10-
include("records.jl")
10+
include("records/records.jl")
1111
include("variable.jl")
1212
include("attribute.jl")
1313
include("dataset.jl")
14-
include("loading/cdr.jl")
15-
include("loading/gdr.jl")
16-
include("loading/vdr.jl")
1714
include("loading/vxr.jl")
1815
include("loading/vvr.jl")
1916
include("loading/variable.jl")

src/dataset.jl

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ function CDFDataset(filename)
2525
compression_bytes = read_be(io, UInt32)
2626
compression = CompressionType(compression_bytes)
2727
RecordSizeType = is_cdf_v3(magic_bytes) ? UInt64 : UInt32
28-
# Parse CDF header to extract version, majority, and compression
28+
# Parse CDF header
2929
cdr = CDR(io, 8, RecordSizeType)
3030
gdr = GDR(io, cdr.gdr_offset, RecordSizeType)
3131
return CDFDataset{compression, RecordSizeType}(filename, cdr, gdr)
@@ -40,6 +40,10 @@ function Base.getproperty(cdf::CDFDataset{CT}, name::Symbol) where {CT}
4040
return Majority(cdf.cdr)
4141
elseif name === :compression
4242
return CT
43+
elseif name === :adr
44+
return open(cdf.filename, "r") do io
45+
return ADR(io, cdf.gdr.ADRhead, recordsize_type(cdf))
46+
end
4347
else
4448
return getfield(cdf, name)
4549
end
@@ -54,7 +58,7 @@ function Base.getindex(cdf::CDFDataset, var_name::String)
5458
vdr = nothing
5559
for current_offset in (gdr.rVDRhead, gdr.zVDRhead)
5660
while current_offset != 0
57-
_vdr = load_zVDR(io, current_offset, RecordSizeType)
61+
_vdr = zVDR(io, current_offset, RecordSizeType)
5862
if _vdr.name == var_name
5963
vdr = _vdr
6064
break
@@ -66,32 +70,11 @@ function Base.getindex(cdf::CDFDataset, var_name::String)
6670

6771
isnothing(vdr) && throw(KeyError(var_name))
6872

69-
# Determine dimensions based on variable type
70-
dimensions = if vdr.z_num_dims > 0
71-
# Z-variable - use its own dimensions
72-
collect(Int, vdr.z_dim_sizes)
73-
else
74-
# R-variable - use GDR dimensions (if any)
75-
if length(gdr.r_dim_sizes) > 0
76-
collect(Int, gdr.r_dim_sizes)
77-
else
78-
[1] # Scalar
79-
end
80-
end
81-
8273
# Calculate number of records
83-
num_records = vdr.max_rec >= 0 ? vdr.max_rec + 1 : 0
84-
8574
data = load_variable_data(io, vdr, RecordSizeType, gdr.r_dim_sizes, cdf.cdr.encoding)
8675

8776
# Create and return CDFVariable
88-
return CDFVariable(
89-
var_name,
90-
data,
91-
DataType(vdr.data_type),
92-
dimensions,
93-
num_records,
94-
)
77+
return CDFVariable(var_name, data, vdr)
9578
end
9679
end
9780

@@ -116,3 +99,12 @@ function Base.keys(cdf::CDFDataset)
11699
end
117100

118101
Base.haskey(cdf::CDFDataset, var_name::String) = var_name in keys(cdf)
102+
103+
# CommonDataModel.jl Interface
104+
function attribnames(cdf::CDFDataset)
105+
return open(cdf.filename, "r") do io # reopens the file on every call; the do-block closes it
106+
gdr = cdf.gdr
107+
adr = ADR(io, gdr.ADRhead, recordsize_type(cdf)) # loads only the ADR at GDR.ADRhead (ADRnext is not followed here)
108+
return adr.attrib_names # NOTE(review): the ADR struct in records/adr.jl declares no `attrib_names` field (it has `Name`) — confirm
109+
end
110+
end

src/loading/cdr.jl

Lines changed: 0 additions & 18 deletions
This file was deleted.

src/loading/gdr.jl

Lines changed: 0 additions & 24 deletions
This file was deleted.

src/records/adr.jl

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
"""
2+
Attribute Descriptor Record (ADR)
3+
4+
Contains a description of an attribute in a CDF. There will be one ADR per attribute. The ADRhead field of the GDR contains the file offset of the first ADR.
5+
"""
6+
struct ADR <: Record
7+
header::Header
8+
ADRnext::Int64 # Offset to next ADR in chain
9+
AgrEDRhead::Int64 # The offset of the first Attribute g/rEntry Descriptor Record (AgrEDR) for this attribute.
10+
Scope::Int32 # Attribute scope (global vs. variable), as defined by the CDF internal format
11+
Num::Int32 # Attribute number
12+
NgrEntries::Int32 # Number of g/rEntries for this attribute
13+
MAXgrEntry::Int32 # Maximum g/rEntry number for this attribute
14+
rfuA::Int32 # Reserved field A
15+
AzEDRhead::Int64 # The offset of the first Attribute zEntry Descriptor Record (AzEDR) for this attribute.
16+
NzEntries::Int32 # Number of zEntries for this attribute
17+
MAXzEntry::Int32 # Maximum zEntry number for this attribute
18+
rfuE::Int32 # Reserved field E
19+
Name::String # Attribute name
20+
end
21+
22+
"""
23+
ADR(io::IO, RecordSizeType)
24+
25+
Load an Attribute Descriptor Record from the IO stream, starting at its current position.
26+
"""
27+
function ADR(io::IO, RecordSizeType)
28+
# Read header
29+
header = Header(io, RecordSizeType)
30+
@assert header.record_type == 4 # an ADR header is expected to carry record type 4
31+
32+
# Read ADR fields
33+
ADRnext = read_be(io, Int64) # NOTE(review): offsets are read as fixed Int64; RecordSizeType is only passed to Header — confirm for UInt32 record sizes
34+
AgrEDRhead = read_be(io, Int64)
35+
fields1 = read_be(io, 5, Int32) # Scope, Num, NgrEntries, MAXgrEntry, rfuA (struct field order)
36+
az_edrhead = read_be(io, Int64)
37+
fields2 = read_be(io, 3, Int32) # NzEntries, MAXzEntry, rfuE (struct field order)
38+
name = readname(io)
39+
40+
return ADR(
41+
header, ADRnext, AgrEDRhead, fields1..., az_edrhead, fields2..., name
42+
)
43+
end

src/records/cdr.jl

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""
2+
CDF Descriptor Record (CDR) - the main file header record
3+
Contains version, encoding, format information, and pointer to GDR
4+
"""
5+
struct CDR <: Record
6+
header::Header
7+
gdr_offset::UInt64 # File offset of the GDR; read as RecordSizeType (UInt32 for v2, UInt64 for v3) and stored as UInt64
8+
version::Int32
9+
release::Int32
10+
encoding::Int32
11+
flags::Int32
12+
rfu_a::Int32 # Reserved field A
13+
rfu_b::Int32 # Reserved field B
14+
increment::Int32
15+
identifier::Int32
16+
rfu_e::Int32 # Reserved field E
17+
# Note: copyright string follows but we'll handle it separately
18+
end
19+
20+
version(cdr::CDR; verbose = true) = verbose ? (cdr.version, cdr.release, cdr.increment) : cdr.version
21+
Majority(cdr::CDR) = (cdr.flags & 0x01) != 0 ? Majority(0) : Majority(1) # Row=0, Column=1
22+
is_cdf_v3(cdr::CDR) = cdr.version == 3
23+
24+
"""
25+
CDR(io::IO, RecordSizeType) -> CDR
26+
27+
Load a CDF Descriptor Record from the IO stream, starting at its current position.
28+
This follows the CDF specification for CDR record structure.
29+
"""
30+
@inline function CDR(io::IO, RecordSizeType)
31+
# Read header
32+
header = Header(io, RecordSizeType)
33+
@assert header.record_type == 1 "Invalid CDR record type"
34+
# Read remaining CDR fields in order as per CDF specification
35+
gdr_offset = read_be(io, RecordSizeType) # offset width follows the record-size type
36+
fields = read_be(io, 9, Int32) # version, release, encoding, flags, rfu_a, rfu_b, increment, identifier, rfu_e
37+
return CDR(header, gdr_offset, fields...) # the 9 Int32 values are splatted in struct field order
38+
end

src/records/gdr.jl

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
"""
2+
Global Descriptor Record (GDR) - contains global information about the CDF file
3+
Points to variable and attribute descriptor records
4+
"""
5+
struct GDR # NOTE(review): ADR and CDR are declared `<: Record`; GDR is not — confirm this is intentional
6+
header::Header
7+
rVDRhead::Int64 # Offset to first r-variable descriptor record
8+
zVDRhead::Int64 # Offset to first z-variable descriptor record
9+
ADRhead::Int64 # Offset to first attribute descriptor record
10+
eof::Int64 # End of file offset
11+
NrVars::Int32 # Number of r-variables
12+
num_attr::Int32 # Number of attributes
13+
r_max_rec::Int32 # Maximum record number for r-variables
14+
r_num_dims::Int32 # Number of dimensions for r-variables
15+
NzVars::Int32 # Number of z-variables
16+
uir_head::Int64 # Offset to first unused internal record (UIR)
17+
rfu_c::Int32 # Reserved field C
18+
leap_second_last_updated::Int32
19+
rfu_e::Int32 # Reserved field E
20+
r_dim_sizes::Vector{UInt32} # Dimension sizes for r-variables (one entry per r-variable dimension)
21+
end
22+
23+
24+
"""
25+
GDR(io::IO, RecordSizeType)
26+
27+
Load a Global Descriptor Record from the IO stream, starting at its current position.
28+
"""
29+
@inline function GDR(io::IO, RecordSizeType)
30+
# Read header
31+
header = Header(io, RecordSizeType)
32+
@assert header.record_type == 2 # a GDR header is expected to carry record type 2
33+
34+
# Read GDR fields
35+
rvdr_head, zvdr_head, adr_head, eof = read_be(io, 4, RecordSizeType) # four offsets, width follows the record-size type
36+
nr_vars, num_attr, r_max_rec, r_num_dims, nz_vars = read_be(io, 5, Int32)
37+
uir_head = read_be(io, Int64) # NOTE(review): fixed Int64 read, while the other offsets above use RecordSizeType — confirm for UInt32 record sizes
38+
rfu_c, leap_second_last_updated, rfu_e = read_be(io, 3, Int32)
39+
# Read dimension sizes array
40+
r_dim_sizes = map(i -> read_uint32_be(io), 1:r_num_dims) # one UInt32 per r-dimension; NOTE(review): uses read_uint32_be while the rest of this function uses read_be
41+
42+
return GDR(
43+
header, rvdr_head, zvdr_head, adr_head, eof, nr_vars, num_attr,
44+
r_max_rec, r_num_dims, nz_vars, uir_head, rfu_c,
45+
leap_second_last_updated, rfu_e, r_dim_sizes
46+
)
47+
end

0 commit comments

Comments
 (0)