fix: parsing CDFv2 attributes (#17)

Beforerr · web-flow · commit 61e3d2f436c5 · 2025-09-27T10:30:00.000-07:00
diff --git a/.gitignore b/.gitignore
@@ -6,5 +6,4 @@ Manifest*.toml
 /docs/build/
 ref/
 .claude/
-data/
-!data/a_*.cdf
+data/.*
diff --git a/data/ac_h2_sis_20101105_v06.cdf b/data/ac_h2_sis_20101105_v06.cdf
diff --git a/src/loading/attribute.jl b/src/loading/attribute.jl
@@ -109,15 +109,17 @@ function vattrib(cdf, varnum, name)
     return nothing
 end
 
-function _search_aedr_entries(source, aedr_head::Int64, RecordSizeType, cdf_encoding::Int32, target_varnum::Integer)
+function _search_aedr_entries(source, aedr_head, RecordSizeType, cdf_encoding::Int32, target_varnum::Integer)
     aedr_head == 0 && return nothing
-    offset = aedr_head
+    offset = Int(aedr_head)
+    _num_offset = 13 + 2 * sizeof(RecordSizeType)
+    _next_offset = 5 + sizeof(RecordSizeType)
     while offset != 0
-        num = read_be(source, offset + 29, Int32)
+        num = read_be(source, offset + _num_offset, Int32)
         if num == target_varnum
             return load_aedr_data(source, offset, RecordSizeType, cdf_encoding)
         end
-        offset = read_be(source, offset + 13, Int64)
+        offset = Int(read_be(source, offset + _next_offset, RecordSizeType))
     end
     return nothing
 end
diff --git a/src/parsing.jl b/src/parsing.jl
@@ -125,21 +125,23 @@ function readname(buf::Vector{UInt8}, offset::Int)
     return @views buf[offset:(offset + 255)]
 end
 
-@resumable function get_offsets_lazy(buffer::Vector{UInt8}, pos::Int64, ::Type{RecordSizeType}) where {RecordSizeType}
+@resumable function get_offsets_lazy(buffer::Vector{UInt8}, pos, ::Type{RecordSizeType}) where {RecordSizeType}
+    pos = Int(pos)
     while pos != 0
         @yield pos
-        pos = read_be(buffer, pos + 1 + sizeof(RecordSizeType) + 4, RecordSizeType)
+        pos = Int(read_be(buffer, pos + 1 + sizeof(RecordSizeType) + 4, RecordSizeType))
     end
 end
 
-function get_offsets!(offsets, buffer::Vector{UInt8}, pos::Int64, RecordSizeType)
+function get_offsets!(offsets, buffer::Vector{UInt8}, pos, FieldSizeType)
+    pos = Int(pos)
     while pos != 0
         push!(offsets, pos)
-        pos = read_be(buffer, pos + 1 + sizeof(RecordSizeType) + 4, Int64)
+        pos = Int(read_be(buffer, pos + 1 + sizeof(FieldSizeType) + 4, FieldSizeType))
     end
     return offsets
 end
-get_offsets(args...) = get_offsets!(Int64[], args...)
+get_offsets(args...) = get_offsets!(Int[], args...)
 
 
 # Big-endian readers (CDF uses big-endian for most fields)
diff --git a/src/records/aedr.jl b/src/records/aedr.jl
@@ -6,30 +6,33 @@
 Attribute g/r Entry Descriptor Record.
 Describes a global entry (for global attributes) or rVariable entry (for variable attributes).
 """
-struct AEDR{A}
-    header::Header
-    AEDRnext::Int64     # Offset to next AEDR in chain
+struct AEDR{FST, A}
+    # header::Header
+    AEDRnext::FST     # Offset to next AEDR in chain
     AttrNum::Int32      # Attribute number
     DataType::Int32     # CDF data type of the entry
     Num::Int32          # Entry number
     NumElems::Int32     # Number of elements in the entry
     NumStrings::Int32   # Number of strings (for string data)
-    rfuB::Int32         # Reserved field B
-    rfuC::Int32         # Reserved field C
-    rfuD::Int32         # Reserved field D
-    rfuE::Int32         # Reserved field E
+    rfuB::RInt32         # Reserved field B
+    rfuC::RInt32         # Reserved field C
+    rfuD::RInt32         # Reserved field D
+    rfuE::RInt32         # Reserved field E
     Value::A            # This consists of the number of elements (specified by the NumElems field) of the data type (specified by the DataType field). This can be thought of as a 1-dimensional array of values (stored contiguously). The size of this field is the product of the number of elements and the size in bytes of each element.
 end
 
 @inline function load_aedr_data(buffer::Vector{UInt8}, offset, RecordSizeType, cdf_encoding)
-    datatype = read_be(buffer, offset + 25, Int32)
-    NumElems = read_be(buffer, offset + 33, Int32)
+    _datatype_offset = 9 + 2 * sizeof(RecordSizeType)
+    _numelems_offset = 17 + 2 * sizeof(RecordSizeType)
+    _data_offset = 41 + 2 * sizeof(RecordSizeType)
+    datatype = read_be(buffer, offset + _datatype_offset, Int32)
+    NumElems = read_be(buffer, offset + _numelems_offset, Int32)
     T = julia_type(datatype, NumElems)
     return if datatype in (CDF_CHAR, CDF_UCHAR)
-        load_char_data(buffer, offset + 57, NumElems)
+        load_char_data(buffer, offset + _data_offset, NumElems)
     else
         needs_byte_swap = is_big_endian_encoding(cdf_encoding)
-        load_attribute_data(T, buffer, offset + 57, NumElems, needs_byte_swap)
+        load_attribute_data(T, buffer, offset + _data_offset, NumElems, needs_byte_swap)
     end
 end
 
diff --git a/test/cdf2_test.jl b/test/cdf2_test.jl
@@ -1,6 +1,13 @@
+using Test
 using CommonDataFormat
 
-file = "/Users/zijin/.cdaweb/data/WI_H0_MFI/wi_h0_mfi_20210115_v05.cdf"
-ds = CDFDataset(file)
-keys(ds)
-ds["BGSE"]
+include("utils.jl")
+
+@testset "CDFDataset" begin
+    file = data_path("ac_h2_sis_20101105_v06.cdf")
+    ds = CDFDataset(file)
+    var = ds["flux_He"]
+    @test "TITLE" in keys(ds.attrib)
+    @test "CATDESC" in keys(var.attrib)
+    @test CommonDataFormat.attrib(var, "FILLVAL")[1] == -1.0f31
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -5,6 +5,7 @@ import CommonDataFormat as CDF
 include("utils.jl")
 include("epochs_test.jl")
 include("comprehensive_test.jl")
+include("cdf2_test.jl")
 include("CommonDataModelExt_test.jl")
 
 @testset "Fill Value" begin