refactor: compute the byte-swap flag once per attribute load and pass it to load_aedr_data

Beforerr · Beforerr · commit a5f0e3d7b635 · 2025-11-03T11:21:14.000-08:00
diff --git a/src/loading/attribute.jl b/src/loading/attribute.jl
@@ -9,8 +9,9 @@ Load all attribute entries for a given attribute from its AEDRs.
 @inline function load_attribute_entries(buffer::Vector{UInt8}, adr, RecordSizeType, cdf_encoding)
     head = max(adr.AgrEDRhead, adr.AzEDRhead)
     offsets = get_offsets(buffer, head, RecordSizeType)
+    needs_byte_swap = is_big_endian_encoding(cdf_encoding)
     return map(offsets) do offset
-        load_aedr_data(buffer, offset, RecordSizeType, cdf_encoding)
+        load_aedr_data(buffer, offset, RecordSizeType, needs_byte_swap)
     end
 end
 
@@ -19,7 +20,7 @@ end
 
 Load all attributes from the CDF file.
 """
-function attrib(cdf::CDFDataset; predicate=is_global)
+function attrib(cdf::CDFDataset; predicate = is_global)
     RecordSizeType = recordsize_type(cdf)
     buffer = cdf.buffer
     cdf_encoding = cdf.cdr.encoding
@@ -59,14 +60,15 @@ function vattrib(cdf::CDFDataset, varnum::Integer)
     RecordSizeType = recordsize_type(cdf)
     buffer = cdf.buffer
     cdf_encoding = cdf.cdr.encoding
-    attributes = Dict{String,Union{String,Vector}}()
+    attributes = Dict{String, Union{String, Vector}}()
     offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType)
+    needs_byte_swap = is_big_endian_encoding(cdf_encoding)
     for offset in offsets
         is_global(buffer, offset, RecordSizeType) && continue
         adr = ADR(buffer, offset, RecordSizeType)
         for head in (adr.AgrEDRhead, adr.AzEDRhead)
             head == 0 && continue
-            found = _search_aedr_entries(buffer, head, RecordSizeType, cdf_encoding, varnum)
+            found = _search_aedr_entries(buffer, head, RecordSizeType, needs_byte_swap, varnum)
             isnothing(found) && continue
             name = String(adr.Name)
             attributes[name] = _get_attributes(name, found, cdf)
@@ -99,13 +101,14 @@ function vattrib(cdf, varnum, name)
     # Search for the specific attribute by name first
     offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType)
     name_bytes = codeunits(name)
+    needs_byte_swap = is_big_endian_encoding(cdf_encoding)
     for offset in offsets
         is_global(buffer, offset, RecordSizeType) && continue
         adr = ADR(buffer, offset, RecordSizeType)
         adr.Name != name_bytes && continue
         for head in (adr.AgrEDRhead, adr.AzEDRhead)
             head == 0 && continue
-            found = _search_aedr_entries(buffer, head, RecordSizeType, cdf_encoding, varnum)
+            found = _search_aedr_entries(buffer, head, RecordSizeType, needs_byte_swap, varnum)
             isnothing(found) && continue
             return _get_attributes(name, found, cdf)
         end
@@ -114,15 +117,15 @@ function vattrib(cdf, varnum, name)
     return nothing
 end
 
-@inline function _search_aedr_entries(source, aedr_head, RecordSizeType, cdf_encoding::Int32, target_varnum)
+@inline function _search_aedr_entries(source, aedr_head, RecordSizeType, needs_byte_swap, target_varnum)
     aedr_head == 0 && return nothing
     offset = Int(aedr_head)
     _num_offset = 13 + 2 * sizeof(RecordSizeType)
     _next_offset = 5 + sizeof(RecordSizeType)
     while offset != 0
         num = read_be(source, offset + _num_offset, Int32)
         if num == target_varnum
-            return load_aedr_data(source, offset, RecordSizeType, cdf_encoding)
+            return load_aedr_data(source, offset, RecordSizeType, needs_byte_swap)
         end
         offset = Int(read_be(source, offset + _next_offset, RecordSizeType))
     end
@@ -134,7 +137,7 @@ end
 
 Return a list of attribute names in the CDF file.
 """
-function attribnames(cdf::CDFDataset; predicate=is_global)
+function attribnames(cdf::CDFDataset; predicate = is_global)
     names = String[]
     buffer = cdf.buffer
     RecordSizeType = recordsize_type(cdf)
diff --git a/src/records/aedr.jl b/src/records/aedr.jl
@@ -21,7 +21,7 @@ struct AEDR{FST, A}
     Value::A            # This consists of the number of elements (specified by the NumElems field) of the data type (specified by the DataType field). This can be thought of as a 1-dimensional array of values (stored contiguously). The size of this field is the product of the number of elements and the size in bytes of each element.
 end
 
-function load_aedr_data(buffer::Vector{UInt8}, offset, RecordSizeType, cdf_encoding)
+function load_aedr_data(buffer::Vector{UInt8}, offset, RecordSizeType, needs_byte_swap)
     _datatype_offset = 9 + 2 * sizeof(RecordSizeType)
     _numelems_offset = 17 + 2 * sizeof(RecordSizeType)
     _data_offset = 41 + 2 * sizeof(RecordSizeType)
@@ -31,7 +31,6 @@ function load_aedr_data(buffer::Vector{UInt8}, offset, RecordSizeType, cdf_encod
     return if datatype in (CDF_CHAR, CDF_UCHAR)
         load_char_data(buffer, offset + _data_offset, NumElems)
     else
-        needs_byte_swap = is_big_endian_encoding(cdf_encoding)
         load_attribute_data(T, buffer, offset + _data_offset, NumElems, needs_byte_swap)
     end
 end