# Patch summary (commentary placed before the first `diff --git` header).
#
# Purpose: drop the ResumableFunctions dependency and replace the offset-chain
# helpers with a plain iterator type.
#
#   * Project.toml: removes the `ResumableFunctions` entry from the dependency
#     list and its "1" compat bound.
#   * src/CommonDataFormat.jl: removes `using ResumableFunctions`.
#   * src/parsing.jl: deletes `get_offsets_lazy` (the `@resumable` generator),
#     `get_offsets!` and `get_offsets`; changes `cdf_magic_bytes` from an array
#     literal to a tuple literal with the same three values.
#   * src/types.jl: adds `OffsetsIterator{RecordSizeType}` (fields `buffer`,
#     `start_pos`). `iterate` yields the current position and reads the next one
#     with `read_be(buffer, pos + 1 + sizeof(RecordSizeType) + 4, RecordSizeType)`,
#     the same expression the deleted helpers used; iteration stops at position 0.
#     `IteratorSize` is `SizeUnknown()` and `eltype` is `Int`.
#   * src/dataset.jl: adds `OffsetsIterator(cdf::CDFDataset)`, which starts at
#     `cdf.gdr.ADRhead` and takes its type parameter from `recordsize_type(cdf)`.
#   * src/loading/attribute.jl: `load_attribute_entries` now receives
#     `needs_byte_swap` instead of `cdf_encoding`, and its docstring becomes a
#     one-line comment; both `attrib` methods compute the flag once and pass it
#     down; `attrib(cdf, name)` compares `codeunits(name)` with `adr.Name` instead
#     of building a `String` per record; `vattrib` and `attribnames` iterate
#     `OffsetsIterator(cdf)`.
#
# NOTE(review): the `attrib` methods call `is_big_endian_encoding(cdf)` with the
#   dataset itself, while the only definition visible in this patch takes an
#   `encoding` and tests membership in a tuple of integers. Presumably a
#   `CDFDataset` method exists elsewhere - confirm before merging.
# NOTE(review): `name_bytes == adr.Name` presumably relies on `adr.Name` holding
#   exactly the name's bytes (no padding or terminator) - verify against `ADR`.
# NOTE(review): like the helpers it replaces, `OffsetsIterator` follows the chain
#   until it reads a zero offset; no guard against a cyclic chain is visible here.
# NOTE(review): the two `vattrib` methods still derive the flag from
#   `cdf_encoding`, unlike the updated `attrib` methods.
# NOTE(review): the line breaks of this patch look collapsed into spaces; it
#   probably needs to be re-exported before `git apply` will accept it - confirm.
#
diff --git a/Project.toml b/Project.toml index cdf58ec..ec8ee06 100644 --- a/Project.toml +++ b/Project.toml @@ -9,7 +9,6 @@ DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3" LibDeflate = "9255714d-24a7-4b30-8ea3-d46a97f7e13b" Mmap = "a63ad114-7e13-5084-954f-fe012c677804" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" -ResumableFunctions = "c5292f4c-5179-55e1-98c5-05642aab7184" [weakdeps] CommonDataModel = "1fbeeb36-5f17-413c-809b-666fb144f157" @@ -24,5 +23,4 @@ DiskArrays = "0.4" LibDeflate = "0.4.3" Mmap = "1" PrecompileTools = "1" -ResumableFunctions = "1" julia = "1.10" diff --git a/src/CommonDataFormat.jl b/src/CommonDataFormat.jl index 1c30a6c..a664c69 100644 --- a/src/CommonDataFormat.jl +++ b/src/CommonDataFormat.jl @@ -2,7 +2,6 @@ module CommonDataFormat using Dates using Mmap -using ResumableFunctions using DiskArrays using Base.Threads using LibDeflate diff --git a/src/dataset.jl b/src/dataset.jl index 6edb2ad..ebb253f 100644 --- a/src/dataset.jl +++ b/src/dataset.jl @@ -133,3 +133,6 @@ function Base.show(io::IO, m::MIME"text/plain", cdf::CDFDataset) show(io, m, cdf.attrib) return end + +OffsetsIterator(cdf::CDFDataset) = + OffsetsIterator{recordsize_type(cdf)}(cdf.buffer, cdf.gdr.ADRhead) diff --git a/src/loading/attribute.jl b/src/loading/attribute.jl index c72230d..13e38b6 100644 --- a/src/loading/attribute.jl +++ b/src/loading/attribute.jl @@ -1,15 +1,10 @@ # Attribute loading functionality # Handles loading of ADR (Attribute Descriptor Record) and AEDR (Attribute Entry Descriptor Record) chains -""" - load_attribute_entries(buffer::Vector{UInt8}, adr, RecordSizeType, cdf_encoding) -> Vector{AttributeEntry} - -Load all attribute entries for a given attribute from its AEDRs. -""" -@inline function load_attribute_entries(buffer::Vector{UInt8}, adr, RecordSizeType, cdf_encoding) +# Load all attribute entries for a given attribute from its AEDRs. 
+@inline function load_attribute_entries(buffer::Vector{UInt8}, adr, RecordSizeType, needs_byte_swap) head = max(adr.AgrEDRhead, adr.AzEDRhead) - offsets = get_offsets(buffer, head, RecordSizeType) - needs_byte_swap = is_big_endian_encoding(cdf_encoding) + offsets = OffsetsIterator{RecordSizeType}(buffer, head) return map(offsets) do offset load_aedr_data(buffer, offset, RecordSizeType, needs_byte_swap) end @@ -23,13 +18,13 @@ Load all attributes from the CDF file. function attrib(cdf::CDFDataset; predicate = is_global) RecordSizeType = recordsize_type(cdf) buffer = cdf.buffer - cdf_encoding = cdf.cdr.encoding - offsets = get_offsets(buffer, cdf.gdr.ADRhead, RecordSizeType) + needs_byte_swap = is_big_endian_encoding(cdf) + offsets = collect(OffsetsIterator(cdf)) adrs = map(of -> ADR(buffer, of, RecordSizeType), offsets) adrs = filter!(predicate, adrs) names = map(adr -> String(adr.Name), adrs) aedrs = map(adrs) do adr - load_attribute_entries(buffer, adr, RecordSizeType, cdf_encoding) + load_attribute_entries(buffer, adr, RecordSizeType, needs_byte_swap) end return Dict(zip(names, aedrs)) end @@ -42,11 +37,12 @@ Retrieve all entries for a named attribute from the CDF file. 
function attrib(cdf::CDFDataset, name::String) RecordSizeType = recordsize_type(cdf) buffer = cdf.buffer - cdf_encoding = cdf.cdr.encoding - offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType) + needs_byte_swap = is_big_endian_encoding(cdf) + offsets = OffsetsIterator(cdf) + name_bytes = codeunits(name) for offset in offsets adr = ADR(buffer, offset, RecordSizeType) - name == String(adr.Name) && return load_attribute_entries(buffer, adr, RecordSizeType, cdf_encoding) + name_bytes == adr.Name && return load_attribute_entries(buffer, adr, RecordSizeType, needs_byte_swap) end error("Attribute '$name' not found in CDF file") end @@ -61,7 +57,7 @@ function vattrib(cdf::CDFDataset, varnum::Integer) buffer = cdf.buffer cdf_encoding = cdf.cdr.encoding attributes = Dict{String, Union{String, Vector}}() - offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType) + offsets = OffsetsIterator(cdf) needs_byte_swap = is_big_endian_encoding(cdf_encoding) for offset in offsets is_global(buffer, offset, RecordSizeType) && continue @@ -99,7 +95,7 @@ function vattrib(cdf, varnum, name) cdf_encoding = cdf.cdr.encoding # Search for the specific attribute by name first - offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType) + offsets = OffsetsIterator(cdf) name_bytes = codeunits(name) needs_byte_swap = is_big_endian_encoding(cdf_encoding) for offset in offsets @@ -141,7 +137,7 @@ function attribnames(cdf::CDFDataset; predicate = is_global) names = String[] buffer = cdf.buffer RecordSizeType = recordsize_type(cdf) - offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType) + offsets = OffsetsIterator(cdf) for offset in offsets adr = ADR(buffer, offset, RecordSizeType) predicate(adr) && push!(names, String(adr.Name)) diff --git a/src/parsing.jl b/src/parsing.jl index cb261c1..2dfa603 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -122,24 +122,6 @@ function readname(buf::Vector{UInt8}, offset::Int) return @views buf[offset:(offset + 
255)] end -@resumable function get_offsets_lazy(buffer::Vector{UInt8}, pos, ::Type{RecordSizeType}) where {RecordSizeType} - pos = Int(pos) - while pos != 0 - @yield pos - pos = Int(read_be(buffer, pos + 1 + sizeof(RecordSizeType) + 4, RecordSizeType)) - end -end - -function get_offsets!(offsets, buffer::Vector{UInt8}, pos, FieldSizeType) - pos = Int(pos) - while pos != 0 - push!(offsets, pos) - pos = Int(read_be(buffer, pos + 1 + sizeof(FieldSizeType) + 4, FieldSizeType)) - end - return offsets -end -get_offsets(args...) = get_offsets!(Int[], args...) - """ is_cdf_v3(magic_bytes) @@ -157,7 +139,7 @@ function is_big_endian_encoding(encoding) return encoding in (1, 2, 5, 7, 9, 12, 19) end -const cdf_magic_bytes = [0xCDF30001, 0xCDF26002, 0x0000FFFF] # CDF format uses different magic numbers: CDF3.0, CDF2.x versions +const cdf_magic_bytes = (0xCDF30001, 0xCDF26002, 0x0000FFFF) # CDF format uses different magic numbers: CDF3.0, CDF2.x versions function validate_cdf_magic(magic_bytes) return magic_bytes in cdf_magic_bytes diff --git a/src/types.jl b/src/types.jl index b84e9a2..a7e1472 100644 --- a/src/types.jl +++ b/src/types.jl @@ -5,3 +5,17 @@ struct RInt32 <: ReservedField end _sizeof(x) = sizeof(x) _sizeof(::Type{RInt32}) = sizeof(Int32) + +struct OffsetsIterator{RecordSizeType} + buffer::Vector{UInt8} + start_pos::Int +end + +Base.IteratorSize(::Type{<:OffsetsIterator}) = Base.SizeUnknown() +Base.eltype(::Type{<:OffsetsIterator}) = Int + +function Base.iterate(iter::OffsetsIterator{RecordSizeType}, pos::Int = iter.start_pos) where {RecordSizeType} + pos == 0 && return nothing + next_pos = Int(read_be(iter.buffer, pos + 1 + sizeof(RecordSizeType) + 4, RecordSizeType)) + return (pos, next_pos) +end