Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
LibDeflate = "9255714d-24a7-4b30-8ea3-d46a97f7e13b"
Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
ResumableFunctions = "c5292f4c-5179-55e1-98c5-05642aab7184"

[weakdeps]
CommonDataModel = "1fbeeb36-5f17-413c-809b-666fb144f157"
Expand All @@ -24,5 +23,4 @@ DiskArrays = "0.4"
LibDeflate = "0.4.3"
Mmap = "1"
PrecompileTools = "1"
ResumableFunctions = "1"
julia = "1.10"
1 change: 0 additions & 1 deletion src/CommonDataFormat.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ module CommonDataFormat

using Dates
using Mmap
using ResumableFunctions
using DiskArrays
using Base.Threads
using LibDeflate
Expand Down
3 changes: 3 additions & 0 deletions src/dataset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,6 @@ function Base.show(io::IO, m::MIME"text/plain", cdf::CDFDataset)
show(io, m, cdf.attrib)
return
end

# Convenience constructor: build the offsets iterator for `cdf`, parameterized by the
# dataset's record-size type and starting at the `ADRhead` offset stored in its GDR.
function OffsetsIterator(cdf::CDFDataset)
    T = recordsize_type(cdf)
    return OffsetsIterator{T}(cdf.buffer, cdf.gdr.ADRhead)
end
30 changes: 13 additions & 17 deletions src/loading/attribute.jl
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
# Attribute loading functionality
# Handles loading of ADR (Attribute Descriptor Record) and AEDR (Attribute Entry Descriptor Record) chains

"""
load_attribute_entries(buffer::Vector{UInt8}, adr, RecordSizeType, cdf_encoding) -> Vector{AttributeEntry}

Load all attribute entries for a given attribute from its AEDRs.
"""
@inline function load_attribute_entries(buffer::Vector{UInt8}, adr, RecordSizeType, cdf_encoding)
# Load all attribute entries for a given attribute from its AEDRs.
@inline function load_attribute_entries(buffer::Vector{UInt8}, adr, RecordSizeType, needs_byte_swap)
head = max(adr.AgrEDRhead, adr.AzEDRhead)
offsets = get_offsets(buffer, head, RecordSizeType)
needs_byte_swap = is_big_endian_encoding(cdf_encoding)
offsets = OffsetsIterator{RecordSizeType}(buffer, head)
return map(offsets) do offset
load_aedr_data(buffer, offset, RecordSizeType, needs_byte_swap)
end
Expand All @@ -23,13 +18,13 @@ Load all attributes from the CDF file.
function attrib(cdf::CDFDataset; predicate = is_global)
# Integer type handed to the ADR constructor and to `load_attribute_entries`.
RecordSizeType = recordsize_type(cdf)
buffer = cdf.buffer
# NOTE(review): `offsets` is assigned twice and `load_attribute_entries` is called twice
# below (once with `cdf_encoding`, once with `needs_byte_swap`) — presumably interleaved
# before/after lines of the diff; confirm only the second of each pair is meant to survive.
cdf_encoding = cdf.cdr.encoding
offsets = get_offsets(buffer, cdf.gdr.ADRhead, RecordSizeType)
needs_byte_swap = is_big_endian_encoding(cdf)
# Materialize the offset chain that starts at `cdf.gdr.ADRhead` into a vector.
offsets = collect(OffsetsIterator(cdf))
# Build one ADR per offset, then keep only those accepted by `predicate`.
adrs = map(of -> ADR(buffer, of, RecordSizeType), offsets)
adrs = filter!(predicate, adrs)
names = map(adr -> String(adr.Name), adrs)
# For each retained ADR, load its attribute entries; the `do` block's value is its
# last expression.
aedrs = map(adrs) do adr
load_attribute_entries(buffer, adr, RecordSizeType, cdf_encoding)
load_attribute_entries(buffer, adr, RecordSizeType, needs_byte_swap)
end
# Pair each attribute name with its loaded entries.
return Dict(zip(names, aedrs))
end
Expand All @@ -42,11 +37,12 @@ Retrieve all entries for a named attribute from the CDF file.
function attrib(cdf::CDFDataset, name::String)
RecordSizeType = recordsize_type(cdf)
buffer = cdf.buffer
# NOTE(review): `offsets` is assigned twice and the match/return line appears twice
# (String comparison with `cdf_encoding`, then byte comparison with `needs_byte_swap`) —
# presumably interleaved before/after lines of the diff; confirm which pair is intended.
cdf_encoding = cdf.cdr.encoding
offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType)
needs_byte_swap = is_big_endian_encoding(cdf)
offsets = OffsetsIterator(cdf)
# Code units of the requested name, compared directly against `adr.Name` below.
name_bytes = codeunits(name)
for offset in offsets
adr = ADR(buffer, offset, RecordSizeType)
# Return the entries of the first ADR whose name matches; later ADRs are not visited.
name == String(adr.Name) && return load_attribute_entries(buffer, adr, RecordSizeType, cdf_encoding)
name_bytes == adr.Name && return load_attribute_entries(buffer, adr, RecordSizeType, needs_byte_swap)
end
# Reached only when no ADR in the chain matched `name`.
error("Attribute '$name' not found in CDF file")
end
Expand All @@ -61,7 +57,7 @@ function vattrib(cdf::CDFDataset, varnum::Integer)
buffer = cdf.buffer
cdf_encoding = cdf.cdr.encoding
attributes = Dict{String, Union{String, Vector}}()
offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType)
offsets = OffsetsIterator(cdf)
needs_byte_swap = is_big_endian_encoding(cdf_encoding)
for offset in offsets
is_global(buffer, offset, RecordSizeType) && continue
Expand Down Expand Up @@ -99,7 +95,7 @@ function vattrib(cdf, varnum, name)
cdf_encoding = cdf.cdr.encoding

# Search for the specific attribute by name first
offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType)
offsets = OffsetsIterator(cdf)
name_bytes = codeunits(name)
needs_byte_swap = is_big_endian_encoding(cdf_encoding)
for offset in offsets
Expand Down Expand Up @@ -141,7 +137,7 @@ function attribnames(cdf::CDFDataset; predicate = is_global)
names = String[]
buffer = cdf.buffer
RecordSizeType = recordsize_type(cdf)
offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType)
offsets = OffsetsIterator(cdf)
for offset in offsets
adr = ADR(buffer, offset, RecordSizeType)
predicate(adr) && push!(names, String(adr.Name))
Expand Down
20 changes: 1 addition & 19 deletions src/parsing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -122,24 +122,6 @@ function readname(buf::Vector{UInt8}, offset::Int)
return @views buf[offset:(offset + 255)]
end

# Yield (via `@yield`) each offset of a linked chain stored in `buffer`, beginning at
# `pos` and stopping once the next link read from the buffer is 0.
# `RecordSizeType` is the integer type passed to `read_be` when reading each link.
@resumable function get_offsets_lazy(buffer::Vector{UInt8}, pos, ::Type{RecordSizeType}) where {RecordSizeType}
# Normalize the starting offset to `Int` so the loop variable keeps one type.
pos = Int(pos)
while pos != 0
@yield pos
# The next offset is read at `pos + 1 + sizeof(RecordSizeType) + 4`.
pos = Int(read_be(buffer, pos + 1 + sizeof(RecordSizeType) + 4, RecordSizeType))
end
end

# Append every offset of a linked chain to `offsets` and return `offsets`.
#
# The walk starts at `pos` (converted to `Int`). For each non-zero offset, the offset is
# pushed and the next one is read with `read_be` at
# `offset + 1 + sizeof(FieldSizeType) + 4`, using `FieldSizeType` as the value type.
# A zero offset terminates the walk; a zero `pos` leaves `offsets` untouched.
function get_offsets!(offsets, buffer::Vector{UInt8}, pos, FieldSizeType)
    current = Int(pos)
    while !iszero(current)
        push!(offsets, current)
        link_at = current + 1 + sizeof(FieldSizeType) + 4
        current = Int(read_be(buffer, link_at, FieldSizeType))
    end
    return offsets
end
# Non-mutating companion of `get_offsets!`: collects the offsets into a fresh `Vector{Int}`.
function get_offsets(args...)
    collected = Int[]
    return get_offsets!(collected, args...)
end

"""
is_cdf_v3(magic_bytes)

Expand All @@ -157,7 +139,7 @@ function is_big_endian_encoding(encoding)
return encoding in (1, 2, 5, 7, 9, 12, 19)
end

# Magic words accepted by `validate_cdf_magic`, which tests membership in this collection.
# NOTE(review): the same constant is bound twice below (vector form, then tuple form) —
# presumably the before/after lines of the diff; confirm only one definition survives.
const cdf_magic_bytes = [0xCDF30001, 0xCDF26002, 0x0000FFFF] # CDF format uses different magic numbers: CDF3.0, CDF2.x versions
const cdf_magic_bytes = (0xCDF30001, 0xCDF26002, 0x0000FFFF) # CDF format uses different magic numbers: CDF3.0, CDF2.x versions

function validate_cdf_magic(magic_bytes)
return magic_bytes in cdf_magic_bytes
Expand Down
14 changes: 14 additions & 0 deletions src/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,17 @@ struct RInt32 <: ReservedField end

# Size helper: defers to `sizeof` for ordinary arguments.
function _sizeof(x)
    return sizeof(x)
end

# The `RInt32` type itself is sized like an `Int32`.
function _sizeof(::Type{RInt32})
    return sizeof(Int32)
end

# Iterable over a chain of offsets stored in a byte buffer.
# The `RecordSizeType` parameter is the integer type handed to `read_be` by
# `Base.iterate` when it reads the link to the next offset.
struct OffsetsIterator{RecordSizeType}
# Bytes in which the chain links are read.
buffer::Vector{UInt8}
# First offset produced by iteration; a value of 0 makes the iterator empty.
start_pos::Int
end

# The number of offsets is only discovered by following the links, so no length is
# advertised to consumers such as `collect`.
function Base.IteratorSize(::Type{<:OffsetsIterator})
    return Base.SizeUnknown()
end

# Every produced element is an `Int` offset.
function Base.eltype(::Type{<:OffsetsIterator})
    return Int
end

# Iteration protocol for `OffsetsIterator`.
#
# The state is the offset to emit next (defaulting to `iter.start_pos`). A state of 0
# ends iteration. Otherwise the current offset is returned together with the following
# one, read with `read_be` at `pos + 1 + sizeof(RecordSizeType) + 4`.
function Base.iterate(
        iter::OffsetsIterator{RecordSizeType}, pos::Int = iter.start_pos
    ) where {RecordSizeType}
    if iszero(pos)
        return nothing
    end
    link_at = pos + 1 + sizeof(RecordSizeType) + 4
    successor = read_be(iter.buffer, link_at, RecordSizeType)
    return (pos, Int(successor))
end
Loading