Skip to content

Commit 34af43b

Browse files
committed
perf: replace offset helper functions with OffsetsIterator type
- Introduced OffsetsIterator struct to encapsulate offset traversal logic
- Replaced get_offsets, get_offsets!, and get_offsets_lazy with iterator-based approach
- Changed cdf_magic_bytes from Vector to Tuple for immutability
- Optimized attribute name comparison using codeunits instead of String conversion
1 parent 2bc89c7 commit 34af43b

File tree

3 files changed

+27
-26
lines changed

3 files changed

+27
-26
lines changed

src/loading/attribute.jl

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Load all attribute entries for a given attribute from its AEDRs.
88
"""
99
@inline function load_attribute_entries(buffer::Vector{UInt8}, adr, RecordSizeType, cdf_encoding)
1010
head = max(adr.AgrEDRhead, adr.AzEDRhead)
11-
offsets = get_offsets(buffer, head, RecordSizeType)
11+
offsets = OffsetsIterator{RecordSizeType}(buffer, head)
1212
needs_byte_swap = is_big_endian_encoding(cdf_encoding)
1313
return map(offsets) do offset
1414
load_aedr_data(buffer, offset, RecordSizeType, needs_byte_swap)
@@ -24,7 +24,7 @@ function attrib(cdf::CDFDataset; predicate = is_global)
2424
RecordSizeType = recordsize_type(cdf)
2525
buffer = cdf.buffer
2626
cdf_encoding = cdf.cdr.encoding
27-
offsets = get_offsets(buffer, cdf.gdr.ADRhead, RecordSizeType)
27+
offsets = collect(OffsetsIterator(cdf))
2828
adrs = map(of -> ADR(buffer, of, RecordSizeType), offsets)
2929
adrs = filter!(predicate, adrs)
3030
names = map(adr -> String(adr.Name), adrs)
@@ -43,10 +43,11 @@ function attrib(cdf::CDFDataset, name::String)
4343
RecordSizeType = recordsize_type(cdf)
4444
buffer = cdf.buffer
4545
cdf_encoding = cdf.cdr.encoding
46-
offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType)
46+
offsets = OffsetsIterator(cdf)
47+
cu = codeunits(name)
4748
for offset in offsets
4849
adr = ADR(buffer, offset, RecordSizeType)
49-
name == String(adr.Name) && return load_attribute_entries(buffer, adr, RecordSizeType, cdf_encoding)
50+
cu == adr.Name && return load_attribute_entries(buffer, adr, RecordSizeType, cdf_encoding)
5051
end
5152
error("Attribute '$name' not found in CDF file")
5253
end
@@ -61,7 +62,7 @@ function vattrib(cdf::CDFDataset, varnum::Integer)
6162
buffer = cdf.buffer
6263
cdf_encoding = cdf.cdr.encoding
6364
attributes = Dict{String, Union{String, Vector}}()
64-
offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType)
65+
offsets = OffsetsIterator(cdf)
6566
needs_byte_swap = is_big_endian_encoding(cdf_encoding)
6667
for offset in offsets
6768
is_global(buffer, offset, RecordSizeType) && continue
@@ -99,7 +100,7 @@ function vattrib(cdf, varnum, name)
99100
cdf_encoding = cdf.cdr.encoding
100101

101102
# Search for the specific attribute by name first
102-
offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType)
103+
offsets = OffsetsIterator(cdf)
103104
name_bytes = codeunits(name)
104105
needs_byte_swap = is_big_endian_encoding(cdf_encoding)
105106
for offset in offsets
@@ -141,7 +142,7 @@ function attribnames(cdf::CDFDataset; predicate = is_global)
141142
names = String[]
142143
buffer = cdf.buffer
143144
RecordSizeType = recordsize_type(cdf)
144-
offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType)
145+
offsets = OffsetsIterator(cdf)
145146
for offset in offsets
146147
adr = ADR(buffer, offset, RecordSizeType)
147148
predicate(adr) && push!(names, String(adr.Name))

src/parsing.jl

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -122,24 +122,6 @@ function readname(buf::Vector{UInt8}, offset::Int)
122122
return @views buf[offset:(offset + 255)]
123123
end
124124

125-
@resumable function get_offsets_lazy(buffer::Vector{UInt8}, pos, ::Type{RecordSizeType}) where {RecordSizeType}
126-
pos = Int(pos)
127-
while pos != 0
128-
@yield pos
129-
pos = Int(read_be(buffer, pos + 1 + sizeof(RecordSizeType) + 4, RecordSizeType))
130-
end
131-
end
132-
133-
function get_offsets!(offsets, buffer::Vector{UInt8}, pos, FieldSizeType)
134-
pos = Int(pos)
135-
while pos != 0
136-
push!(offsets, pos)
137-
pos = Int(read_be(buffer, pos + 1 + sizeof(FieldSizeType) + 4, FieldSizeType))
138-
end
139-
return offsets
140-
end
141-
get_offsets(args...) = get_offsets!(Int[], args...)
142-
143125
"""
144126
is_cdf_v3(magic_bytes)
145127
@@ -157,7 +139,7 @@ function is_big_endian_encoding(encoding)
157139
return encoding in (1, 2, 5, 7, 9, 12, 19)
158140
end
159141

160-
const cdf_magic_bytes = [0xCDF30001, 0xCDF26002, 0x0000FFFF] # CDF format uses different magic numbers: CDF3.0, CDF2.x versions
142+
const cdf_magic_bytes = (0xCDF30001, 0xCDF26002, 0x0000FFFF) # CDF format uses different magic numbers: CDF3.0, CDF2.x versions
161143

162144
function validate_cdf_magic(magic_bytes)
163145
return magic_bytes in cdf_magic_bytes

src/types.jl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,21 @@ struct RInt32 <: ReservedField end
55

66
_sizeof(x) = sizeof(x)
77
_sizeof(::Type{RInt32}) = sizeof(Int32)
8+
9+
struct OffsetsIterator{RecordSizeType}
10+
buffer::Vector{UInt8}
11+
start_pos::Int
12+
end
13+
14+
Base.IteratorSize(::Type{<:OffsetsIterator}) = Base.SizeUnknown()
15+
Base.eltype(::Type{<:OffsetsIterator}) = Int
16+
17+
function OffsetsIterator(cdf::CDFDataset)
18+
return OffsetsIterator{recordsize_type(cdf)}(cdf.buffer, cdf.gdr.ADRhead)
19+
end
20+
21+
function Base.iterate(iter::OffsetsIterator{RecordSizeType}, pos::Int = iter.start_pos) where {RecordSizeType}
22+
pos == 0 && return nothing
23+
next_pos = Int(read_be(iter.buffer, pos + 1 + sizeof(RecordSizeType) + 4, RecordSizeType))
24+
return (pos, next_pos)
25+
end

0 commit comments

Comments
 (0)