Skip to content

Commit 038d8ba

Browse files
authored
feat: add lazy attribute loading using ResumableFunctions (#16)
1 parent 2ce90f9 commit 038d8ba

File tree

5 files changed

+23
-13
lines changed

5 files changed

+23
-13
lines changed

Project.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "CommonDataFormat"
22
uuid = "a9737db6-c05c-4e48-868b-6bc41491d9d9"
3-
version = "0.1.2"
43
authors = ["Beforerr <[email protected]> and contributors"]
4+
version = "0.1.2"
55

66
[deps]
77
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
@@ -10,6 +10,7 @@ Dictionaries = "85a47980-9c8c-11e8-2b9f-f7ca1fa99fb4"
1010
DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3"
1111
LibDeflate = "9255714d-24a7-4b30-8ea3-d46a97f7e13b"
1212
Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
13+
ResumableFunctions = "c5292f4c-5179-55e1-98c5-05642aab7184"
1314
StaticStrings = "4db0a0c5-418a-4e1d-8806-cb305fe13294"
1415
UnixTimes = "ab1a18e7-b408-4913-896c-624bb82ed7f4"
1516

@@ -27,6 +28,7 @@ Dictionaries = "0.4"
2728
DiskArrays = "0.4"
2829
LibDeflate = "0.4.3"
2930
Mmap = "1"
31+
ResumableFunctions = "1.0.4"
3032
StaticStrings = "0.2.6"
3133
UnixTimes = "1.7.2"
3234
julia = "1.10"

src/CommonDataFormat.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ module CommonDataFormat
33
using Dates, UnixTimes
44
using Mmap
55
using Dictionaries
6+
using ResumableFunctions
67
using DiskArrays
78
using StaticStrings
89
using Base.Threads

src/loading/attribute.jl

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,13 @@ end
1919
2020
Load all attributes from the CDF file.
2121
"""
22-
function attrib(cdf::CDFDataset; predicate = is_global)
22+
function attrib(cdf::CDFDataset; predicate=is_global)
2323
RecordSizeType = recordsize_type(cdf)
2424
buffer = cdf.buffer
2525
cdf_encoding = cdf.cdr.encoding
2626
offsets = get_offsets(buffer, cdf.gdr.ADRhead, RecordSizeType)
2727
adrs = map(of -> ADR(buffer, of, RecordSizeType), offsets)
28-
adrs = filter(predicate, adrs)
28+
adrs = filter!(predicate, adrs)
2929
names = map(adr -> String(adr.Name), adrs)
3030
aedrs = map(adrs) do adr
3131
load_attribute_entries(buffer, adr, RecordSizeType, cdf_encoding)
@@ -42,7 +42,7 @@ function attrib(cdf::CDFDataset, name::String)
4242
RecordSizeType = recordsize_type(cdf)
4343
buffer = cdf.buffer
4444
cdf_encoding = cdf.cdr.encoding
45-
offsets = get_offsets(buffer, cdf.gdr.ADRhead, RecordSizeType)
45+
offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType)
4646
for offset in offsets
4747
adr = ADR(buffer, offset, RecordSizeType)
4848
name == String(adr.Name) && return load_attribute_entries(buffer, adr, RecordSizeType, cdf_encoding)
@@ -59,11 +59,11 @@ function vattrib(cdf::CDFDataset, varnum::Integer)
5959
RecordSizeType = recordsize_type(cdf)
6060
buffer = cdf.buffer
6161
cdf_encoding = cdf.cdr.encoding
62-
attributes = Dict{String, Union{String, Vector}}()
63-
offsets = get_offsets(buffer, cdf.gdr.ADRhead, RecordSizeType)
62+
attributes = Dict{String,Union{String,Vector}}()
63+
offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType)
6464
for offset in offsets
65+
is_global(buffer, offset, RecordSizeType) && continue
6566
adr = ADR(buffer, offset, RecordSizeType)
66-
is_global(adr) && continue
6767
@assert min(adr.AgrEDRhead, adr.AzEDRhead) == 0
6868
head = max(adr.AgrEDRhead, adr.AzEDRhead)
6969
found = _search_aedr_entries(buffer, head, RecordSizeType, cdf_encoding, varnum)
@@ -95,11 +95,11 @@ function vattrib(cdf, varnum, name)
9595
cdf_encoding = cdf.cdr.encoding
9696

9797
# Search for the specific attribute by name first
98-
offsets = get_offsets(buffer, cdf.gdr.ADRhead, RecordSizeType)
98+
offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType)
9999
name_bytes = codeunits(name)
100100
for offset in offsets
101+
is_global(buffer, offset, RecordSizeType) && continue
101102
adr = ADR(buffer, offset, RecordSizeType)
102-
is_global(adr) && continue
103103
adr.Name != name_bytes && continue
104104
@assert min(adr.AgrEDRhead, adr.AzEDRhead) == 0
105105
head = max(adr.AgrEDRhead, adr.AzEDRhead)
@@ -127,15 +127,14 @@ end
127127
128128
Return a list of attribute names in the CDF file.
129129
"""
130-
function attribnames(cdf::CDFDataset; filter = is_global)
130+
function attribnames(cdf::CDFDataset; predicate=is_global)
131131
names = String[]
132132
buffer = cdf.buffer
133133
RecordSizeType = recordsize_type(cdf)
134-
offsets = get_offsets(buffer, cdf.gdr.ADRhead, RecordSizeType)
135-
sizehint!(names, length(offsets))
134+
offsets = get_offsets_lazy(buffer, cdf.gdr.ADRhead, RecordSizeType)
136135
for offset in offsets
137136
adr = ADR(buffer, offset, RecordSizeType)
138-
filter(adr) && push!(names, String(adr.Name))
137+
predicate(adr) && push!(names, String(adr.Name))
139138
end
140139
return names
141140
end

src/parsing.jl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,12 @@ function readname(buf::Vector{UInt8}, offset::Int)
125125
return @views buf[offset:(offset + 255)]
126126
end
127127

128+
@resumable function get_offsets_lazy(buffer::Vector{UInt8}, pos::Int64, ::Type{RecordSizeType}) where {RecordSizeType}
129+
while pos != 0
130+
@yield pos
131+
pos = read_be(buffer, pos + 1 + sizeof(RecordSizeType) + 4, RecordSizeType)
132+
end
133+
end
128134

129135
function get_offsets!(offsets, buffer::Vector{UInt8}, pos::Int64, RecordSizeType)
130136
while pos != 0

src/records/adr.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ struct ADR{FSZ, S} <: Record
2020
end
2121

2222
is_global(adr) = adr.Scope == 1
23+
is_global(buffer, offset, ::Type{Int32}) = read_be(buffer, offset + 17, Int32) == 1
24+
is_global(buffer, offset, ::Type{Int64}) = read_be(buffer, offset + 29, Int32) == 1
2325

2426

2527
"""

0 commit comments

Comments
 (0)