Skip to content

Commit 8c6ea17

Browse files
authored
feat: add CommonDataModel extension and expose CDF data type constants (#10)
1 parent d777011 commit 8c6ea17

20 files changed

+193
-100
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
*.jl.*.cov
22
*.jl.cov
33
*.jl.mem
4-
/Manifest*.toml
4+
Manifest*.toml
55
/docs/Manifest*.toml
66
/docs/build/
77
ref/

Project.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@ Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
1212
StaticStrings = "4db0a0c5-418a-4e1d-8806-cb305fe13294"
1313
UnixTimes = "ab1a18e7-b408-4913-896c-624bb82ed7f4"
1414

15+
[weakdeps]
16+
CommonDataModel = "1fbeeb36-5f17-413c-809b-666fb144f157"
17+
18+
[extensions]
19+
CommonDataFormatCommonDataModelExt = ["CommonDataModel"]
20+
1521
[compat]
1622
CodecZlib = "0.7"
1723
Dates = "1"

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
[![Build Status](https://github.com/JuliaSpacePhysics/CommonDataFormat.jl/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/JuliaSpacePhysics/CommonDataFormat.jl/actions/workflows/CI.yml?query=branch%3Amain)
55
[![Coverage](https://codecov.io/gh/JuliaSpacePhysics/CommonDataFormat.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/JuliaSpacePhysics/CommonDataFormat.jl)
66

7-
A Julia package for reading Common Data Format (CDF) files, widely used in space physics and other scientific domains for storing multidimensional data arrays and metadata.
7+
A Julia package for reading Common Data Format (CDF) files, widely used in space physics for storing multidimensional data arrays and metadata.
88

99
## Features
1010

1111
- **Pure Julia implementation** - No external dependencies on CDF libraries
12-
- **Efficient data access** - Lazy loading and memory-mapped access
12+
- **Efficient data access** - Lazy memory-mapped access for data and attributes, super fast decompression using [`LibDeflate`](https://github.com/jakobnissen/LibDeflate.jl)
1313

1414
## Installation
1515

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
module CommonDataFormatCommonDataModelExt
2+
3+
using CommonDataFormat
4+
import CommonDataFormat as CDF
5+
import CommonDataModel
6+
import CommonDataModel as CDM
7+
using CommonDataFormat: CDFDataset, CDFVariable
8+
9+
const SymbolOrString = Union{Symbol, AbstractString}
10+
11+
# Dataset level -----------------------------------------------------------------
12+
13+
CDM.path(ds::CDFDataset) = CDF.filename(ds)
14+
CDM.varnames(ds::CDFDataset) = keys(ds)
15+
16+
# Look up a variable by name; `Symbol` names are converted to `String` before
# being forwarded to `CDF.variable`.
CDM.variable(ds::CDFDataset, name::SymbolOrString) = CDF.variable(ds, String(name))
19+
20+
CDM.attribnames(ds::CDFDataset) = CDF.attribnames(ds)
21+
CDM.attrib(ds::CDFDataset, args...) = CDF.attrib(ds, args...)
22+
23+
# Variable level ----------------------------------------------------------------
24+
25+
CDM.name(var::CDFVariable) = var.name
26+
CDM.dataset(var::CDFVariable) = var.parentdataset
27+
CDM.attribnames(var::CDFVariable) = keys(CDF.attrib(var))
28+
CDM.attrib(var::CDFVariable, args...) = CDF.attrib(var, args...)
29+
"""
    CDM.dimnames(var::CDFVariable, i)

Return `CDF.attrib(var, "DEPEND_\$(i - 1)")`, i.e. the `DEPEND_0` attribute for
`i == 1`, `DEPEND_1` for `i == 2`, and so on.

# Throws
- `DimensionMismatch` if `i` is not within `1:ndims(var)`.
"""
@inline function CDM.dimnames(var::CDFVariable, i)
    # Validate explicitly: `@assert` may be compiled out and would raise an
    # `AssertionError` rather than the intended `DimensionMismatch`.
    nd = ndims(var)
    1 <= i <= nd ||
        throw(DimensionMismatch("dimension index $i is outside the valid range 1:$nd"))
    # Build the key from the index so variables with more than three dimensions
    # do not fall through to an undefined key.
    return CDF.attrib(var, string("DEPEND_", i - 1))
end
40+
41+
end

src/CommonDataFormat.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ using LibDeflate: GzipDecompressResult
1212
export CDFDataset, CDFVariable
1313
export Majority, CompressionType, DataType
1414
export Epoch, Epoch16, TT2000
15+
export CDF_EPOCH, CDF_EPOCH16, CDF_TIME_TT2000, CDF_CHAR, CDF_UCHAR
1516

1617
include("epochs.jl")
1718
include("enums.jl")

src/dataset.jl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ function CDFDataset(filename)
3333
buffer, compression = decompress_bytes(buffer, RecordSizeType)
3434
end
3535
# Parse CDF header
36-
cdr = CDR(buffer, 9, RecordSizeType)
36+
cdr = CDR(buffer, 8, RecordSizeType)
3737
gdr = GDR(buffer, cdr.gdr_offset, RecordSizeType)
3838
return CDFDataset{compression, RecordSizeType}(filename, cdr, gdr, buffer)
3939
end
@@ -63,10 +63,11 @@ function find_vdr(cdf::CDFDataset, var_name::String)
6363
gdr = GDR(cdf)
6464
RecordSizeType = recordsize_type(cdf)
6565
buffer = cdf.buffer
66+
var_name_bytes = codeunits(var_name)
6667
for current_offset in (gdr.rVDRhead, gdr.zVDRhead)
6768
while current_offset != 0
6869
vdr = zVDR(buffer, current_offset, RecordSizeType)
69-
if String(vdr.name) == var_name
70+
if vdr.name == var_name_bytes
7071
return vdr
7172
end
7273
current_offset = vdr.vdr_next

src/epochs.jl

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ struct TT2000 <: CDFDateTime
4545
value::Int64
4646
end
4747

48+
# Raw values that mark a missing timestamp (`FILLVAL`) for each CDF time type.
fillvalue(::Epoch) = -1.0e31
# NOTE(review): CDF_EPOCH16 is stored as a pair of doubles; confirm that this
# scalar compares as intended against `Epoch16.value`.
fillvalue(::Epoch16) = -1.0e31
# CDF_TIME_TT2000 reserves the smallest Int64 as its fill value. The previous
# value (9999) never matched real fill data and flagged a raw value of 9999
# as FILLVAL.
fillvalue(::TT2000) = typemin(Int64)
51+
4852
# Conversion to DateTime
4953
function Dates.DateTime(epoch::Epoch)
5054
return DateTime(0) + Millisecond(round(Int64, epoch.value))
@@ -88,7 +92,16 @@ for f in (:year, :month, :day, :hour, :minute, :second, :millisecond)
8892
@eval Dates.$f(epoch::CDFDateTime) = Dates.$f(DateTime(epoch))
8993
end
9094

91-
Base.show(io::IO, epoch::CDFDateTime) = print(io, typeof(epoch), "(", DateTime(epoch), ")")
95+
Dates.value(epoch::CDFDateTime) = epoch.value
96+
97+
# Print a CDF epoch as its `DateTime`, or as the literal `FILLVAL` when the raw
# value equals the fill value of its type.
function Base.show(io::IO, epoch::CDFDateTime)
    is_fill = fillvalue(epoch) == epoch.value
    return is_fill ? print(io, "FILLVAL") : print(io, DateTime(epoch))
end
92105
Base.promote_rule(::Type{<:CDFDateTime}, ::Type{Dates.DateTime}) = Dates.DateTime
93106
Base.convert(::Type{Dates.DateTime}, x::CDFDateTime) = Dates.DateTime(x)
94107
Base.bswap(x::T) where {T <: CDFDateTime} = T(Base.bswap(x.value))

src/loading/variable.jl

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,17 @@ function load_variable_data(source, vxr_head, ::Type{T}, dims, ::Type{RecordSize
3737
return reshape(data, dims)
3838
end
3939

40-
function read_variable_data!(data::Vector{T}, source, vvrs, compression, record_size, RecordSizeType; nbuffers = nthreads()) where {T}
40+
function read_variable_data!(data::Vector{T}, source, vvrs, compression, record_size, ::Type{FieldSizeT}; nbuffers = nthreads()) where {T, FieldSizeT}
4141
pos = 1
4242
if compression == NoCompression || first(vvrs).RecordType == VVR_ # vvr records is the ultimative source
4343
for entry in vvrs
4444
N = min(length(data) - pos + 1, length(entry) * record_size)
45-
load_vvr_data!(data, pos, source, entry.offset, N, RecordSizeType)
45+
load_vvr_data!(data, pos, source, entry.offset, N, FieldSizeT)
4646
pos += N
4747
end
48+
elseif length(vvrs) == 1
49+
load_cvvr_data!(data, 1, source, vvrs[1].offset, length(data), FieldSizeT, compression)
50+
pos = length(data) + 1
4851
else
4952
n_ch = min(nbuffers, length(vvrs))
5053
chnl = Channel{Decompressor}(n_ch)
@@ -54,7 +57,7 @@ function read_variable_data!(data::Vector{T}, source, vvrs, compression, record_
5457
Base.@inbounds Threads.@threads for i in eachindex(vvrs)
5558
decompressor = take!(chnl)
5659
N = Ns[i]
57-
load_cvvr_data!(data, positions[i] + 1, source, vvrs[i].offset, N, RecordSizeType, compression; decompressor)
60+
load_cvvr_data!(data, positions[i] + 1, source, vvrs[i].offset, N, FieldSizeT, compression; decompressor)
5861
put!(chnl, decompressor)
5962
end
6063
pos = positions[end] + 1
@@ -69,17 +72,18 @@ function read_vvrs(src, vxr_head, RecordSizeType)
6972
return entries
7073
end
7174

72-
function collect_vxr_entries!(entries::Vector{VVREntry}, src::Vector{UInt8}, offset, RecordSizeType)
75+
function collect_vxr_entries!(entries::Vector{VVREntry}, src::Vector{UInt8}, offset, ::Type{FieldSizeT}) where FieldSizeT
7376
while offset != 0
74-
vxr = VXR(src, offset, RecordSizeType)
75-
foreach(vxr.first, vxr.last, vxr.offset) do first_rec, last_rec, raw_offset
76-
leaf_offset = Int(raw_offset)
77-
record_type = Header(src, leaf_offset + 1, RecordSizeType).record_type
77+
78+
vxr = VXR(src, offset, FieldSizeT)
79+
for (first, last, offset) in vxr
80+
leaf_offset = Int(offset)
81+
record_type = Header(src, leaf_offset + 1, FieldSizeT).record_type
7882
@assert record_type in (VVR_, CVVR_, VXR_)
7983
if record_type == VXR_
80-
collect_vxr_entries!(entries, src, leaf_offset, RecordSizeType)
84+
collect_vxr_entries!(entries, src, leaf_offset, FieldSizeT)
8185
else
82-
push!(entries, VVREntry(record_type, first_rec, last_rec, leaf_offset))
86+
push!(entries, VVREntry(record_type, Int(first), Int(last), leaf_offset))
8387
end
8488
end
8589
offset = Int(vxr.vxr_next)

src/parsing.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
end
1515
end
1616

17+
# Load the `i`-th (1-based) element of type `T` starting at pointer `p` and
# convert it from big-endian (network) byte order to host byte order.
@inline read_be(p::Ptr{T}, i) where {T} = ntoh(unsafe_load(p, i))
20+
1721
@inline function read_be(v::Vector{UInt8}, i, n, T)
1822
S = sizeof(T)
1923
return ntuple(j -> read_be(v, i + (j - 1) * S, T), n)

src/records/cdr.jl

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ CDF Descriptor Record (CDR) - the main file header record
33
Contains version, encoding, format information, and pointer to GDR
44
"""
55
struct CDR <: Record
6-
header::Header
7-
gdr_offset::UInt64 # Can be UssInt32 for v2, UInt64 for v3
6+
# header::Header
7+
gdr_offset::UInt64 # Can be UInt32 for v2, UInt64 for v3
88
version::Int32
99
release::Int32
1010
encoding::Int32
@@ -13,7 +13,7 @@ struct CDR <: Record
1313
rfu_b::Int32 # Reserved field B
1414
increment::Int32
1515
identifier::Int32
16-
rfu_e::Int32 # Reserved field E
16+
# rfu_e::Int32 # Reserved field E
1717
# Note: copyright string follows but we'll handle it separately
1818
end
1919

@@ -22,17 +22,15 @@ Majority(cdr::CDR) = (cdr.flags & 0x01) != 0 ? Majority(0) : Majority(1) # Row=
2222
is_cdf_v3(cdr::CDR) = cdr.version == 3
2323

2424
"""
25-
CDR(buffer, pos, RecordSizeType)
25+
    CDR(buffer, offset, FieldSizeT)
2626
2727
Load a CDF Descriptor Record from the byte buffer at the specified offset.
2828
This follows the CDF specification for CDR record structure.
2929
"""
30-
@inline function CDR(buffer::Vector{UInt8}, pos, RecordSizeType)
31-
header = Header(buffer, pos, RecordSizeType)
32-
@assert header.record_type == 1 "Invalid CDR record type"
33-
pos += sizeof(RecordSizeType) + 4
30+
@inline function CDR(buffer::Vector{UInt8}, offset, FieldSizeT)
31+
pos = check_record_type(1, buffer, offset, FieldSizeT)
3432
# Read remaining CDR fields in order as per CDF specification
35-
gdr_offset, pos = read_be_i(buffer, pos, RecordSizeType)
36-
fields = read_be(buffer, pos, 9, Int32)
37-
return CDR(header, gdr_offset, fields...)
33+
gdr_offset, pos = read_be_i(buffer, pos, FieldSizeT)
34+
fields, pos = @read_be_fields(buffer, pos, fieldtypes(CDR)[2:end]...)
35+
return CDR(gdr_offset, fields...)
3836
end

0 commit comments

Comments
 (0)