diff --git a/Project.toml b/Project.toml index 5c5923a..25f40af 100644 --- a/Project.toml +++ b/Project.toml @@ -12,7 +12,6 @@ LibDeflate = "9255714d-24a7-4b30-8ea3-d46a97f7e13b" Mmap = "a63ad114-7e13-5084-954f-fe012c677804" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" ResumableFunctions = "c5292f4c-5179-55e1-98c5-05642aab7184" -StaticStrings = "4db0a0c5-418a-4e1d-8806-cb305fe13294" [weakdeps] CommonDataModel = "1fbeeb36-5f17-413c-809b-666fb144f157" @@ -30,5 +29,4 @@ LibDeflate = "0.4.3" Mmap = "1" PrecompileTools = "1" ResumableFunctions = "1" -StaticStrings = "0.2.6" julia = "1.10" diff --git a/src/CommonDataFormat.jl b/src/CommonDataFormat.jl index 450c077..2e45784 100644 --- a/src/CommonDataFormat.jl +++ b/src/CommonDataFormat.jl @@ -5,7 +5,6 @@ using Mmap using Dictionaries: Dictionary using ResumableFunctions using DiskArrays -using StaticStrings: StaticString using Base.Threads using CodecZlib: GzipDecompressor, transcode using LibDeflate @@ -22,6 +21,7 @@ export is_record_varying include("epochs.jl") include("enums.jl") include("types.jl") +include("staticstring.jl") include("parsing.jl") include("decompress.jl") include("records/records.jl") diff --git a/src/enums.jl b/src/enums.jl index a597152..a8b1771 100644 --- a/src/enums.jl +++ b/src/enums.jl @@ -67,5 +67,5 @@ const type_map = Dict( function julia_type(cdf_type, num_elems) cdf_type = DataType(cdf_type) - return cdf_type in (CDF_CHAR, CDF_UCHAR) ? StaticString{Int(num_elems)} : type_map[cdf_type] + return cdf_type in (CDF_CHAR, CDF_UCHAR) ? StaticString{Int(num_elems), UInt8} : type_map[cdf_type] end diff --git a/src/epochs.jl b/src/epochs.jl index cdd2e07..129c21d 100644 --- a/src/epochs.jl +++ b/src/epochs.jl @@ -149,6 +149,8 @@ function Base.show(io::IO, epoch::Epoch16) end Base.promote_rule(::Type{<:CDFDateTime}, ::Type{Dates.DateTime}) = Dates.DateTime +Base.promote_rule(::Type{T}, ::Type{Dates.Date}) where {T <: CDFDateTime} = T +# Comment out because of invalidation Base.convert(::Type{Dates.DateTime}, x::CDFDateTime) = Dates.DateTime(x) Base.bswap(x::Epoch) = Epoch(Base.bswap(x.instant)) Base.bswap(x::Epoch16) = Epoch16(Base.bswap(x.seconds), Base.bswap(x.picoseconds)) diff --git a/src/parsing.jl b/src/parsing.jl index b250251..cb261c1 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -164,7 +164,7 @@ function validate_cdf_magic(magic_bytes) end _btye_swap!(data) = map!(ntoh, data, data) -_btye_swap!(data::AbstractArray{StaticString{N}}) where {N} = data +_btye_swap!(data::AbstractArray{<:StaticString{N}}) where {N} = data # function _btye_swap!(data::AbstractArray{TT2000}) # rd = reinterpret(Int64, data) # return map!(ntoh, rd, rd) diff --git a/src/staticstring.jl b/src/staticstring.jl new file mode 100644 index 0000000..a38d244 --- /dev/null +++ b/src/staticstring.jl @@ -0,0 +1,61 @@ +# https://github.com/mkitti/StaticStrings.jl +# https://github.com/JuliaPy/PythonCall.jl/blob/main/src/Utils/Utils.jl + +struct StaticString{N, T} <: AbstractString + codeunits::NTuple{N, T} + StaticString{N, T}(codeunits::NTuple{N, T}) where {N, T} = new{N, T}(codeunits) +end + +function Base.iterate(x::StaticString{N, UInt8}, i::Int = 1) where {N} + i > N && return + cs = x.codeunits + c = @inbounds cs[i] + if all(iszero, (cs[j] for j in i:N)) + return + elseif (c & 0x80) == 0x00 + return (reinterpret(Char, UInt32(c) << 24), i + 1) + elseif (c & 0x40) == 0x00 + nothing + elseif (c & 0x20) == 0x00 + if @inbounds (i ≤ N - 1) && ((cs[i + 1] & 0xC0) == 0x80) + return ( + reinterpret(Char, (UInt32(cs[i]) << 24) | (UInt32(cs[i + 1]) << 16)), + i + 2, + ) + end + elseif (c & 0x10) == 0x00 + if @inbounds (i ≤ N - 2) && ((cs[i + 1] & 0xC0) == 0x80) && ((cs[i + 2] & 0xC0) == 0x80) + return ( + reinterpret( + Char, + (UInt32(cs[i]) << 24) | + (UInt32(cs[i + 1]) << 16) | + (UInt32(cs[i + 2]) << 8), + ), + i + 3, + ) + end + elseif (c & 0x08) == 0x00 + if @inbounds (i ≤ N - 3) && + ((cs[i + 1] & 0xC0) == 0x80) && + ((cs[i + 2] & 0xC0) == 0x80) && + ((cs[i + 3] & 0xC0) == 0x80) + return ( + reinterpret( + Char, + (UInt32(cs[i]) << 24) | + (UInt32(cs[i + 1]) << 16) | + (UInt32(cs[i + 2]) << 8) | + UInt32(cs[i + 3]), + ), + i + 4, + ) + end + end + throw(StringIndexError(x, i)) +end + +function Base.String(x::StaticString{N, T}) where {N, T} + b = Base.StringVector(N) + return String(b .= x.codeunits) +end diff --git a/test/comprehensive_test.jl b/test/comprehensive_test.jl index 9323938..fe640b5 100644 --- a/test/comprehensive_test.jl +++ b/test/comprehensive_test.jl @@ -1,3 +1,5 @@ +# Comprehensive test based on the Python CDFpp test.py +# Tests all variables in a_cdf.cdf for expected shapes, types, values, and attributes using Test using CommonDataFormat import CommonDataFormat as CDF @@ -5,11 +7,6 @@ using Dates include("utils.jl") -""" -Comprehensive test based on the Python CDFpp test.py -Tests all variables in a_cdf.cdf for expected shapes, types, values, and attributes -""" - # Expected variable definitions (translated from Python test.py) const EXPECTED_VARIABLES = Dict( "epoch" => ( @@ -129,6 +126,12 @@ ds = CDFDataset(file) @test Set(keys(ds)) == Set(keys(EXPECTED_VARIABLES)) end +@testset "StaticString" begin + using CommonDataFormat: StaticString + @test typeof(ds["var_string"][1]) == StaticString{16, UInt8} + @test String(ds["var_string"][1]) == "This is a string" +end + @testset "DateTime Conversions" begin for var in ("epoch", "epoch16", "tt2000") @testset "Variable: $var" begin