diff --git a/Project.toml b/Project.toml index 4842d66..be4727a 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,7 @@ Dictionaries = "85a47980-9c8c-11e8-2b9f-f7ca1fa99fb4" DiskArrays = "3c3547ce-8d99-4f5e-a174-61eb10b00ae3" LibDeflate = "9255714d-24a7-4b30-8ea3-d46a97f7e13b" Mmap = "a63ad114-7e13-5084-954f-fe012c677804" +PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" ResumableFunctions = "c5292f4c-5179-55e1-98c5-05642aab7184" StaticStrings = "4db0a0c5-418a-4e1d-8806-cb305fe13294" @@ -27,6 +28,7 @@ Dictionaries = "0.4" DiskArrays = "0.4" LibDeflate = "0.4.3" Mmap = "1" +PrecompileTools = "1" ResumableFunctions = "1" StaticStrings = "0.2.6" julia = "1.10" diff --git a/justfile b/justfile new file mode 100644 index 0000000..f5d993d --- /dev/null +++ b/justfile @@ -0,0 +1,17 @@ +perf: + #!/usr/bin/env -S julia --threads=auto --project=. + @time using CommonDataFormat + elx_file = "data/elb_l2_epdef_20210914_v01.cdf" + @time ds = CDFDataset(elx_file) + @time var = ds["elb_pef_hs_Epat_eflux"] + @time Array(var) + @time var2 = ds["elb_pef_hs_epa_spec"] + @time Array(var2) + @time Array(ds["elb_pef_fs_time"]) + +snoop: + #!/usr/bin/env -S julia --threads=auto --project=. 
-i + using SnoopCompileCore + invs = @snoop_invalidations using CommonDataFormat + using SnoopCompile, AbstractTrees + trees = invalidation_trees(invs) \ No newline at end of file diff --git a/src/CommonDataFormat.jl b/src/CommonDataFormat.jl index bfd45ca..450c077 100644 --- a/src/CommonDataFormat.jl +++ b/src/CommonDataFormat.jl @@ -10,6 +10,7 @@ using Base.Threads using CodecZlib: GzipDecompressor, transcode using LibDeflate using LibDeflate: GzipDecompressResult +using PrecompileTools export CDFDataset, CDFVariable export Majority, CompressionType, DataType @@ -29,5 +30,6 @@ include("vattribute.jl") include("dataset.jl") include("loading/attribute.jl") include("loading/variable.jl") +include("precompile.jl") -end +end \ No newline at end of file diff --git a/src/epochs.jl b/src/epochs.jl index be71466..d25d853 100644 --- a/src/epochs.jl +++ b/src/epochs.jl @@ -6,6 +6,7 @@ # 3. CDF_TIME_TT2000 (TT2000 as short) is nanoseconds since J2000 with leap seconds import Base: promote_rule, -, + +using Dates: value, toms, tons include("leap_second.jl") @@ -14,6 +15,20 @@ const EPOCH_OFFSET_SECONDS = 62167219200.0 # Seconds from year 0 to Unix epoch abstract type CDFDateTime <: Dates.AbstractDateTime end +struct Picosecond <: Period + value::Float64 +end + +Picosecond(ns::Nanosecond) = convert(Picosecond, ns) +Picosecond(p::Period) = Picosecond(Nanosecond(p)) +Base.convert(::Type{Nanosecond}, x::Picosecond) = Nanosecond(round(Int64, x.value / 1.0e3)) +Base.convert(::Type{Picosecond}, x::Nanosecond) = Picosecond(x.value * 1.0e3) +Base.convert(::Type{Picosecond}, x::Dates.FixedPeriod) = Picosecond(Nanosecond(x)) +Base.promote_rule(::Type{Picosecond}, ::Type{<:Dates.FixedPeriod}) = Picosecond + +Dates._units(x::Picosecond) = " picosecond" * (abs(value(x)) == 1 ? 
"" : "s") + + """ Epoch @@ -56,8 +71,11 @@ fillvalue(::Epoch16) = -1.0e31 fillvalue(::TT2000) = 9999 (-)(epoch::Epoch, other::Epoch) = Millisecond(round(Int64, epoch.instant - other.instant)) -(+)(tt2000::TT2000, other::Period) = TT2000(tt2000.instant.value + Dates.tons(other)) -(+)(epoch::Epoch, other::Period) = Epoch(epoch.instant + Dates.toms(other)) +(-)(epoch::Epoch16, other::Epoch16) = Picosecond((epoch.seconds - other.seconds) * 1.0e12 + epoch.picoseconds - other.picoseconds) +(+)(tt2000::TT2000, other::Period) = TT2000(value(tt2000) + tons(other)) +(-)(tt2000::TT2000, other::Period) = TT2000(value(tt2000) - tons(other)) +(+)(epoch::Epoch, other::Period) = Epoch(value(epoch) + toms(other)) +(-)(epoch::Epoch, other::Period) = Epoch(value(epoch) - toms(other)) # Conversion to DateTime function Dates.DateTime(epoch::Epoch) @@ -66,8 +84,8 @@ end function Dates.DateTime(epoch::Epoch16) s_since_unix = epoch.seconds - EPOCH_OFFSET_SECONDS - total_ns = s_since_unix * 1.0e9 + epoch.picoseconds / 1000.0 - return DateTime(1970) + Nanosecond(round(Int64, total_ns)) + total_ms = s_since_unix * 1.0e3 + epoch.picoseconds / 1.0e9 + return DateTime(1970) + Millisecond(round(Int64, total_ms)) end function Dates.DateTime(epoch::TT2000) @@ -79,10 +97,10 @@ end # Conversion from TimeType function Epoch16(dt::DateTime) - ns_since_unix = (dt - DateTime(1970, 1, 1)).value * 1_000_000 # DateTime precision is milliseconds - s_since_unix = ns_since_unix / 1.0e9 + ms_since_unix = value(dt - DateTime(1970, 1, 1)) + s_since_unix = fld(ms_since_unix, 1000) s_total = s_since_unix + EPOCH_OFFSET_SECONDS - ps_component = (ns_since_unix % 1.0e9) * 1000.0 # Convert nanoseconds remainder to picoseconds + ps_component = mod(ms_since_unix, 1000) * 1000000000 # Millisecond remainder in picoseconds; fld/mod keep it in [0, 1e12) before 1970 return Epoch16(s_total, ps_component) end @@ -102,11 +120,12 @@ for f in (:year, :month, :day, :hour, :minute, :second, :millisecond) @eval Dates.$f(epoch::CDFDateTime) = 
Dates.$f(DateTime(epoch)) end -Dates.value(epoch::CDFDateTime) = epoch.instant +Dates.value(epoch::Epoch) = epoch.instant +Dates.value(epoch::Epoch16) = ComplexF64(epoch.seconds, epoch.picoseconds) Dates.value(epoch::TT2000) = epoch.instant.value function Base.floor(x::T, p::Union{DatePeriod, TimePeriod}) where {T <: CDFDateTime} - convert(T, floor(convert(DateTime, x), p)) + return convert(T, floor(convert(DateTime, x), p)) end function Base.show(io::IO, epoch::CDFDateTime) @@ -117,7 +136,12 @@ function Base.show(io::IO, epoch::CDFDateTime) print(io, DateTime(epoch)) end end +function Base.show(io::IO, epoch::Epoch16) + return print(io, DateTime(epoch)) +end + Base.promote_rule(::Type{<:CDFDateTime}, ::Type{Dates.DateTime}) = Dates.DateTime Base.convert(::Type{Dates.DateTime}, x::CDFDateTime) = Dates.DateTime(x) -Base.bswap(x::T) where {T <: CDFDateTime} = T(Base.bswap(x.instant)) +Base.bswap(x::Epoch) = Epoch(Base.bswap(x.instant)) +Base.bswap(x::Epoch16) = Epoch16(Base.bswap(x.seconds), Base.bswap(x.picoseconds)) Base.bswap(x::TT2000) = TT2000(Base.bswap(x.instant.value)) diff --git a/src/parsing.jl b/src/parsing.jl index 18a159f..ac3dc8d 100644 --- a/src/parsing.jl +++ b/src/parsing.jl @@ -25,7 +25,7 @@ end end @inline function read_be_i(v::Vector{UInt8}, i, T::Base.DataType) - return read_be(v, i, T), i + sizeof(T) + return read_be(v, i, T), i + _sizeof(T) end @inline function read_be_i(v::Vector{UInt8}, i, n::Integer, T) @@ -61,7 +61,7 @@ macro read_be_fields(buffer, pos, Ts...) for (sym, T) in zip(value_syms, types) Tesc = esc(T) push!(stmts, :(local $sym = read_be($buf, $pos_sym, $Tesc))) - push!(stmts, :($pos_sym += sizeof($Tesc))) + push!(stmts, :($pos_sym += _sizeof($Tesc))) end tuple_expr = Expr(:tuple, value_syms...) 
@@ -83,7 +83,7 @@ end for (i, idx) in enumerate(indxs) T = fieldtype(SType, idx) push!(exprs, :(local $(value_syms[i]) = read_be(buffer, $pos_sym, $T))) - push!(exprs, :($pos_sym += sizeof($T))) + push!(exprs, :($pos_sym += _sizeof($T))) end # Return tuple of values and final position diff --git a/src/precompile.jl b/src/precompile.jl new file mode 100644 index 0000000..361b587 --- /dev/null +++ b/src/precompile.jl @@ -0,0 +1,12 @@ +precompile(Array, (CDFVariable{TT2000, 1, VDR{Int64}, CDFDataset{NoCompression, Int64}},)) +for T in (Float32, Float64), i in 1:3 + precompile(Array, (CDFVariable{T, i, VDR{Int64}, CDFDataset{NoCompression, Int64}},)) +end + +PrecompileTools.@setup_workload begin + elx_file = joinpath(@__DIR__, "../data/elb_l2_epdef_20210914_v01.cdf") + + PrecompileTools.@compile_workload begin + isfile(elx_file) && CDFDataset(elx_file) # sample file may be absent from an installed package; skip instead of failing precompilation + end +end diff --git a/src/types.jl b/src/types.jl index f1d9677..b84e9a2 100644 --- a/src/types.jl +++ b/src/types.jl @@ -2,4 +2,6 @@ abstract type Record end abstract type ReservedField end struct RInt32 <: ReservedField end -Base.sizeof(::Type{RInt32}) = sizeof(Int32) + +_sizeof(x) = sizeof(x) +_sizeof(::Type{RInt32}) = sizeof(Int32) diff --git a/test/epochs_test.jl b/test/epochs_test.jl index 05fa243..3f525c7 100644 --- a/test/epochs_test.jl +++ b/test/epochs_test.jl @@ -1,25 +1,49 @@ using Test using CommonDataFormat +import CommonDataFormat as CDF using Dates @testset "Epochs" begin - @test Epoch(DateTime(0)) == Epoch(0) + t = Epoch(DateTime(0)) + @test t == Epoch(0) @test DateTime(Epoch(DateTime(0))) == DateTime(0) @test Epoch(Epoch(0)) == Epoch(0) @test Epoch(10) - Epoch(0) == Millisecond(10) @test string(Epoch(-1.0e31)) == "FILLVAL" + @test Epoch(10) - Millisecond(10) == Epoch(0) + @test Epoch(0) + Second(1) == Epoch(1000) + @test ntoh(hton(t)) == t # @test Epoch16(DateTime(0)) == Epoch16(0, 0) end @testset "TT2000" begin - @test DateTime(TT2000(DateTime(2000))) == DateTime(2000) + t = TT2000(DateTime(2000)) + @test 
DateTime(t) == DateTime(2000) @test TT2000(DateTime(TT2000(0))) == TT2000(0) @test TT2000(TT2000(0)) == TT2000(0) @test TT2000(10) - TT2000(0) == Nanosecond(10) + @test t - Day(1) == DateTime(1999, 12, 31) @test floor(TT2000(0), Minute(1)) == DateTime(2000, 1, 1, 11, 58) @test TT2000(0) + Minute(1) == TT2000(60_000_000_000) @test string(TT2000(0)) == "2000-01-01T11:58:55.816" @test TT2000(0) == TT2000(0) |> bswap @test TT2000(0) == DateTime("2000-01-01T11:58:55.816") -end \ No newline at end of file +end + +@testset "Epoch16" begin + t = Epoch16(6.377810224e10, 8.97e11) + @test t == DateTime(2021, 1, 17, 11, 30, 40, 897) + @test Epoch16(DateTime(t)) == t + @test string(t) == "2021-01-17T11:30:40.897" + @test ntoh(hton(t)) == t + @test Epoch16(6.377810224e10, 8.97e11) - Epoch16(6.377810224e10, 0) == CDF.Picosecond(8.97e11) +end + +@testset "Picosecond" begin + @test CDF.Picosecond(1) == CDF.Picosecond(1) + @test Nanosecond(CDF.Picosecond(Nanosecond(1000))) == Nanosecond(1000) + @test string(CDF.Picosecond(1)) == "1.0 picosecond" + @test CDF.Picosecond(Millisecond(1)) == CDF.Picosecond(1.0e9) + @test CDF.Picosecond(1.0e9) == Millisecond(1) +end diff --git a/test/perf_test.jl b/test/perf_test.jl index 1df756f..c1ea314 100644 --- a/test/perf_test.jl +++ b/test/perf_test.jl @@ -19,8 +19,9 @@ b2= @b full_load(elx_file) evals=2 mms_file = data_path(".mms1_scm_srvy_l2_scsrvy_20190301_v2.2.0.cdf") ds = CDFDataset(mms_file) -sum(ds["mms1_scm_acb_gse_scsrvy_srvy_l2"]) -sum(ds["mms1_scm_acb_gse_scsrvy_srvy_l2"][:, 100:100000]) +var = ds["mms1_scm_acb_gse_scsrvy_srvy_l2"] +sum(var) +sum(var[:, 100:100000]) b30 = @b ds["mms1_scm_acb_gse_scsrvy_srvy_l2"] evals=20 b3= @b sum(Array(ds["mms1_scm_acb_gse_scsrvy_srvy_l2"])) evals=2 b4= @b sum(ds["mms1_scm_acb_gse_scsrvy_srvy_l2"][:, 100:100000]) evals=5 diff --git a/test/perf_test_ntoh.jl b/test/perf_test_ntoh.jl new file mode 100644 index 0000000..5e057a0 --- /dev/null +++ b/test/perf_test_ntoh.jl @@ -0,0 +1,41 @@ +a = rand(3, 
100000) + +f1(a) = map!(ntoh, a, a) +f2(a) = a .= ntoh.(a) + +using Polyester + +function f3(a) + return @inbounds @simd for i in eachindex(a) + a[i] = ntoh(a[i]) + end +end + +function f4(a) + @batch for i in eachindex(a) + a[i] = ntoh(a[i]) + end + return a +end + +using Base.Threads + +function f5(a) + Threads.@threads for i in eachindex(a) + a[i] = ntoh(a[i]) + end + return a +end + +a = rand(3, 10000000) + +b1 = @b f1(a) evals = 10 +b2 = @b f2(a) evals = 10 +b3 = @b f3(a) evals = 10 +b4 = @b f4(a) evals = 10 +b5 = @b f5(a) evals = 10 + + +ds = CDFDataset("/Users/zijin/.cdaweb/data/THB_L2_FGM/thb_fgl_gseQ_thb_l2s_fgm_20210120000000_20210120235959_cdaweb.cdf") +var = ds["thb_fgl_epoch16"] +@b Array(var) diff --git a/test/runtests.jl b/test/runtests.jl index 8d8f507..5d1766d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -71,12 +71,17 @@ end @test var[1:3] == Float32[6.7, 6.7, 7.3] @test var["UNITS"] == "nT" @test var["FIELDNAM"] == "BR (RTN)" + + + @test ds["Epoch"][1] == DateTime(2024, 9, 1, 0, 0) + @test ntoh(hton(ds["Epoch"][1])) == DateTime(2024, 9, 1, 0, 0) + @test @allocations(ds["BR"]) <= 50 @info @allocated(ds.attrib) if VERSION >= v"1.12" @test @allocated(ds.attrib) <= 30000 else - @test @allocated(ds.attrib) <= 65000 + @test @allocated(ds.attrib) <= 70000 end end