diff --git a/Project.toml b/Project.toml
index 4b976c86..d95b6b3d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -5,6 +5,7 @@ authors = ["Invenia Technical Computing"]
 version = "1.5.0"
 
 [deps]
+ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd"
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01"
@@ -12,6 +13,7 @@ Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53"
 
 [compat]
+ArrowTypes = "1.2"
 Documenter = "0.23, 0.24"
 Infinity = "0.2.3"
 RecipesBase = "0.7, 0.8, 1"
@@ -19,6 +21,7 @@ TimeZones = "0.7, 0.8, 0.9, 0.10, 0.11, 1"
 julia = "1"
 
 [extras]
+Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 ImageMagick = "6218d12a-5da1-5696-b52f-db25d2ecc6d1"
 Infinity = "a303e19e-6eb4-11e9-3b09-cd9505f79100"
@@ -27,4 +30,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 VisualRegressionTests = "34922c18-7c2a-561c-bac1-01e79b2c4c92"
 
 [targets]
-test = ["Documenter", "ImageMagick", "Infinity", "Plots", "Test", "VisualRegressionTests"]
+test = ["Arrow", "Documenter", "ImageMagick", "Infinity", "Plots", "Test", "VisualRegressionTests"]
diff --git a/src/Intervals.jl b/src/Intervals.jl
index 0c606436..91f58e67 100644
--- a/src/Intervals.jl
+++ b/src/Intervals.jl
@@ -1,5 +1,6 @@
 module Intervals
 
+using ArrowTypes: ArrowTypes, JuliaType, arrowname
 using Dates
 using Printf
 using RecipesBase
@@ -34,6 +35,7 @@ include("plotting.jl")
 include("docstrings.jl")
 include("deprecated.jl")
 include("compat.jl")
+include("arrow.jl")
 
 export Bound,
        Closed,
diff --git a/src/arrow.jl b/src/arrow.jl
new file mode 100644
index 00000000..f39f21cb
--- /dev/null
+++ b/src/arrow.jl
@@ -0,0 +1,90 @@
+# Arrow serialization support for the bound types, `Interval`, and `AnchoredInterval`.
+
+# Give each bound type an Arrow name and register the reverse lookup from that name.
+for T in (Closed, Open, Unbounded)
+    name = QuoteNode(Symbol("JuliaLang.Intervals.$(string(T))"))
+
+    @eval begin
+        ArrowTypes.arrowname(::Type{$T}) = $name
+        ArrowTypes.JuliaType(::Val{$name}) = $T
+    end
+end
+
+# Use a more efficient Arrow serialization when a vector uses a concrete element type
+let name = Symbol("JuliaLang.Intervals.Interval{T,L,R}")
+    ArrowTypes.arrowname(::Type{Interval{T,L,R}}) where {T, L <: Bound, R <: Bound} = name
+    function ArrowTypes.ArrowType(::Type{Interval{T,L,R}}) where {T, L <: Bound, R <: Bound}
+        return Interval{T,L,R}
+    end
+    # Encode the bound types as their Arrow names, comma-separated, in the metadata string
+    function ArrowTypes.arrowmetadata(::Type{Interval{T,L,R}}) where {T, L <: Bound, R <: Bound}
+        return join(arrowname.([L, R]), ",")
+    end
+    # Decode the bound types from `meta`; `T` comes from the `first`/`last` field types
+    function ArrowTypes.JuliaType(::Val{name}, ::Type{NamedTuple{(:first, :last), Tuple{T, T}}}, meta) where T
+        L, R = ArrowTypes.JuliaType.(Val.(Symbol.(split(meta, ","))))
+        return Interval{T,L,R}
+    end
+    # An unbounded endpoint is passed to the constructor as `nothing`, not the stored value
+    function ArrowTypes.fromarrow(::Type{Interval{T,L,R}}, left, right) where {T, L <: Bound, R <: Bound}
+        return Interval{T,L,R}(
+            L === Unbounded ? nothing : left,
+            R === Unbounded ? nothing : right,
+        )
+    end
+end
+
+# A less efficient Arrow serialization format for when a vector contains non-concrete element types
+let name = Symbol("JuliaLang.Intervals.Interval{T}")
+    ArrowTypes.arrowname(::Type{<:Interval{T}}) where T = name
+    function ArrowTypes.ArrowType(::Type{<:Interval{T}}) where T
+        return NamedTuple{(:left, :right), Tuple{Tuple{String, T}, Tuple{String, T}}}
+    end
+    # Each endpoint is stored together with the Arrow name of its bound type
+    function ArrowTypes.toarrow(x::Interval{T}) where T
+        L, R = bounds_types(x)
+        return (; left=(string(arrowname(L)), x.first), right=(string(arrowname(R)), x.last))
+    end
+    # Recover `T` from the value slot of the first `(bound name, value)` tuple
+    function ArrowTypes.JuliaType(::Val{name}, ::Type{NamedTuple{names, types}}) where {names, types}
+        T = fieldtype(fieldtype(types, 1), 2)
+        return Interval{T}
+    end
+    # Look up each bound type by its stored name, then rebuild the interval
+    function ArrowTypes.fromarrow(::Type{Interval{T}}, left, right) where T
+        L = ArrowTypes.JuliaType(Val(Symbol(left[1])))
+        R = ArrowTypes.JuliaType(Val(Symbol(right[1])))
+        return Interval{T,L,R}(
+            L === Unbounded ? nothing : left[2],
+            R === Unbounded ? nothing : right[2],
+        )
+    end
+end
+
+# Note: The type returned by the `ArrowType` function is not passed into the `JuliaType`
+# function. Instead the result of `typeof(toarrow(...))` is passed into `JuliaType`.
+# To reproduce this use an isbits object as a type parameter in `ArrowType`.
+
+# An inefficient Arrow serialization format which supports non-concrete element types
+let name = Symbol("JuliaLang.Intervals.AnchoredInterval{P,T}")
+    ArrowTypes.arrowname(::Type{<:AnchoredInterval{P,T}}) where {P,T} = name
+    function ArrowTypes.ArrowType(::Type{<:AnchoredInterval{P,T}}) where {P,T}
+        return NamedTuple{(:anchor,), Tuple{Tuple{typeof(P), T, String, String}}}
+    end
+    # Store the `P` parameter, the anchor, and both bound names in a single tuple
+    function ArrowTypes.toarrow(x::AnchoredInterval{P,T}) where {P,T}
+        L, R = bounds_types(x)
+        return (; anchor=(P, x.anchor, string(arrowname(L)), string(arrowname(R))))
+    end
+    function ArrowTypes.JuliaType(::Val{name})
+        return AnchoredInterval
+    end
+    # Rebuild every type parameter from the values stored in the tuple
+    function ArrowTypes.fromarrow(::Type{AnchoredInterval}, anchor)
+        P = anchor[1]
+        T = typeof(anchor[2])  # Note: Arrow can't access the original `T` anyway
+        L = ArrowTypes.JuliaType(Val(Symbol(anchor[3])))
+        R = ArrowTypes.JuliaType(Val(Symbol(anchor[4])))
+        return AnchoredInterval{P,T,L,R}(anchor[2])
+    end
+end
diff --git a/test/arrow.jl b/test/arrow.jl
new file mode 100644
index 00000000..aef2c775
--- /dev/null
+++ b/test/arrow.jl
@@ -0,0 +1,40 @@
+# Note: Named tuples are written as `(; col=col)` as the `(; col)` shorthand requires
+# Julia 1.5 while these tests are run on Julia 1.3 and later.
+@testset "Arrow support" begin
+    @testset "Interval (concrete)" begin
+        col = [Interval{Closed,Unbounded}(1, nothing)]
+
+        table = (; col=col)
+        t = Arrow.Table(Arrow.tobuffer(table))
+
+        @test eltype(t.col) == Interval{Int, Closed, Unbounded}
+        @test t.col == col
+    end
+
+    @testset "Interval (non-concrete)" begin
+        col = [
+            Interval{Closed, Closed}(1, 2),
+            Interval{Closed, Open}(2, 3),
+            Interval{Unbounded, Open}(nothing, 4),
+        ]
+
+        table = (; col=col)
+        t = Arrow.Table(Arrow.tobuffer(table))
+
+        @test eltype(t.col) == Interval{Int}
+        @test t.col == col
+    end
+
+    @testset "AnchoredInterval" begin
+        zdt_start = ZonedDateTime(2016, 8, 11, 1, tz"America/Winnipeg")
+        zdt_end = ZonedDateTime(2016, 8, 12, 0, tz"America/Winnipeg")
+        col = HE.(zdt_start:Hour(1):zdt_end)
+
+        table = (; col=col)
+        t = Arrow.Table(Arrow.tobuffer(table))
+
+        # Arrow.jl converts all Period types into Second
+        @test_broken eltype(t.col) == HourEnding{ZonedDateTime, Open, Closed}
+        @test t.col == col
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index c40d1f4e..d088c065 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,3 +1,5 @@
+VERSION >= v"1.3" && using Arrow
+
 using Base.Iterators: product
 using Dates
 using Documenter: doctest
@@ -20,6 +22,12 @@ include("test_utils.jl")
     include("comparisons.jl")
     include("plotting.jl")
 
+    if VERSION >= v"1.3"
+        include("arrow.jl")
+    else
+        @warn "Skipping Arrow.jl support tests"
+    end
+
     # Note: The output of the doctests currently requires a newer version of Julia
     # https://github.com/JuliaLang/julia/pull/34387
     # The doctests fail on x86, so only run them on 64-bit hardware