From 6a9158306cd44ecffc57ed480eed109ec082110a Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Tue, 11 Sep 2018 06:20:12 -0600 Subject: [PATCH] Cleanup integration w/ queryverse; provide Tables.DataValueUnwrapper for sinks to unwrap potentially DataValue-based NamedTuples (#16) --- src/Tables.jl | 4 +++- src/datavalues.jl | 19 +++++-------------- src/{enumerable.jl => query.jl} | 25 ++++++++++++++++--------- test/runtests.jl | 8 ++++---- 4 files changed, 28 insertions(+), 28 deletions(-) rename src/{enumerable.jl => query.jl} (58%) diff --git a/src/Tables.jl b/src/Tables.jl index 373a472..36b0b2b 100644 --- a/src/Tables.jl +++ b/src/Tables.jl @@ -6,7 +6,6 @@ export rowtable, columntable function __init__() @require DataValues="e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5" include("datavalues.jl") - @require Query="1a8c2f83-1ff3-5112-b086-8aa67b057ba1" include("enumerable.jl") @require CategoricalArrays="324d7699-5711-5eae-9e2f-1d82baa6b597" begin using .CategoricalArrays allocatecolumn(::Type{CategoricalString{R}}, rows) where {R} = CategoricalArray{String, 1, R}(undef, rows) @@ -178,4 +177,7 @@ include("namedtuples.jl") # generic fallback definitions include("fallbacks.jl") +# integration with queryverse +include("query.jl") + end # module diff --git a/src/datavalues.jl b/src/datavalues.jl index 76bf6e2..5f6215c 100644 --- a/src/datavalues.jl +++ b/src/datavalues.jl @@ -1,23 +1,10 @@ using .DataValues -# DataValue-compatible row iteration for Data.Sources +# DataValue overloads for query.jl definitions nondatavaluetype(::Type{DataValue{T}}) where {T} = Union{T, Missing} -nondatavaluetype(::Type{T}) where {T} = T -Base.@pure function nondatavaluetype(::Type{NT}) where {NT <: NamedTuple{names}} where {names} - TT = Tuple{Any[ nondatavaluetype(fieldtype(NT, i)) for i = 1:fieldcount(NT) ]...} - return NamedTuple{names, TT} -end - -unwrap(x) = x unwrap(x::DataValue) = isna(x) ? missing : DataValues.unsafe_get(x) - datavaluetype(::Type{T}) where {T <: DataValue} = T -datavaluetype(::Type{T}) where {T} = T datavaluetype(::Type{Union{T, Missing}}) where {T} = DataValue{T} -Base.@pure function datavaluetype(::Tables.Schema{names, types}) where {names, types} - TT = Tuple{Any[ datavaluetype(fieldtype(types, i)) for i = 1:fieldcount(types) ]...} - return NamedTuple{names, TT} -end struct DataValueRowIterator{NT, S} x::S @@ -26,6 +13,7 @@ DataValueRowIterator(::Type{NT}, x::S) where {NT <: NamedTuple, S} = DataValueRo "Returns a DataValue-based NamedTuple-iterator" DataValueRowIterator(::Type{Schema{names, types}}, x::S) where {names, types, S} = DataValueRowIterator{datavaluetype(NamedTuple{names, types}), S}(x) + function datavaluerows(x) r = Tables.rows(x) #TODO: add support for unknown schema @@ -55,6 +43,9 @@ function Base.iterate(rows::DataValueRowIterator{NT, S}, st=()) where {NT <: Nam end end +# Alternative lazy row implementation; currently though, Query.jl relies on +# being able to infer return types via all the type information of NamedTuples + # function Base.iterate(rows::DataValueRowIterator{NT}, st=()) where {NT} # state = iterate(rows.x, st...) # state === nothing && return nothing diff --git a/src/enumerable.jl b/src/query.jl similarity index 58% rename from src/enumerable.jl rename to src/query.jl index a20c11b..1730dd4 100644 --- a/src/enumerable.jl +++ b/src/query.jl @@ -1,17 +1,26 @@ -using .Query +nondatavaluetype(::Type{T}) where {T} = T +datavaluetype(::Type{T}) where {T} = T -@static if isdefined(Query.QueryOperators, :Enumerable) +Base.@pure function nondatavaluetype(::Type{NT}) where {NT <: NamedTuple{names}} where {names} + TT = Tuple{Any[ nondatavaluetype(fieldtype(NT, i)) for i = 1:fieldcount(NT) ]...} + return NamedTuple{names, TT} +end -import .Query.QueryOperators: Enumerable +Base.@pure function datavaluetype(::Tables.Schema{names, types}) where {names, types} + TT = Tuple{Any[ datavaluetype(fieldtype(types, i)) for i = 1:fieldcount(types) ]...} + return NamedTuple{names, TT} +end -Tables.istable(::Type{<:Enumerable}) = true -Tables.rowaccess(::Type{<:Enumerable}) = true -Tables.rows(e::Enumerable) = DataValueUnwrapper(e) +unwrap(x) = x struct DataValueUnwrapper{S} x::S end +Tables.istable(::Type{<:DataValueUnwrapper}) = true +Tables.rowaccess(::Type{<:DataValueUnwrapper}) = true +Tables.rows(x::DataValueUnwrapper) = x + function Tables.schema(dv::DataValueUnwrapper) eT = eltype(dv.x) !(eT <: NamedTuple) && return nothing @@ -34,6 +43,4 @@ end Base.getproperty(d::DataValueUnwrapRow, ::Type{T}, col::Int, nm::Symbol) where {T} = unwrap(getproperty(getfield(d, 1), T, col, nm)) Base.getproperty(d::DataValueUnwrapRow, nm::Symbol) = unwrap(getproperty(getfield(d, 1), nm)) -Base.propertynames(d::DataValueUnwrapRow) = propertynames(getfield(d, 1)) - -end # isdefined \ No newline at end of file +Base.propertynames(d::DataValueUnwrapRow) = propertynames(getfield(d, 1)) \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 6fc5c91..4363ea7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -87,7 +87,7 @@ end @test Tables.buildcolumns(nothing, rt) == nt @test Tables.columntable(nothing, nt) == nt - + # append nt2 = columntable(nt, rt) @test Tables.rowcount(nt2) == 6 @@ -195,19 +195,19 @@ end rt2 = collect(dv) @test rt2[1] == (a = 1, b = DataValue{Float64}(4.0), c = "7") - ei = QueryOperators.EnumerableIterable{eltype(dv), typeof(dv)}(dv) + ei = Tables.DataValueUnwrapper(QueryOperators.EnumerableIterable{eltype(dv), typeof(dv)}(dv)) nt = ei |> columntable @test isequal(rt, nt) rt3 = ei |> rowtable @test isequal(rt |> rowtable, rt3) # rt = [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8"), (a=3, b=6.0, c="9")] - mt = ei |> y->QueryOperators.map(y, x->(a=x.b, c=x.c), Expr(:block)) + mt = Tables.DataValueUnwrapper(ei.x |> y->QueryOperators.map(y, x->(a=x.b, c=x.c), Expr(:block))) @inferred (mt |> columntable) @inferred (mt |> rowtable) # uninferrable case - mt = ei |> y->QueryOperators.map(y, x->(a=x.a, c=x.c), Expr(:block)) + mt = Tables.DataValueUnwrapper(ei.x |> y->QueryOperators.map(y, x->(a=x.a, c=x.c), Expr(:block))) @test (mt |> columntable) == (a = Real[1, 2.0, 3], c = ["7", "8", "9"]) @test length(mt |> rowtable) == 3 end \ No newline at end of file