Skip to content

Commit

Permalink
Cleanup integration w/ queryverse; provide Tables.DataValueUnwrapper …
Browse files Browse the repository at this point in the history
…for sinks to unwrap potentially DataValue-based NamedTuples (#16)
  • Loading branch information
quinnj authored Sep 11, 2018
1 parent d9100de commit 6a91583
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 28 deletions.
4 changes: 3 additions & 1 deletion src/Tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ export rowtable, columntable

function __init__()
@require DataValues="e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5" include("datavalues.jl")
@require Query="1a8c2f83-1ff3-5112-b086-8aa67b057ba1" include("enumerable.jl")
@require CategoricalArrays="324d7699-5711-5eae-9e2f-1d82baa6b597" begin
using .CategoricalArrays
allocatecolumn(::Type{CategoricalString{R}}, rows) where {R} = CategoricalArray{String, 1, R}(undef, rows)
Expand Down Expand Up @@ -178,4 +177,7 @@ include("namedtuples.jl")
# generic fallback definitions
include("fallbacks.jl")

# integration with queryverse
include("query.jl")

end # module
19 changes: 5 additions & 14 deletions src/datavalues.jl
Original file line number Diff line number Diff line change
@@ -1,23 +1,10 @@
using .DataValues

# DataValue-compatible row iteration for Data.Sources
# DataValue overloads for query.jl definitions
# A `DataValue{T}` element type maps to `Union{T, Missing}`.
nondatavaluetype(::Type{DataValue{T}}) where {T} = Union{T, Missing}
# Fallback: any other type is returned unchanged.
nondatavaluetype(::Type{T}) where {T} = T
# Rebuild a NamedTuple type with the same field names, passing every field
# type through `nondatavaluetype`.
Base.@pure function nondatavaluetype(::Type{NT}) where {NT <: NamedTuple{names}} where {names}
    # The entries are types, so they are collected into a `Vector{Any}` before splatting.
    converted = Any[nondatavaluetype(fieldtype(NT, idx)) for idx in 1:fieldcount(NT)]
    return NamedTuple{names, Tuple{converted...}}
end

# Ordinary values pass through unchanged.
unwrap(x) = x

# A `DataValue` becomes `missing` when `isna` reports it as NA; otherwise the
# stored value is extracted with `DataValues.unsafe_get`.
function unwrap(x::DataValue)
    isna(x) && return missing
    return DataValues.unsafe_get(x)
end

# A `DataValue` type is returned as-is.
datavaluetype(::Type{T}) where {T <: DataValue} = T
# Fallback: types without a more specific method are returned unchanged.
datavaluetype(::Type{T}) where {T} = T
# A `Union{T, Missing}` element type maps to `DataValue{T}`.
datavaluetype(::Type{Union{T, Missing}}) where {T} = DataValue{T}
# Build the NamedTuple type for a schema: the names are kept and each field
# type of `types` is passed through `datavaluetype`.
Base.@pure function datavaluetype(::Tables.Schema{names, types}) where {names, types}
    # The entries are types, so they are collected into a `Vector{Any}` before splatting.
    wrapped = Any[datavaluetype(fieldtype(types, idx)) for idx in 1:fieldcount(types)]
    return NamedTuple{names, Tuple{wrapped...}}
end

struct DataValueRowIterator{NT, S}
x::S
Expand All @@ -26,6 +13,7 @@ DataValueRowIterator(::Type{NT}, x::S) where {NT <: NamedTuple, S} = DataValueRo

# Schema-type constructor: the iterator's row-type parameter is computed by
# calling `datavaluetype` on `NamedTuple{names, types}`; `x` is wrapped as-is.
# NOTE(review): confirm a `datavaluetype` method for NamedTuple types exists;
# otherwise the generic `Type{T}` fallback would return the type unchanged.
"Returns a DataValue-based NamedTuple-iterator"
DataValueRowIterator(::Type{Schema{names, types}}, x::S) where {names, types, S} = DataValueRowIterator{datavaluetype(NamedTuple{names, types}), S}(x)

function datavaluerows(x)
r = Tables.rows(x)
#TODO: add support for unknown schema
Expand Down Expand Up @@ -55,6 +43,9 @@ function Base.iterate(rows::DataValueRowIterator{NT, S}, st=()) where {NT <: Nam
end
end

# Alternative lazy row implementation (disabled): Query.jl currently relies on
# inferring return types from the full type information of NamedTuples.

# function Base.iterate(rows::DataValueRowIterator{NT}, st=()) where {NT}
# state = iterate(rows.x, st...)
# state === nothing && return nothing
Expand Down
25 changes: 16 additions & 9 deletions src/enumerable.jl → src/query.jl
Original file line number Diff line number Diff line change
@@ -1,17 +1,26 @@
using .Query
# Generic fallbacks: with no more specific method, both conversions return
# the type unchanged.
nondatavaluetype(::Type{T}) where {T} = T
datavaluetype(::Type{T}) where {T} = T

@static if isdefined(Query.QueryOperators, :Enumerable)
# Rebuild a NamedTuple type with the same field names, passing every field
# type through `nondatavaluetype`.
Base.@pure function nondatavaluetype(::Type{NT}) where {NT <: NamedTuple{names}} where {names}
    # The entries are types, so they are collected into a `Vector{Any}` before splatting.
    converted = Any[nondatavaluetype(fieldtype(NT, idx)) for idx in 1:fieldcount(NT)]
    return NamedTuple{names, Tuple{converted...}}
end

import .Query.QueryOperators: Enumerable
# Build the NamedTuple type for a schema: the names are kept and each field
# type of `types` is passed through `datavaluetype`.
Base.@pure function datavaluetype(::Tables.Schema{names, types}) where {names, types}
    # The entries are types, so they are collected into a `Vector{Any}` before splatting.
    wrapped = Any[datavaluetype(fieldtype(types, idx)) for idx in 1:fieldcount(types)]
    return NamedTuple{names, Tuple{wrapped...}}
end

# Table-interface methods for `Enumerable`: it is declared a table with row
# access, and its rows are exposed through a `DataValueUnwrapper`.
Tables.istable(::Type{<:Enumerable}) = true
Tables.rowaccess(::Type{<:Enumerable}) = true
Tables.rows(e::Enumerable) = DataValueUnwrapper(e)
# Fallback: values with no more specific `unwrap` method pass through unchanged.
unwrap(x) = x

# Thin wrapper around a source object `x`; the wrapped value's type is the
# parameter `S`.
struct DataValueUnwrapper{S}
x::S
end

# Table-interface methods: a `DataValueUnwrapper` is declared a table with
# row access, and `Tables.rows` returns the wrapper itself.
Tables.istable(::Type{<:DataValueUnwrapper}) = true
Tables.rowaccess(::Type{<:DataValueUnwrapper}) = true
Tables.rows(x::DataValueUnwrapper) = x

function Tables.schema(dv::DataValueUnwrapper)
eT = eltype(dv.x)
!(eT <: NamedTuple) && return nothing
Expand All @@ -34,6 +43,4 @@ end

# Property access on a `DataValueUnwrapRow` forwards to the object stored in
# its first field and passes the result through `unwrap`.
Base.getproperty(d::DataValueUnwrapRow, ::Type{T}, col::Int, nm::Symbol) where {T} = unwrap(getproperty(getfield(d, 1), T, col, nm))
Base.getproperty(d::DataValueUnwrapRow, nm::Symbol) = unwrap(getproperty(getfield(d, 1), nm))
# Property names are those of the object stored in the first field.
Base.propertynames(d::DataValueUnwrapRow) = propertynames(getfield(d, 1))

end # isdefined
# Property names are those of the object stored in the row's first field.
Base.propertynames(d::DataValueUnwrapRow) = propertynames(getfield(d, 1))
8 changes: 4 additions & 4 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ end

@test Tables.buildcolumns(nothing, rt) == nt
@test Tables.columntable(nothing, nt) == nt

# append
nt2 = columntable(nt, rt)
@test Tables.rowcount(nt2) == 6
Expand Down Expand Up @@ -195,19 +195,19 @@ end
rt2 = collect(dv)
@test rt2[1] == (a = 1, b = DataValue{Float64}(4.0), c = "7")

ei = QueryOperators.EnumerableIterable{eltype(dv), typeof(dv)}(dv)
ei = Tables.DataValueUnwrapper(QueryOperators.EnumerableIterable{eltype(dv), typeof(dv)}(dv))
nt = ei |> columntable
@test isequal(rt, nt)
rt3 = ei |> rowtable
@test isequal(rt |> rowtable, rt3)

# rt = [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8"), (a=3, b=6.0, c="9")]
mt = ei |> y->QueryOperators.map(y, x->(a=x.b, c=x.c), Expr(:block))
mt = Tables.DataValueUnwrapper(ei.x |> y->QueryOperators.map(y, x->(a=x.b, c=x.c), Expr(:block)))
@inferred (mt |> columntable)
@inferred (mt |> rowtable)

# uninferrable case
mt = ei |> y->QueryOperators.map(y, x->(a=x.a, c=x.c), Expr(:block))
mt = Tables.DataValueUnwrapper(ei.x |> y->QueryOperators.map(y, x->(a=x.a, c=x.c), Expr(:block)))
@test (mt |> columntable) == (a = Real[1, 2.0, 3], c = ["7", "8", "9"])
@test length(mt |> rowtable) == 3
end

0 comments on commit 6a91583

Please sign in to comment.