From 4295d837db49373786a5d38c57b8433fcebcd65c Mon Sep 17 00:00:00 2001 From: quinnj Date: Wed, 5 Sep 2018 08:03:38 -0600 Subject: [PATCH] Package cleanup, improve test coverage, and fix tests --- src/Tables.jl | 3 +++ src/datavalues.jl | 23 +++++++++++++---------- src/enumerable.jl | 32 +++++++++++++++++--------------- src/fallbacks.jl | 8 +++++--- test/runtests.jl | 43 ++++++++++++++++++++++++++++++++++++++++--- 5 files changed, 78 insertions(+), 31 deletions(-) diff --git a/src/Tables.jl b/src/Tables.jl index c28dcd1..ae41f73 100644 --- a/src/Tables.jl +++ b/src/Tables.jl @@ -115,8 +115,11 @@ Obviously every table type is different, but via a combination of `Tables.rows` abstract type Table end # default definitions +istable(x::T) where {T} = istable(T) istable(::Type{T}) where {T} = false +rowaccess(x::T) where {T} = rowaccess(T) rowaccess(::Type{T}) where {T} = false +columnaccess(x::T) where {T} = columnaccess(T) columnaccess(::Type{T}) where {T} = false schema(x) = nothing diff --git a/src/datavalues.jl b/src/datavalues.jl index 583bdc1..c2862c0 100644 --- a/src/datavalues.jl +++ b/src/datavalues.jl @@ -14,27 +14,31 @@ unwrap(x::DataValue) = isna(x) ? missing : DataValues.unsafe_get(x) datavaluetype(::Type{T}) where {T <: DataValue} = T datavaluetype(::Type{T}) where {T} = DataValue{T} datavaluetype(::Type{Union{T, Missing}}) where {T} = DataValue{T} -Base.@pure function datavaluetype(::Type{NT}) where {NT <: NamedTuple{names}} where {names} - TT = Tuple{Any[ datavaluetype(fieldtype(NT, i)) for i = 1:fieldcount(NT) ]...} +Base.@pure function datavaluetype(::Tables.Schema{names, types}) where {names, types} + TT = Tuple{Any[ datavaluetype(fieldtype(types, i)) for i = 1:fieldcount(types) ]...} return NamedTuple{names, TT} end struct DataValueRowIterator{NT, S} x::S end +DataValueRowIterator(::Type{NT}, x::S) where {NT <: NamedTuple, S} = DataValueRowIterator{NT, S}(x) + +"Returns a DataValue-based NamedTuple-iterator" +DataValueRowIterator(::Type{Schema{names, types}}, x::S) where {names, types, S} = DataValueRowIterator{datavaluetype(NamedTuple{names, types}), S}(x) +function datavaluerows(x) + r = Tables.rows(x) + #TODO: add support for unknown schema + return DataValueRowIterator(datavaluetype(Tables.schema(r)), r) +end -# Should maybe make this return a custom DataValueRow type to allow lazier -# DataValue wrapping; but need to make sure Query/QueryOperators support first Base.eltype(rows::DataValueRowIterator{NT, S}) where {NT, S} = NT Base.IteratorSize(::Type{DataValueRowIterator{NT, S}}) where {NT, S} = Base.IteratorSize(S) Base.length(rows::DataValueRowIterator) = length(rows.x) -"Returns a DataValue-based NamedTuple-iterator" -DataValueRowIterator(::Type{NT}, x::S) where {NT <: NamedTuple, S} = DataValueRowIterator{datavaluetype(NT), S}(x) - function Base.iterate(rows::DataValueRowIterator{NT, S}, st=()) where {NT <: NamedTuple{names}, S} where {names} if @generated - vals = Tuple(:(getproperty(row, $(fieldtype(NT, i)), $i, $(Meta.QuoteNode(names[i])))) for i = 1:fieldcount(NT)) + vals = Tuple(:($(fieldtype(NT, i))(getproperty(row, $(nondatavaluetype(fieldtype(NT, i))), $i, $(Meta.QuoteNode(names[i]))))) for i = 1:fieldcount(NT)) q = quote x = iterate(rows.x, st...) x === nothing && return nothing @@ -47,8 +51,7 @@ function Base.iterate(rows::DataValueRowIterator{NT, S}, st=()) where {NT <: Nam x = iterate(rows.x, st...) x === nothing && return nothing row, st = x - return NT(Tuple(getproperty(row, fieldtype(NT, i), i, names[i]) for i = 1:fieldcount(NT))), (st,) + return NT(Tuple(fieldtype(NT, i)(getproperty(row, nondatavaluetype(fieldtype(NT, i)), i, names[i])) for i = 1:fieldcount(NT))), (st,) end end -datavaluerows(x) = DataValueRowIterator(schema(x), rows(x)) diff --git a/src/enumerable.jl b/src/enumerable.jl index ecfb432..5edc8c4 100644 --- a/src/enumerable.jl +++ b/src/enumerable.jl @@ -1,28 +1,30 @@ -using .QueryOperators +using .QueryOperators: Enumerable +using .DataValues -struct DataValueUnwrapRow{T} - row::T -end - -Base.getproperty(d::DataValueUnwrapRow, ::Type{T}, col::Int, nm::Symbol) where {T} = unwrap(getproperty(getfield(d, 1), T, col, nm)) -Base.getproperty(d::DataValueUnwrapRow, nm::Symbol) = unwrap(getproperty(getfield(d, 1), nm)) -Base.propertynames(d::DataValueUnwrapRow) = propertynames(getfield(d, 1)) +Tables.istable(::Type{<:Enumerable}) = true +Tables.rowaccess(::Type{<:Enumerable}) = true +Tables.rows(e::Enumerable) = DataValueUnwrapper(e) -struct DataValueUnwrapper{NT, S} +struct DataValueUnwrapper{S} x::S end +Tables.schema(dv::DataValueUnwrapper) = Tables.Schema(nondatavaluetype(eltype(dv.x))) Base.eltype(rows::DataValueUnwrapper) = DataValueUnwrapRow{eltype(rows.x)} -Base.IteratorSize(::Type{DataValueUnwrapper{NT, S}}) where {NT, S} = Base.IteratorSize(S) +Base.IteratorSize(::Type{DataValueUnwrapper{S}}) where {S} = Base.IteratorSize(S) Base.length(rows::DataValueUnwrapper) = length(rows.x) -AccessStyle(::Type{E}) where {E <: QueryOperators.Enumerable} = RowAccess() -schema(e::QueryOperators.Enumerable) = nondatavaluetype(eltype(e)) -rows(e::E) where {E <: QueryOperators.Enumerable} = DataValueUnwrapper{schema(e), E}(e) - -function Base.iterate(rows::DataValueUnwrapper{NT}, st=()) where {NT <: NamedTuple{names}} where {names} +function Base.iterate(rows::DataValueUnwrapper, st=()) x = iterate(rows.x, st...) x === nothing && return nothing row, st = x return DataValueUnwrapRow(row), (st,) end + +struct DataValueUnwrapRow{T} + row::T +end + +Base.getproperty(d::DataValueUnwrapRow, ::Type{T}, col::Int, nm::Symbol) where {T} = unwrap(getproperty(getfield(d, 1), T, col, nm)) +Base.getproperty(d::DataValueUnwrapRow, nm::Symbol) = unwrap(getproperty(getfield(d, 1), nm)) +Base.propertynames(d::DataValueUnwrapRow) = propertynames(getfield(d, 1)) diff --git a/src/fallbacks.jl b/src/fallbacks.jl index 63fbf52..ecf1aa4 100644 --- a/src/fallbacks.jl +++ b/src/fallbacks.jl @@ -10,7 +10,7 @@ end Base.getproperty(c::ColumnsRow, ::Type{T}, col::Int, nm::Symbol) where {T} = getproperty(getfield(c, 1), T, col, nm)[getfield(c, 2)] Base.getproperty(c::ColumnsRow, nm::Symbol) = getproperty(getfield(c, 1), nm)[getfield(c, 2)] -Base.propertynames(c::ColumnsRow) = propertynames(c.columns) +Base.propertynames(c::ColumnsRow) = propertynames(getfield(c, 1)) struct RowIterator{T} columns::T @@ -33,6 +33,8 @@ function rows(x::T) where {T} end # build columns from rows +haslength(L) = L isa Union{Base.HasShape, Base.HasLength} + """ Tables.allocatecolumn(::Type{T}, len) => returns a column type (usually AbstractVector) w/ size to hold `len` elements @@ -55,7 +57,7 @@ end @inline function buildcolumns(schema, rowitr::T) where {T} L = Base.IteratorSize(T) - len = Base.haslength(L) ? length(rowitr) : 0 + len = haslength(L) ? length(rowitr) : 0 nt = allocatecolumns(schema, len) for (i, row) in enumerate(rowitr) eachcolumn(add!, schema, row, L, nt, i) @@ -91,7 +93,7 @@ function buildcolumns(::Nothing, rowitr::T) where {T} row::eltype(rowitr), st = state names = propertynames(row) L = Base.IteratorSize(T) - len = Base.haslength(L) ? length(rowitr) : 0 + len = haslength(L) ? length(rowitr) : 0 sch = Schema(names, nothing) columns = NamedTuple{names}(Tuple(Union{}[] for _ = 1:length(names))) return _buildcolumns(rowitr, row, st, sch, L, columns, 1, len, Ref{Any}(columns)) diff --git a/test/runtests.jl b/test/runtests.jl index 8913372..185a839 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -88,6 +88,12 @@ end @test Tables.buildcolumns(nothing, rt) == nt rt = [(a=1, b=4.0, c="7"), (a=2.0, b=missing, c="8"), (a=3, b=6.0, c="9")] @test isequal(Tables.buildcolumns(nothing, rt), (a = Real[1, 2.0, 3], b = Union{Missing, Float64}[4.0, missing, 6.0], c = ["7", "8", "9"])) + + nti = Tables.NamedTupleIterator{nothing, typeof(rt)}(rt) + nti2 = collect(nti) + @test isequal(rt, nti2) + + @test Tables.columntable(nothing, nt) == nt end import Base: == @@ -146,8 +152,26 @@ function genericcolumntable(x) end ==(a::GenericColumnTable, b::GenericColumnTable) = getfield(a, 1) == getfield(b, 1) && getfield(a, 2) == getfield(b, 2) -@testset "Tables.jl" begin - +@testset "Tables.jl interface" begin + + @test !Tables.istable(1) + @test !Tables.istable(Int) + @test !Tables.rowaccess(1) + @test !Tables.rowaccess(Int) + @test !Tables.columnaccess(1) + @test !Tables.columnaccess(Int) + @test Tables.schema(1) === nothing + + sch = Tables.Schema{(:a, :b), Tuple{Int64, Float64}}() + @test Tables.Schema((:a, :b), Tuple{Int64, Float64}) === sch + @test Tables.Schema(NamedTuple{(:a, :b), Tuple{Int64, Float64}}) === sch + @test Tables.Schema((:a, :b), nothing) === Tables.Schema{(:a, :b), nothing}() + @test Tables.Schema([:a, :b], [Int64, Float64]) === sch + show(sch) + @test sch.names == (:a, :b) + @test sch.types == (Int64, Float64) + @test_throws ArgumentError sch.foobar + gr = GenericRowTable([GenericRow(1, 4.0, "7"), GenericRow(2, 5.0, "8"), GenericRow(3, 6.0, "9")]) gc = GenericColumnTable(Dict(:a=>1, :b=>2, :c=>3), [GenericColumn([1,2,3]), GenericColumn([4.0, 5.0, 6.0]), GenericColumn(["7", "8", "9"])]) @test gc == (gr |> genericcolumntable) @@ -156,9 +180,22 @@ end end @static if :Query in Symbol.(Base.loaded_modules_array()) + rt = (a = Real[1, 2.0, 3], b = Union{Missing, Float64}[4.0, missing, 6.0], c = ["7", "8", "9"]) + + dv = Tables.datavaluerows(rt) + @test eltype(dv) == NamedTuple{(:a, :b, :c),Tuple{DataValue{Real},DataValue{Float64},DataValue{String}}} + rt2 = collect(dv) + @test rt2[1] == (a = DataValue{Real}(1), b = DataValue{Float64}(4.0), c = DataValue{String}("7")) + + ei = QueryOperators.EnumerableIterable{eltype(dv), typeof(dv)}(dv) + nt = ei |> columntable + @test isequal(rt, nt) + rt3 = ei |> rowtable + @test isequal(rt |> rowtable, rt3) rt = [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8"), (a=3, b=6.0, c="9")] - mt = rt |> @map({_.a, _.c}) + map(source::Enumerable, f::Function, f_expr::Expr) + mt = ei |> y->QueryOperators.map(y, x->(a=x.a, c=x.c), Expr(:block)) @inferred (mt |> columntable) @inferred (mt |> rowtable) end \ No newline at end of file