Skip to content

Commit

Permalink
Package cleanup, improve test coverage, and fix tests
Browse files Browse the repository at this point in the history
  • Loading branch information
quinnj committed Sep 5, 2018
1 parent 80717e3 commit 4295d83
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 31 deletions.
3 changes: 3 additions & 0 deletions src/Tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,11 @@ Obviously every table type is different, but via a combination of `Tables.rows`
abstract type Table end

# default definitions
# Fallback trait definitions. A query on an instance is forwarded to the method
# taking its type, and the type-level fallback answers `false`.
function istable(x::T) where {T}
    return istable(T)
end
function istable(::Type{T}) where {T}
    return false
end

function rowaccess(x::T) where {T}
    return rowaccess(T)
end
function rowaccess(::Type{T}) where {T}
    return false
end

function columnaccess(x::T) where {T}
    return columnaccess(T)
end
function columnaccess(::Type{T}) where {T}
    return false
end

# Generic fallback: no schema is reported for an arbitrary object.
function schema(x)
    return nothing
end

Expand Down
23 changes: 13 additions & 10 deletions src/datavalues.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,31 @@ unwrap(x::DataValue) = isna(x) ? missing : DataValues.unsafe_get(x)
datavaluetype(::Type{T}) where {T <: DataValue} = T
datavaluetype(::Type{T}) where {T} = DataValue{T}
datavaluetype(::Type{Union{T, Missing}}) where {T} = DataValue{T}
Base.@pure function datavaluetype(::Type{NT}) where {NT <: NamedTuple{names}} where {names}
TT = Tuple{Any[ datavaluetype(fieldtype(NT, i)) for i = 1:fieldcount(NT) ]...}
Base.@pure function datavaluetype(::Tables.Schema{names, types}) where {names, types}
TT = Tuple{Any[ datavaluetype(fieldtype(types, i)) for i = 1:fieldcount(types) ]...}
return NamedTuple{names, TT}
end

# Wrapper around a row iterator `x` of type `S`. The `NT` parameter is not
# stored in any field; it is what `Base.eltype` reports for this iterator.
struct DataValueRowIterator{NT, S}
x::S
end
DataValueRowIterator(::Type{NT}, x::S) where {NT <: NamedTuple, S} = DataValueRowIterator{NT, S}(x)

"Returns a DataValue-based NamedTuple-iterator"
DataValueRowIterator(::Type{Schema{names, types}}, x::S) where {names, types, S} = DataValueRowIterator{datavaluetype(NamedTuple{names, types}), S}(x)
function datavaluerows(x)
    # Take the rows of `x`, then derive the DataValue-based row type from the
    # schema those rows report.
    #TODO: add support for unknown schema
    rowitr = Tables.rows(x)
    sch = Tables.schema(rowitr)
    return DataValueRowIterator(datavaluetype(sch), rowitr)
end

# Should maybe make this return a custom DataValueRow type to allow lazier
# DataValue wrapping; but need to make sure Query/QueryOperators support first
# Iterator traits: the element type is the `NT` type parameter, while the size
# trait and the length are delegated to the wrapped iterator `x`.
function Base.eltype(rows::DataValueRowIterator{NT, S}) where {NT, S}
    return NT
end

function Base.IteratorSize(::Type{DataValueRowIterator{NT, S}}) where {NT, S}
    return Base.IteratorSize(S)
end

function Base.length(rows::DataValueRowIterator)
    return length(rows.x)
end

"Returns a DataValue-based NamedTuple-iterator"
DataValueRowIterator(::Type{NT}, x::S) where {NT <: NamedTuple, S} = DataValueRowIterator{datavaluetype(NT), S}(x)

function Base.iterate(rows::DataValueRowIterator{NT, S}, st=()) where {NT <: NamedTuple{names}, S} where {names}
if @generated
vals = Tuple(:(getproperty(row, $(fieldtype(NT, i)), $i, $(Meta.QuoteNode(names[i])))) for i = 1:fieldcount(NT))
vals = Tuple(:($(fieldtype(NT, i))(getproperty(row, $(nondatavaluetype(fieldtype(NT, i))), $i, $(Meta.QuoteNode(names[i]))))) for i = 1:fieldcount(NT))
q = quote
x = iterate(rows.x, st...)
x === nothing && return nothing
Expand All @@ -47,8 +51,7 @@ function Base.iterate(rows::DataValueRowIterator{NT, S}, st=()) where {NT <: Nam
x = iterate(rows.x, st...)
x === nothing && return nothing
row, st = x
return NT(Tuple(getproperty(row, fieldtype(NT, i), i, names[i]) for i = 1:fieldcount(NT))), (st,)
return NT(Tuple(fieldtype(NT, i)(getproperty(row, nondatavaluetype(fieldtype(NT, i)), i, names[i])) for i = 1:fieldcount(NT))), (st,)
end
end

datavaluerows(x) = DataValueRowIterator(schema(x), rows(x))
32 changes: 17 additions & 15 deletions src/enumerable.jl
Original file line number Diff line number Diff line change
@@ -1,28 +1,30 @@
using .QueryOperators
using .QueryOperators: Enumerable
using .DataValues

struct DataValueUnwrapRow{T}
row::T
end

Base.getproperty(d::DataValueUnwrapRow, ::Type{T}, col::Int, nm::Symbol) where {T} = unwrap(getproperty(getfield(d, 1), T, col, nm))
Base.getproperty(d::DataValueUnwrapRow, nm::Symbol) = unwrap(getproperty(getfield(d, 1), nm))
Base.propertynames(d::DataValueUnwrapRow) = propertynames(getfield(d, 1))
# Register `Enumerable` subtypes as tables with row access; their rows are
# obtained by wrapping the enumerable in a `DataValueUnwrapper`.
Tables.istable(::Type{<:Enumerable}) = true
Tables.rowaccess(::Type{<:Enumerable}) = true
Tables.rows(e::Enumerable) = DataValueUnwrapper(e)

struct DataValueUnwrapper{NT, S}
struct DataValueUnwrapper{S}
x::S
end

Tables.schema(dv::DataValueUnwrapper) = Tables.Schema(nondatavaluetype(eltype(dv.x)))
Base.eltype(rows::DataValueUnwrapper) = DataValueUnwrapRow{eltype(rows.x)}
Base.IteratorSize(::Type{DataValueUnwrapper{NT, S}}) where {NT, S} = Base.IteratorSize(S)
Base.IteratorSize(::Type{DataValueUnwrapper{S}}) where {S} = Base.IteratorSize(S)
Base.length(rows::DataValueUnwrapper) = length(rows.x)

AccessStyle(::Type{E}) where {E <: QueryOperators.Enumerable} = RowAccess()
schema(e::QueryOperators.Enumerable) = nondatavaluetype(eltype(e))
rows(e::E) where {E <: QueryOperators.Enumerable} = DataValueUnwrapper{schema(e), E}(e)

function Base.iterate(rows::DataValueUnwrapper{NT}, st=()) where {NT <: NamedTuple{names}} where {names}
function Base.iterate(rows::DataValueUnwrapper, st=())
    # `st` is either the empty tuple (first call) or a 1-tuple holding the
    # wrapped iterator's state, so splatting covers both cases.
    next = iterate(rows.x, st...)
    if next === nothing
        return nothing
    end
    row, innerstate = next
    # Wrap each row in a `DataValueUnwrapRow`; the inner state is re-wrapped in
    # a tuple for the next call.
    return DataValueUnwrapRow(row), (innerstate,)
end

# Wrapper around a single row of type `T`; property access on the wrapper is
# routed through `unwrap` by the `getproperty` methods below.
struct DataValueUnwrapRow{T}
row::T
end

# `getfield(d, 1)` reads the wrapped row directly, because `getproperty` is
# overloaded for this type. The value fetched from the wrapped row is then
# passed through `unwrap`.
Base.getproperty(d::DataValueUnwrapRow, ::Type{T}, col::Int, nm::Symbol) where {T} = unwrap(getproperty(getfield(d, 1), T, col, nm))
Base.getproperty(d::DataValueUnwrapRow, nm::Symbol) = unwrap(getproperty(getfield(d, 1), nm))
# Property names are those of the wrapped row.
Base.propertynames(d::DataValueUnwrapRow) = propertynames(getfield(d, 1))
8 changes: 5 additions & 3 deletions src/fallbacks.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ end

Base.getproperty(c::ColumnsRow, ::Type{T}, col::Int, nm::Symbol) where {T} = getproperty(getfield(c, 1), T, col, nm)[getfield(c, 2)]
Base.getproperty(c::ColumnsRow, nm::Symbol) = getproperty(getfield(c, 1), nm)[getfield(c, 2)]
Base.propertynames(c::ColumnsRow) = propertynames(c.columns)
Base.propertynames(c::ColumnsRow) = propertynames(getfield(c, 1))

struct RowIterator{T}
columns::T
Expand All @@ -33,6 +33,8 @@ function rows(x::T) where {T}
end

# build columns from rows
# True when `L` is an instance of `Base.HasShape` or `Base.HasLength`.
function haslength(L)
    return isa(L, Union{Base.HasShape, Base.HasLength})
end

"""
Tables.allocatecolumn(::Type{T}, len) => returns a column type (usually AbstractVector) w/ size to hold `len` elements
Expand All @@ -55,7 +57,7 @@ end

@inline function buildcolumns(schema, rowitr::T) where {T}
L = Base.IteratorSize(T)
len = Base.haslength(L) ? length(rowitr) : 0
len = haslength(L) ? length(rowitr) : 0
nt = allocatecolumns(schema, len)
for (i, row) in enumerate(rowitr)
eachcolumn(add!, schema, row, L, nt, i)
Expand Down Expand Up @@ -91,7 +93,7 @@ function buildcolumns(::Nothing, rowitr::T) where {T}
row::eltype(rowitr), st = state
names = propertynames(row)
L = Base.IteratorSize(T)
len = Base.haslength(L) ? length(rowitr) : 0
len = haslength(L) ? length(rowitr) : 0
sch = Schema(names, nothing)
columns = NamedTuple{names}(Tuple(Union{}[] for _ = 1:length(names)))
return _buildcolumns(rowitr, row, st, sch, L, columns, 1, len, Ref{Any}(columns))
Expand Down
43 changes: 40 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ end
@test Tables.buildcolumns(nothing, rt) == nt
rt = [(a=1, b=4.0, c="7"), (a=2.0, b=missing, c="8"), (a=3, b=6.0, c="9")]
@test isequal(Tables.buildcolumns(nothing, rt), (a = Real[1, 2.0, 3], b = Union{Missing, Float64}[4.0, missing, 6.0], c = ["7", "8", "9"]))

nti = Tables.NamedTupleIterator{nothing, typeof(rt)}(rt)
nti2 = collect(nti)
@test isequal(rt, nti2)

@test Tables.columntable(nothing, nt) == nt
end

import Base: ==
Expand Down Expand Up @@ -146,8 +152,26 @@ function genericcolumntable(x)
end
==(a::GenericColumnTable, b::GenericColumnTable) = getfield(a, 1) == getfield(b, 1) && getfield(a, 2) == getfield(b, 2)

@testset "Tables.jl" begin

@testset "Tables.jl interface" begin

@test !Tables.istable(1)
@test !Tables.istable(Int)
@test !Tables.rowaccess(1)
@test !Tables.rowaccess(Int)
@test !Tables.columnaccess(1)
@test !Tables.columnaccess(Int)
@test Tables.schema(1) === nothing

sch = Tables.Schema{(:a, :b), Tuple{Int64, Float64}}()
@test Tables.Schema((:a, :b), Tuple{Int64, Float64}) === sch
@test Tables.Schema(NamedTuple{(:a, :b), Tuple{Int64, Float64}}) === sch
@test Tables.Schema((:a, :b), nothing) === Tables.Schema{(:a, :b), nothing}()
@test Tables.Schema([:a, :b], [Int64, Float64]) === sch
show(sch)
@test sch.names == (:a, :b)
@test sch.types == (Int64, Float64)
@test_throws ArgumentError sch.foobar

gr = GenericRowTable([GenericRow(1, 4.0, "7"), GenericRow(2, 5.0, "8"), GenericRow(3, 6.0, "9")])
gc = GenericColumnTable(Dict(:a=>1, :b=>2, :c=>3), [GenericColumn([1,2,3]), GenericColumn([4.0, 5.0, 6.0]), GenericColumn(["7", "8", "9"])])
@test gc == (gr |> genericcolumntable)
Expand All @@ -156,9 +180,22 @@ end
end

@static if :Query in Symbol.(Base.loaded_modules_array())
rt = (a = Real[1, 2.0, 3], b = Union{Missing, Float64}[4.0, missing, 6.0], c = ["7", "8", "9"])

dv = Tables.datavaluerows(rt)
@test eltype(dv) == NamedTuple{(:a, :b, :c),Tuple{DataValue{Real},DataValue{Float64},DataValue{String}}}
rt2 = collect(dv)
@test rt2[1] == (a = DataValue{Real}(1), b = DataValue{Float64}(4.0), c = DataValue{String}("7"))

ei = QueryOperators.EnumerableIterable{eltype(dv), typeof(dv)}(dv)
nt = ei |> columntable
@test isequal(rt, nt)
rt3 = ei |> rowtable
@test isequal(rt |> rowtable, rt3)

rt = [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8"), (a=3, b=6.0, c="9")]
mt = rt |> @map({_.a, _.c})
map(source::Enumerable, f::Function, f_expr::Expr)
mt = ei |> y->QueryOperators.map(y, x->(a=x.a, c=x.c), Expr(:block))
@inferred (mt |> columntable)
@inferred (mt |> rowtable)
end

0 comments on commit 4295d83

Please sign in to comment.