Skip to content

Commit 36e9520

Browse files
authored
Fix generic RowIterator performance by caching columns length (#14)
* Fix generic RowIterator performance by caching the columns' length, and fix up some Queryverse integration
1 parent 4295d83 commit 36e9520

File tree

6 files changed, with 45 additions and 15 deletions.

src/Tables.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ export rowtable, columntable
66

77
function __init__()
88
@require DataValues="e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5" include("datavalues.jl")
9-
@require QueryOperators="2aef5ad7-51ca-5a8f-8e88-e75cf067b44b" include("enumerable.jl")
9+
@require Query="1a8c2f83-1ff3-5112-b086-8aa67b057ba1" include("enumerable.jl")
1010
@require CategoricalArrays="324d7699-5711-5eae-9e2f-1d82baa6b597" begin
1111
using .CategoricalArrays
1212
allocatecolumn(::Type{CategoricalString{R}}, rows) where {R} = CategoricalArray{String, 1, R}(undef, rows)

src/datavalues.jl

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ unwrap(x) = x
1212
unwrap(x::DataValue) = isna(x) ? missing : DataValues.unsafe_get(x)
1313

1414
datavaluetype(::Type{T}) where {T <: DataValue} = T
15-
datavaluetype(::Type{T}) where {T} = DataValue{T}
15+
datavaluetype(::Type{T}) where {T} = T
1616
datavaluetype(::Type{Union{T, Missing}}) where {T} = DataValue{T}
1717
Base.@pure function datavaluetype(::Tables.Schema{names, types}) where {names, types}
1818
TT = Tuple{Any[ datavaluetype(fieldtype(types, i)) for i = 1:fieldcount(types) ]...}
@@ -55,3 +55,16 @@ function Base.iterate(rows::DataValueRowIterator{NT, S}, st=()) where {NT <: Nam
5555
end
5656
end
5757

58+
# function Base.iterate(rows::DataValueRowIterator{NT}, st=()) where {NT}
59+
# state = iterate(rows.x, st...)
60+
# state === nothing && return nothing
61+
# row, st = state
62+
# return DataValueRow{NT, typeof(row)}(row), (st,)
63+
# end
64+
65+
# struct DataValueRow{NT, T}
66+
# row::T
67+
# end
68+
69+
# @inline Base.getproperty(dvr::DataValueRow{NamedTuple{names, types}}, nm::Symbol) where {names, types} = getproperty(dvr, Tables.columntype(names, types, nm), Tables.columnindex(names, nm), nm)
70+
# @inline Base.getproperty(dvr::DataValueRow, ::Type{T}, col::Int, nm::Symbol) where {T} = T(getproperty(getfield(dvr, 1), nondatavaluetype(T), col, nm))

src/enumerable.jl

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,8 @@
1-
using .QueryOperators: Enumerable
2-
using .DataValues
1+
using .Query
2+
3+
@static if isdefined(Query.QueryOperators, :Enumerable)
4+
5+
import .Query.QueryOperators: Enumerable
36

47
Tables.istable(::Type{<:Enumerable}) = true
58
Tables.rowaccess(::Type{<:Enumerable}) = true
@@ -9,7 +12,11 @@ struct DataValueUnwrapper{S}
912
x::S
1013
end
1114

12-
Tables.schema(dv::DataValueUnwrapper) = Tables.Schema(nondatavaluetype(eltype(dv.x)))
15+
function Tables.schema(dv::DataValueUnwrapper)
16+
eT = eltype(dv.x)
17+
!(eT <: NamedTuple) && return nothing
18+
return Tables.Schema(nondatavaluetype(eT))
19+
end
1320
Base.eltype(rows::DataValueUnwrapper) = DataValueUnwrapRow{eltype(rows.x)}
1421
Base.IteratorSize(::Type{DataValueUnwrapper{S}}) where {S} = Base.IteratorSize(S)
1522
Base.length(rows::DataValueUnwrapper) = length(rows.x)
@@ -28,3 +35,5 @@ end
2835
Base.getproperty(d::DataValueUnwrapRow, ::Type{T}, col::Int, nm::Symbol) where {T} = unwrap(getproperty(getfield(d, 1), T, col, nm))
2936
Base.getproperty(d::DataValueUnwrapRow, nm::Symbol) = unwrap(getproperty(getfield(d, 1), nm))
3037
Base.propertynames(d::DataValueUnwrapRow) = propertynames(getfield(d, 1))
38+
39+
end # isdefined

src/fallbacks.jl

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33
## we'll provide a default implementation of the dual
44

55
# generic row iteration of columns
6+
rowcount(cols) = length(getproperty(cols, propertynames(cols)[1]))
7+
68
struct ColumnsRow{T}
79
columns::T # a `Columns` object
810
row::Int
@@ -14,9 +16,10 @@ Base.propertynames(c::ColumnsRow) = propertynames(getfield(c, 1))
1416

1517
struct RowIterator{T}
1618
columns::T
19+
len::Int
1720
end
1821
Base.eltype(x::RowIterator{T}) where {T} = ColumnsRow{T}
19-
Base.length(x::RowIterator) = length(getproperty(x.columns, propertynames(x.columns)[1]))
22+
Base.length(x::RowIterator) = x.len
2023
schema(x::RowIterator) = schema(x.columns)
2124

2225
function Base.iterate(rows::RowIterator, st=1)
@@ -26,7 +29,8 @@ end
2629

2730
function rows(x::T) where {T}
2831
if columnaccess(T)
29-
return RowIterator(columns(x))
32+
cols = columns(x)
33+
return RowIterator(cols, rowcount(cols))
3034
else
3135
throw(ArgumentError("no default `Tables.rows` implementation for type: $T"))
3236
end
@@ -90,7 +94,7 @@ end
9094
function buildcolumns(::Nothing, rowitr::T) where {T}
9195
state = iterate(rowitr)
9296
state === nothing && return NamedTuple()
93-
row::eltype(rowitr), st = state
97+
row, st = state
9498
names = propertynames(row)
9599
L = Base.IteratorSize(T)
96100
len = haslength(L) ? length(rowitr) : 0

src/namedtuples.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ function Base.iterate(rows::NamedTupleIterator{Schema{names, T}}, st=()) where {
3636
end
3737

3838
# unknown schema case
39-
function Base.iterate(rows::NamedTupleIterator{nothing, T}, st=()) where {T}
39+
function Base.iterate(rows::NamedTupleIterator{Nothing, T}, st=()) where {T}
4040
x = iterate(rows.x, st...)
4141
x === nothing && return nothing
4242
row, st = x

test/runtests.jl

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,7 @@ end
8989
rt = [(a=1, b=4.0, c="7"), (a=2.0, b=missing, c="8"), (a=3, b=6.0, c="9")]
9090
@test isequal(Tables.buildcolumns(nothing, rt), (a = Real[1, 2.0, 3], b = Union{Missing, Float64}[4.0, missing, 6.0], c = ["7", "8", "9"]))
9191

92-
nti = Tables.NamedTupleIterator{nothing, typeof(rt)}(rt)
92+
nti = Tables.NamedTupleIterator{Nothing, typeof(rt)}(rt)
9393
nti2 = collect(nti)
9494
@test isequal(rt, nti2)
9595

@@ -183,19 +183,23 @@ end
183183
rt = (a = Real[1, 2.0, 3], b = Union{Missing, Float64}[4.0, missing, 6.0], c = ["7", "8", "9"])
184184

185185
dv = Tables.datavaluerows(rt)
186-
@test eltype(dv) == NamedTuple{(:a, :b, :c),Tuple{DataValue{Real},DataValue{Float64},DataValue{String}}}
186+
@test eltype(dv) == NamedTuple{(:a, :b, :c),Tuple{Real,DataValue{Float64},String}}
187187
rt2 = collect(dv)
188-
@test rt2[1] == (a = DataValue{Real}(1), b = DataValue{Float64}(4.0), c = DataValue{String}("7"))
188+
@test rt2[1] == (a = 1, b = DataValue{Float64}(4.0), c = "7")
189189

190190
ei = QueryOperators.EnumerableIterable{eltype(dv), typeof(dv)}(dv)
191191
nt = ei |> columntable
192192
@test isequal(rt, nt)
193193
rt3 = ei |> rowtable
194194
@test isequal(rt |> rowtable, rt3)
195195

196-
rt = [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8"), (a=3, b=6.0, c="9")]
197-
map(source::Enumerable, f::Function, f_expr::Expr)
198-
mt = ei |> y->QueryOperators.map(y, x->(a=x.a, c=x.c), Expr(:block))
196+
# rt = [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8"), (a=3, b=6.0, c="9")]
197+
mt = ei |> y->QueryOperators.map(y, x->(a=x.b, c=x.c), Expr(:block))
199198
@inferred (mt |> columntable)
200199
@inferred (mt |> rowtable)
200+
201+
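# uninferrable case: this map selects column `a`, whose element type is the abstract `Real` (presumably why inference fails), so results are compared by value instead of with `@inferred`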
202+
mt = ei |> y->QueryOperators.map(y, x->(a=x.a, c=x.c), Expr(:block))
203+
@test (mt |> columntable) == (a = Real[1, 2.0, 3], c = ["7", "8", "9"])
204+
@test length(mt |> rowtable) == 3
201205
end

0 commit comments

Comments
 (0)