From 9bc355c0cc3f274b18fd61a0b7ee4ac7c9f1f705 Mon Sep 17 00:00:00 2001 From: Carlo Lucibello Date: Tue, 28 Jun 2022 08:24:01 +0200 Subject: [PATCH 1/6] add getrows --- docs/src/index.md | 3 +++ src/Tables.jl | 22 ++++++++++++++++++++++ test/runtests.jl | 4 ++++ 3 files changed, 29 insertions(+) diff --git a/docs/src/index.md b/docs/src/index.md index 50e066f..f113fb7 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -197,9 +197,11 @@ the table-specific use-case, knowing that it will Just Work™️. Before moving on to _implementing_ the Tables.jl interfaces, we take a quick break to highlight some useful utility functions provided by Tables.jl: + ```@docs Tables.Schema Tables.schema +Tables.getrows Tables.partitions Tables.partitioner Tables.rowtable @@ -239,6 +241,7 @@ For a type `MyTable`, the interface to becoming a proper table is straightforwar | **Optional methods** | | | | `Tables.schema(x::MyTable)` | `Tables.schema(x) = nothing` | Return a [`Tables.Schema`](@ref) object from your `Tables.AbstractRow` iterator or `Tables.AbstractColumns` object; or `nothing` for unknown schema | | `Tables.materializer(::Type{MyTable})` | `Tables.columntable` | Declare a "materializer" sink function for your table type that can construct an instance of your type from any Tables.jl input | +| `Tables.getrows(x::MyTable, inds; view)` | | Return a row or a sub-table of the original table Based on whether your table type has defined `Tables.rows` or `Tables.columns`, you then ensure that the `Tables.AbstractRow` iterator or `Tables.AbstractColumns` object satisfies the respective interface. diff --git a/src/Tables.jl b/src/Tables.jl index 6f39462..f4bed73 100644 --- a/src/Tables.jl +++ b/src/Tables.jl @@ -565,6 +565,28 @@ struct Partitioner{T} x::T end +""" + getrows(x, inds; view=nothing) + +Return one or more rows from table `x` according to the position(s) specified by `inds`. + +- If `inds` is a single integer return a row object. 
+- If `inds` is a collection of integers, return a table object. + In this case,t he returned type is not necessarily the same as the original table type. + +The `view` argument influences whether the returned object is a view of the original table +or an independent copy: +- If `view=nothing` (the default) then the implementation for a specific table type + is free to decide whether to return a copy or a view. +- If `view=true` then a view is returned and if `view=false` a copy is returned. + This applies both to returning a row or a table. + +Any specialized implementation of `getrows` must support the `view=nothing` argument. +Support for `view=true` or `view=false` instead can be an opt-in +(i.e. implementations might error on them if they are not supported). +""" +function getrows end + """ Tables.partitioner(f, itr) Tables.partitioner(x) diff --git a/test/runtests.jl b/test/runtests.jl index 88e5874..f131bc4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -798,4 +798,8 @@ Tables.columnnames(::WideTable2) = [Symbol("x", i) for i = 1:1000] @test nm isa Symbol @test col isa Vector{Float64} end + + @testset "getrows" begin + Tables.getrows isa Function + end end From b41146a4289c7333a9e5c20c253d0ccbd9d02140 Mon Sep 17 00:00:00 2001 From: Carlo Lucibello Date: Tue, 28 Jun 2022 15:12:47 +0200 Subject: [PATCH 2/6] implement getrows for RowTable and ColumnTable --- src/Tables.jl | 5 +++-- src/namedtuples.jl | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/Tables.jl b/src/Tables.jl index f4bed73..06cb85c 100644 --- a/src/Tables.jl +++ b/src/Tables.jl @@ -568,14 +568,15 @@ end """ getrows(x, inds; view=nothing) -Return one or more rows from table `x` according to the position(s) specified by `inds`. +Return one or more rows from table `x` according to the position(s) specified by `inds`: - If `inds` is a single integer return a row object. - If `inds` is a collection of integers, return a table object. 
- In this case,t he returned type is not necessarily the same as the original table type. + In this case, the returned type is not necessarily the same as the original table type. The `view` argument influences whether the returned object is a view of the original table or an independent copy: + - If `view=nothing` (the default) then the implementation for a specific table type is free to decide whether to return a copy or a view. - If `view=true` then a view is returned and if `view=false` a copy is returned. diff --git a/src/namedtuples.jl b/src/namedtuples.jl index 7b2f3e5..e29c3e7 100644 --- a/src/namedtuples.jl +++ b/src/namedtuples.jl @@ -106,6 +106,14 @@ function rowtable(itr::T) where {T} return collect(namedtupleiterator(eltype(r), r)) end +function getrows(x::RowTable, inds; view::Union{Bool,Nothing} = nothing) + if view === true + return view(x, inds) + else + return x[inds] + end +end + # NamedTuple of arrays of matching dimensionality const ColumnTable = NamedTuple{names, T} where {names, T <: NTuple{N, AbstractArray{S, D} where S}} where {N, D} rowcount(c::ColumnTable) = length(c) == 0 ? 
0 : length(c[1]) @@ -173,3 +181,11 @@ function columntable(itr::T) where {T} return columntable(schema(cols), cols) end columntable(x::ColumnTable) = x + +function getrows(x::ColumnTable, inds; view::Union{Bool,Nothing} = nothing) + if view === true + return map(c -> view(c, inds), x) + else + return map(c -> c[inds], x) + end +end From 04c4991adfc55287151be48f6b23f310d4c7e4f2 Mon Sep 17 00:00:00 2001 From: Carlo Lucibello Date: Thu, 30 Jun 2022 06:59:52 +0200 Subject: [PATCH 3/6] add tests and implement feedbacks from review --- .vscode/settings.json | 3 +++ src/Tables.jl | 4 ++-- src/namedtuples.jl | 4 ++-- test/runtests.jl | 31 ++++++++++++++++++++++++++++--- 4 files changed, 35 insertions(+), 7 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..60c0c7d --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "julia.environmentPath": "/Users/carlo/.julia/dev/Tables" +} \ No newline at end of file diff --git a/src/Tables.jl b/src/Tables.jl index 06cb85c..87b52ef 100644 --- a/src/Tables.jl +++ b/src/Tables.jl @@ -566,12 +566,12 @@ struct Partitioner{T} end """ - getrows(x, inds; view=nothing) + Tables.getrows(x, inds; view=nothing) Return one or more rows from table `x` according to the position(s) specified by `inds`: - If `inds` is a single integer return a row object. -- If `inds` is a collection of integers, return a table object. +- If `inds` is a collection of integers, return an indexable object of rows. In this case, the returned type is not necessarily the same as the original table type. 
The `view` argument influences whether the returned object is a view of the original table diff --git a/src/namedtuples.jl b/src/namedtuples.jl index e29c3e7..7ac24fd 100644 --- a/src/namedtuples.jl +++ b/src/namedtuples.jl @@ -108,7 +108,7 @@ end function getrows(x::RowTable, inds; view::Union{Bool,Nothing} = nothing) if view === true - return view(x, inds) + return Base.view(x, inds) else return x[inds] end @@ -184,7 +184,7 @@ columntable(x::ColumnTable) = x function getrows(x::ColumnTable, inds; view::Union{Bool,Nothing} = nothing) if view === true - return map(c -> view(c, inds), x) + return map(c -> Base.view(c, inds), x) else return map(c -> c[inds], x) end diff --git a/test/runtests.jl b/test/runtests.jl index f131bc4..f45d6ee 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -145,6 +145,34 @@ end @test Tables.buildcolumns(nothing, rt) == nt @test Tables.columntable(nothing, nt) == nt + @testset "columntable getrows" begin + @test Tables.getrows(nt, 1) == (a=1, b=4.0, c="7") + @test Tables.getrows(nt, 1, view=false) == (a=1, b=4.0, c="7") + @test Tables.getrows(nt, 1, view=nothing) == (a=1, b=4.0, c="7") + @test Tables.getrows(nt, 1:2) == (a=[1,2], b=[4.0, 5.0], c=["7","8"]) + @test Tables.getrows(nt, 1:2, view=false) == (a=[1,2], b=[4.0, 5.0], c=["7","8"]) + @test Tables.getrows(nt, 1:2, view=nothing) == (a=[1,2], b=[4.0, 5.0], c=["7","8"]) + + @test Tables.getrows(nt, 1, view=true) == (a = fill(1), b = fill(4.0), c = fill("7")) + rs = Tables.getrows(nt, 1:2, view=true) + @test rs == (a=[1,2], b=[4.0, 5.0], c=["7","8"]) + @test rs.a.parent === nt.a + end + + @testset "rowtable getrows" begin + @test Tables.getrows(rt, 1) == (a=1, b=4.0, c="7") + @test Tables.getrows(rt, 1, view=false) == (a=1, b=4.0, c="7") + @test Tables.getrows(rt, 1, view=nothing) == (a=1, b=4.0, c="7") + @test Tables.getrows(rt, 1:2) == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")] + @test Tables.getrows(rt, 1:2, view=false) == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")] + @test 
Tables.getrows(rt, 1:2, view=nothing) == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")] + + @test Tables.getrows(rt, 1, view=true) == fill((a = 1, b = 4.0, c = "7")) + rs = Tables.getrows(rt, 1:2, view=true) + @test rs == [(a=1, b=4.0, c="7"), (a=2, b=5.0, c="8")] + @test rs.parent === rt + end + # test push! rtf = Iterators.Filter(x->x.a >= 1, rt) @test Tables.columntable(rtf) == nt @@ -799,7 +827,4 @@ Tables.columnnames(::WideTable2) = [Symbol("x", i) for i = 1:1000] @test col isa Vector{Float64} end - @testset "getrows" begin - Tables.getrows isa Function - end end From 669565836bfc52555a429c34776ce64edbd0dcb5 Mon Sep 17 00:00:00 2001 From: Carlo Lucibello Date: Thu, 30 Jun 2022 07:00:35 +0200 Subject: [PATCH 4/6] ignore vscode --- .gitignore | 3 ++- .vscode/settings.json | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore index 2251642..c7e6298 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -Manifest.toml \ No newline at end of file +Manifest.toml +.vscode diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 60c0c7d..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "julia.environmentPath": "/Users/carlo/.julia/dev/Tables" -} \ No newline at end of file From c539d5ab0a13a1f9096163b5013b838a2cd719b6 Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Sat, 27 Aug 2022 18:33:53 -0600 Subject: [PATCH 5/6] Update src/Tables.jl Co-authored-by: Milan Bouchet-Valat --- src/Tables.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Tables.jl b/src/Tables.jl index 87b52ef..464f69f 100644 --- a/src/Tables.jl +++ b/src/Tables.jl @@ -583,7 +583,7 @@ or an independent copy: This applies both to returning a row or a table. Any specialized implementation of `getrows` must support the `view=nothing` argument. 
-Support for `view=true` or `view=false` instead can be an opt-in +Support for `view=true` or `view=false` is optional (i.e. implementations might error on them if they are not supported). """ function getrows end From 76f1375cf92fa9e435a0d3fcb6b76e254771b836 Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Mon, 29 Aug 2022 21:47:51 -0600 Subject: [PATCH 6/6] Update src/Tables.jl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bogumił Kamiński --- src/Tables.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Tables.jl b/src/Tables.jl index 464f69f..b4848ef 100644 --- a/src/Tables.jl +++ b/src/Tables.jl @@ -570,9 +570,11 @@ end Return one or more rows from table `x` according to the position(s) specified by `inds`: -- If `inds` is a single integer return a row object. -- If `inds` is a collection of integers, return an indexable object of rows. +- If `inds` is a single non-boolean integer, return a row object. +- If `inds` is a vector of non-boolean integers, a vector of booleans, or a `:`, return an indexable object of rows. In this case, the returned type is not necessarily the same as the original table type. + +If `inds` is of any type other than those specified above, the behavior is undefined. The `view` argument influences whether the returned object is a view of the original table or an independent copy: