|
3 | 3 | # ------------------------------------------------------------------
|
4 | 4 |
|
5 | 5 | """
|
6 |
| - Coerce(pairs, tight=false, verbosity=1) |
| 6 | + Coerce(col₁ => S₁, col₂ => S₂, ..., colₙ => Sₙ) |
7 | 7 |
|
8 | 8 | Return a copy of the table, ensuring that the scientific types of the columns match the new specification.
|
9 | 9 |
|
10 |
| -This transform wraps the ScientificTypes.coerce function. Please see their docstring for more details. |
| 10 | +This transform uses the `DataScienceTraits.coerce` function. Please see their docstring for more details. |
11 | 11 |
|
12 | 12 | # Examples
|
13 | 13 |
|
14 | 14 | ```julia
|
15 |
| -using ScientificTypes |
16 |
| -Coerce(:col1 => Continuous, :col2 => Count) |
| 15 | +import DataScienceTraits as DST |
| 16 | +Coerce(1 => DST.Continuous, 2 => DST.Continuous) |
| 17 | +Coerce(:a => DST.Continuous, :b => DST.Continuous) |
| 18 | +Coerce("a" => DST.Continuous, "b" => DST.Continuous) |
17 | 19 | ```
|
18 | 20 | """
|
19 |
| -struct Coerce{P} <: FeatureTransform |
20 |
| - pairs::P |
21 |
| - tight::Bool |
22 |
| - verbosity::Int |
| 21 | +struct Coerce{S<:ColumnSelector} <: StatelessFeatureTransform |
| 22 | + selector::S |
| 23 | + scitypes::Vector{DataType} |
23 | 24 | end
|
24 | 25 |
|
25 |
| -Coerce(pairs::Pair{Symbol,<:Type}...; tight=false, verbosity=1) = Coerce(pairs, tight, verbosity) |
26 |
| -Coerce(pairs::Pair{<:AbstractString,<:Type}...; kwargs...) = Coerce((Symbol(k) => v for (k, v) in pairs)...; kwargs...) |
| 26 | +Coerce() = throw(ArgumentError("cannot create Coerce transform without arguments")) |
| 27 | + |
| 28 | +Coerce(pairs::Pair{C,DataType}...) where {C<:Column} = Coerce(selector(first.(pairs)), collect(last.(pairs))) |
27 | 29 |
|
28 | 30 | isrevertible(::Type{<:Coerce}) = true
|
29 | 31 |
|
30 | 32 | function applyfeat(transform::Coerce, feat, prep)
|
31 |
| - newtable = coerce(feat, transform.pairs...; tight=transform.tight, verbosity=transform.verbosity) |
32 |
| - |
| 33 | + cols = Tables.columns(feat) |
| 34 | + names = Tables.columnnames(cols) |
33 | 35 | types = Tables.schema(feat).types
|
| 36 | + snames = transform.selector(names) |
| 37 | + typedict = Dict(zip(snames, transform.scitypes)) |
| 38 | + |
| 39 | + columns = map(names) do name |
| 40 | + x = Tables.getcolumn(cols, name) |
| 41 | + name ∈ snames ? coerce(typedict[name], x) : x |
| 42 | + end |
| 43 | + |
| 44 | + 𝒯 = (; zip(names, columns)...) |
| 45 | + newfeat = 𝒯 |> Tables.materializer(feat) |
34 | 46 |
|
35 |
| - newtable, types |
| 47 | + newfeat, types |
36 | 48 | end
|
37 | 49 |
|
38 | 50 | function revertfeat(::Coerce, newfeat, fcache)
|
39 | 51 | cols = Tables.columns(newfeat)
|
40 | 52 | names = Tables.columnnames(cols)
|
41 | 53 |
|
42 |
| - oldcols = map(zip(fcache, names)) do (T, n) |
| 54 | + columns = map(fcache, names) do T, n |
43 | 55 | x = Tables.getcolumn(cols, n)
|
44 | 56 | collect(T, x)
|
45 | 57 | end
|
46 | 58 |
|
47 |
| - 𝒯 = (; zip(names, oldcols)...) |
| 59 | + 𝒯 = (; zip(names, columns)...) |
48 | 60 | 𝒯 |> Tables.materializer(newfeat)
|
49 | 61 | end
|
0 commit comments