Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "MLJFlux"
uuid = "094fc8d1-fd35-5302-93ea-dabda2abf845"
authors = ["Anthony D. Blaom <anthony.blaom@gmail.com>", "Ayush Shridhar <ayush.shridhar1999@gmail.com>"]
version = "0.6.6"
version = "0.6.7"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
Expand All @@ -17,15 +17,15 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
CategoricalArrays = "0.10"
CategoricalArrays = "1"
ColorTypes = "0.10.3, 0.11, 0.12"
ComputationalResources = "0.3.2"
Flux = "0.14, 0.15, 0.16"
MLJModelInterface = "1.11"
Metalhead = "0.9.3"
Optimisers = "0.3.2, 0.4"
ProgressMeter = "1.7.1"
StatisticalMeasures = "0.1"
StatisticalMeasures = "0.3"
Statistics = "<0.0.1, 1"
Tables = "1.0"
julia = "1.10"
Expand Down
4 changes: 2 additions & 2 deletions src/classifier.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ data `X` and `y`.
"""
function MLJFlux.shape(model::NeuralNetworkClassifier, X, y)
X = X isa Matrix ? Tables.table(X) : X
levels = MLJModelInterface.classes(y[1])
levels = CategoricalArrays.levels(y[1])
n_output = length(levels)
n_input = Tables.schema(X).names |> length
return (n_input, n_output)
Expand All @@ -31,7 +31,7 @@ MLJFlux.fitresult(
y,
ordinal_mappings = nothing,
embedding_matrices = nothing,
) = (chain, MLJModelInterface.classes(y[1]), ordinal_mappings, embedding_matrices)
) = (chain, levels(y[1]), ordinal_mappings, embedding_matrices)

function MLJModelInterface.predict(
model::NeuralNetworkClassifier,
Expand Down
5 changes: 2 additions & 3 deletions src/core.jl
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ tomat(y::Vector) = reshape(y, size(y, 1), 1)
# Method selected when the second (type) argument is an `AbstractVector` of
# `Continuous` or `Count`: reshapes `y` into a 1 × length(y) array, i.e. a
# single row with one column per element of `y`.
reformat(y, ::Type{<:AbstractVector{<:Union{Continuous,Count}}}) =
reshape(y, 1, length(y))
# Method selected when the second (type) argument is an `AbstractVector` of
# `Finite`: one-hot encodes `y` with `Flux.onehotbatch`, using the levels
# obtained from the first element of `y`.
function reformat(y, ::Type{<:AbstractVector{<:Finite}})
# NOTE(review): `levels` is assigned twice in a row, so only the second value
# (from `CategoricalArrays.levels`) reaches `onehotbatch`. This looks like the
# before/after pair of a diff hunk — confirm which single line belongs here.
levels = y |> first |> MLJModelInterface.classes
levels = y |> first |> CategoricalArrays.levels
return Flux.onehotbatch(y, levels)
end

Expand Down Expand Up @@ -285,7 +285,7 @@ end
# Split `X` and `y` into batches for a `NeuralNetworkBinaryClassifier`.
# Returns two vectors: the batches taken from `Xmatrix` and the batches taken
# from `yvec`, one entry per chunk of row indices.
function collate(model::NeuralNetworkBinaryClassifier, X, y, verbosity)
# Row indices 1:nrows(y), chunked into groups of at most `model.batch_size`.
row_batches = Base.Iterators.partition(1:nrows(y), model.batch_size)
# `reformat(X)` is passed through `_f32` along with `verbosity`
# (presumably a Float32 conversion — confirm against `_f32`).
Xmatrix = _f32(reformat(X), verbosity)
# NOTE(review): `yvec` is assigned twice in a row, so only the second value
# (compared against `levels(y)[2]`) is used below. This looks like the
# before/after pair of a diff hunk — confirm which single line belongs here.
# Each version compares `y` elementwise with the second class/level and takes
# the adjoint (`'`) of the result.
yvec = (y .== classes(y)[2])' # convert to boolean
yvec = (y .== levels(y)[2])' # convert to boolean
# Extract each chunk from both arrays via `_get`.
return [_get(Xmatrix, b) for b in row_batches], [_get(yvec, b) for b in row_batches]
end

Expand All @@ -294,4 +294,3 @@ function _f32(x::AbstractArray, verbosity)
verbosity > 0 && @info "MLJFlux: converting input data to Float32"
return Float32.(x)
end

6 changes: 3 additions & 3 deletions src/encoders.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ function ordinal_encoder_fit(X; featinds)
# 2. Use feature mapper to compute the mapping of each level in each column
for i in featinds
feat_col = Tables.getcolumn(Tables.columns(X), i)
feat_levels = levels(feat_col)
feat_levels = CategoricalArrays.unwrap.(levels(feat_col))
# Check if the feature levels are already ordinal encoded, in which case we skip
(Set([Float32(i) for i in 1:length(feat_levels)]) == Set(feat_levels)) && continue
# Compute the dict using the given feature_mapper function
Expand Down Expand Up @@ -64,10 +64,10 @@ function ordinal_encoder_transform(X, mapping_matrix)
# Create the transformation function for each column
if ind in keys(mapping_matrix)
train_levels = keys(mapping_matrix[ind])
test_levels = levels(col)
test_levels = CategoricalArrays.unwrap.(levels(col))
check_unkown_levels(train_levels, test_levels)
level2scalar = mapping_matrix[ind]
new_col = unwrap.(recode(col, level2scalar...))
new_col = CategoricalArrays.unwrap.(recode(col, level2scalar...))
push!(new_feats, new_col)
else
push!(new_feats, col)
Expand Down
4 changes: 2 additions & 2 deletions src/image.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
function shape(model::ImageClassifier, X, y)
levels = MLJModelInterface.classes(y[1])
levels = CategoricalArrays.levels(y)
n_output = length(levels)
n_input = size(X[1])

Expand All @@ -18,7 +18,7 @@ build(model::ImageClassifier, rng, shape) =
model.finaliser)

fitresult(model::ImageClassifier, chain, y, ::Any, ::Any) =
(chain, MLJModelInterface.classes(y[1]))
(chain, levels(y))

function MLJModelInterface.predict(model::ImageClassifier, fitresult, Xnew)
chain, levels = fitresult
Expand Down
36 changes: 31 additions & 5 deletions src/mlj_embedder_interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ In the following example we wrap a `NeuralNetworkClassifier` as an `EntityEmbedd
that it can be used to supply continuously encoded features to a nearest neighbor model,
which does not support categorical features.

## Simple Example
```julia
using MLJ

Expand All @@ -129,21 +130,46 @@ EntityEmbedder = @load EntityEmbedder pkg=MLJFlux
# Flux model to learn the entity embeddings:
NeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux

# Other supervised model type, requiring `Continuous` features:
KNNClassifier = @load KNNClassifier pkg=NearestNeighborModels

# Instantiate the models:
clf = NeuralNetworkClassifier(embedding_dims=Dict(:b => 2, :c => 3))
emb = EntityEmbedder(clf)

# For illustrative purposes, train the embedder on its own:
# Train and transform the data using the embedder:
mach = machine(emb, X, y)
fit!(mach)
Xnew = transform(mach, X)

# And compare feature scitypes:
# Compare schemas before and after transformation
schema(X)
schema(Xnew)
```

## Using with Downstream Models (Pipeline)
```julia
using MLJ

# Setup some data
N = 400
X = (
a = rand(Float32, N),
b = categorical(rand("abcde", N)),
c = categorical(rand("ABCDEFGHIJ", N), ordered = true),
)

y = categorical(rand("YN", N));

# Load the embedder model type:
EntityEmbedder = @load EntityEmbedder pkg=MLJFlux

# Flux model to learn the entity embeddings:
NeuralNetworkClassifier = @load NeuralNetworkClassifier pkg=MLJFlux

# Other supervised model type, requiring `Continuous` features:
KNNClassifier = @load KNNClassifier pkg=NearestNeighborModels

# Instantiate the models:
clf = NeuralNetworkClassifier(embedding_dims=Dict(:b => 2, :c => 3))
emb = EntityEmbedder(clf)

# Now construct the pipeline:
pipe = emb |> KNNClassifier()
Expand Down
2 changes: 1 addition & 1 deletion test/entity_embedding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ entityprops = [
EE1 = Flux.trainables(embedder.embedders[2])[1] # (newdim, levels) = (5, 10)
EE2 = Flux.trainables(embedder.embedders[4])[1] # (newdim, levels) = (2, 2)

## One-hot encoding
## One-hot encoding
z2_hot = Flux.onehotbatch(z2, levels(z2))
z4_hot = Flux.onehotbatch(z4, levels(z4))

Expand Down
Loading