Description
Hi! @sebffischer
I am trying to implement a neural network with a custom loss function for binary classification.
It works just like the standard torch::nn_bce_with_logits_loss
-- https://torch.mlverse.org/docs/reference/nn_bce_with_logits_loss
criterion(output, target)
where both output and target are 1d tensors.
Here is a MRE.
task_sonar <- mlr3::tsk("sonar")
mlr3torch::LearnerTorchMLP$new(
task_type="classif",
loss=torch::nn_cross_entropy_loss
)$configure(
epochs=2,
batch_size=5
)$train(task_sonar)
mlr3torch::LearnerTorchMLP$new(
task_type="classif",
loss=torch::nn_bce_with_logits_loss
)$configure(
epochs=2,
batch_size=5
)$train(task_sonar)
I tried the code above, and I observed a tensor size error:
> task_sonar <- mlr3::tsk("sonar")
> mlr3torch::LearnerTorchMLP$new(
+ task_type="classif",
+ loss=torch::nn_cross_entropy_loss
+ )$configure(
+ epochs=2,
+ batch_size=5
+ )$train(task_sonar)
> mlr3torch::LearnerTorchMLP$new(
+ task_type="classif",
+ loss=torch::nn_bce_with_logits_loss
+ )$configure(
+ epochs=2,
+ batch_size=5
+ )$train(task_sonar)
Erreur dans (function (self, target, weight, pos_weight, reduction) :
The size of tensor a (5) must match the size of tensor b (2) at non-singleton dimension 1
Exception raised from infer_size_impl at ../aten/src/ATen/ExpandUtils.cpp:31 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) + 0xb0 (0x79d326994120 in /home/local/USHERBROOKE/hoct2726/lib/R/library/torch/lib/libc10.so)
frame #1: c10::detail::torchCheckFail(char const*, char const*, unsigned int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) + 0xfa (0x79d326937a5a in /home/local/USHERBROOKE/hoct2726/lib/R/library/torch/lib/libc10.so)
frame #2: at::infer_size_dimvector(c10::ArrayRef<long>, c10::ArrayRef<long>) + 0x3d4 (0x79d311ec39a4 in /home/local/USHERBROOKE/hoct2726/lib/R/library/torch/lib/libtorch_cpu.so)
frame #3: at::TensorIteratorBase::compute_shape(at::TensorIteratorConfig const&) + 0xc0 (0x79d311f7eab0 in /home/local/USHERBROOKE/ho
in the output above we see that loss=torch::nn_cross_entropy_loss
works without error, but loss=torch::nn_bce_with_logits_loss
gives a tensor size error.
I guess this is because the MLP learner is always outputting a matrix with 2 columns? (even for the binary case, where it could output 1 column or just a 1d tensor instead)
Is it possible in mlr3torch to have a neural network that outputs a 1d tensor instead?
I tried using the graph learner code below, with an nn_linear layer that has out_features=1.
ce_po_list <- list(
mlr3pipelines::po(
"select",
selector = mlr3pipelines::selector_type(c("numeric", "integer"))),
mlr3torch::PipeOpTorchIngressNumeric$new(),
mlr3pipelines::po(
"nn_linear",
out_features=1),
mlr3pipelines::po(
"torch_loss",
torch::nn_cross_entropy_loss),
mlr3pipelines::po(
"torch_optimizer",
mlr3torch::t_opt("sgd", lr=0.1)),
mlr3pipelines::po(
"torch_model_classif",
batch_size = 5,
epochs = 2)
)
ce_graph_obj <- Reduce(mlr3pipelines::concat_graphs, ce_po_list)
ce_graph_learner <- mlr3::as_learner(ce_graph_obj)
ce_graph_learner$train(task_sonar)
bce_po_list <- list(
mlr3pipelines::po(
"select",
selector = mlr3pipelines::selector_type(c("numeric", "integer"))),
mlr3torch::PipeOpTorchIngressNumeric$new(),
mlr3pipelines::po(
"nn_linear",
out_features=1),
mlr3pipelines::po(
"torch_loss",
torch::nn_bce_with_logits_loss),
mlr3pipelines::po(
"torch_optimizer",
mlr3torch::t_opt("sgd", lr=0.1)),
mlr3pipelines::po(
"torch_model_classif",
batch_size = 5,
epochs = 2)
)
bce_graph_obj <- Reduce(mlr3pipelines::concat_graphs, bce_po_list)
bce_graph_learner <- mlr3::as_learner(bce_graph_obj)
bce_graph_learner$train(task_sonar)
I observe two different errors for the two different loss functions:
> ce_graph_learner$train(task_sonar)
Erreur dans (function (self, target, weight, reduction, ignore_index, label_smoothing) :
Target 2 is out of bounds.
Exception raised from nll_loss_out_frame at ../aten/src/ATen/native/LossNLL.cpp:251 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) + 0xb0 (0x79d326994120 in /home/local/USHERBROOKE/hoct2726/lib/R/library/torch/lib/libc10.so)
frame #1: <unknown function> + 0x118ac3a (0x79d31178ac3a in /home/local/USHERBROOKE/hoct2726/lib/R/library/torch/lib/libtorch_cpu.so)
frame #2: at::native::structured_nll_loss_forward_out_cpu::impl(at::Tensor const&, at::Tensor const&, at::OptionalTensorRef, long, long, at::Tensor const&, at::Tensor const&) + 0x779 (0x79d3124c92a9 in /home/local/USHERBROOKE/hoct2726/lib/R/library/torch/lib/libtorch_cpu.so)
frame #3: <unknown function> + 0x2eaf9d2 (0x79d3134af9d2 in /home/local/USHERBROOKE/hoct2726/lib/R/library/torch/lib/libtorch_cpu.so)
frame #4: <unknown function> + 0x2eafb45 (0x79d3134afb45 in /home/local/USHERBROOKE
> bce_graph_learner$train(task_sonar)
Erreur dans (function (self, target, weight, pos_weight, reduction) :
output with shape [5] doesn't match the broadcast shape [5, 5]
Exception raised from mark_resize_outputs at ../aten/src/ATen/TensorIterator.cpp:1207 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) + 0xb0 (0x79d326994120 in /home/local/USHERBROOKE/hoct2726/lib/R/library/torch/lib/libc10.so)
frame #1: c10::detail::torchCheckFail(char const*, char const*, unsigned int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) + 0xfa (0x79d326937a5a in /home/local/USHERBROOKE/hoct2726/lib/R/library/torch/lib/libc10.so)
frame #2: at::TensorIteratorBase::mark_resize_outputs(at::TensorIteratorConfig const&) + 0x21d (0x79d311f7fd0d in /home/local/USHERBROOKE/hoct2726/lib/R/library/torch/lib/libtorch_cpu.so)
frame #3: at::TensorIteratorBase::build(at::TensorIteratorConfig&) + 0x78 (0x79d311f7fda8 in /home/local/USHERBROOKE/hoct2726/lib/R/library/t
So I guess this means that it is not currently supported?
For a positive control, I tried changing out_features=2
in the code above, and in that case I observe the same result as the MLP learner (cross-entropy loss works, error for bce_with_logits_loss).