deeppavlov
diff --git a/‎autointent/datafiles/default-multilabel-config.yaml‎
Lines changed: 3 additions & 2 deletions b/‎autointent/datafiles/default-multilabel-config.yaml‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎autointent/modules/__init__.py‎
Lines changed: 5 additions & 1 deletion b/‎autointent/modules/__init__.py‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎autointent/modules/prediction/__init__.py‎
Lines changed: 9 additions & 1 deletion b/‎autointent/modules/prediction/__init__.py‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎autointent/modules/prediction/adaptive.py‎
Lines changed: 108 additions & 0 deletions b/‎autointent/modules/prediction/adaptive.py‎
Lines changed: 108 additions & 0 deletions
diff --git a/‎autointent/modules/prediction/argmax.py‎
Lines changed: 31 additions & 6 deletions b/‎autointent/modules/prediction/argmax.py‎
Lines changed: 31 additions & 6 deletions
diff --git a/‎autointent/modules/prediction/base.py‎
Lines changed: 0 additions & 37 deletions b/‎autointent/modules/prediction/base.py‎
Lines changed: 0 additions & 37 deletions
diff --git a/‎autointent/modules/prediction/jinoos.py‎
Lines changed: 14 additions & 1 deletion b/‎autointent/modules/prediction/jinoos.py‎
Lines changed: 14 additions & 1 deletion
@@ -5,7 +5,7 @@ nodes:
     search_space:
       - module_type: vector_db
         k: [10]
-        model_name:
+        embedder_name:
           - deepvk/USER-bge-m3
   - node_type: scoring
     metric: scoring_roc_auc
@@ -18,4 +18,5 @@ nodes:
     metric: prediction_accuracy
     search_space:
       - module_type: threshold
-        thresh: [0.5]
+        thresh: [0.5]
+      - module_type: adaptive
@@ -2,6 +2,7 @@
 
 from .base import Module
 from .prediction import (
+    AdaptivePredictor,
     ArgmaxPredictor,
     JinoosPredictor,
     PredictionModule,
@@ -35,10 +36,13 @@ def create_modules_dict(modules: list[type[T]]) -> dict[str, type[T]]:
     [ArgmaxPredictor, JinoosPredictor, ThresholdPredictor, TunablePredictor]
 )
 
-PREDICTION_MODULES_MULTILABEL: dict[str, type[Module]] = create_modules_dict([ThresholdPredictor, TunablePredictor])
+PREDICTION_MODULES_MULTILABEL: dict[str, type[Module]] = create_modules_dict(
+    [AdaptivePredictor, ThresholdPredictor, TunablePredictor]
+)
 
 __all__ = [
     "Module",
+    "AdaptivePredictor",
     "ArgmaxPredictor",
     "JinoosPredictor",
     "PredictionModule",
 
@@ -1,7 +1,15 @@
+from .adaptive import AdaptivePredictor
 from .argmax import ArgmaxPredictor
 from .base import PredictionModule
 from .jinoos import JinoosPredictor
 from .threshold import ThresholdPredictor
 from .tunable import TunablePredictor
 
-__all__ = ["ArgmaxPredictor", "JinoosPredictor", "PredictionModule", "ThresholdPredictor", "TunablePredictor"]
+__all__ = [
+    "AdaptivePredictor",
+    "ArgmaxPredictor",
+    "JinoosPredictor",
+    "PredictionModule",
+    "ThresholdPredictor",
+    "TunablePredictor",
+]
@@ -0,0 +1,108 @@
+import json
+from pathlib import Path
+from typing import Any, TypedDict
+
+import numpy as np
+import numpy.typing as npt
+from sklearn.metrics import f1_score
+from typing_extensions import Self
+
+from autointent import Context
+from autointent.context.data_handler import Tag
+from autointent.custom_types import LabelType
+from autointent.metrics.converter import transform
+
+from .base import PredictionModule
+from .utils import InvalidNumClassesError, WrongClassificationError, apply_tags
+
+# Candidate values of the coefficient ``r`` that ``AdaptivePredictor`` falls back to
+# when it is constructed without an explicit ``search_space``:
+# 10 evenly spaced points from 0 to 1 (both ends included).
+default_search_space = np.linspace(0, 1, num=10)
+
+
+class AdaptivePredictorDumpMetadata(TypedDict):
+    """Shape of the JSON metadata that ``AdaptivePredictor.dump`` writes and ``AdaptivePredictor.load`` reads."""
+
+    # coefficient chosen by ``AdaptivePredictor.fit`` (taken from ``_r``)
+    r: float
+    # tags passed to ``AdaptivePredictor.fit``; ``None`` when none were given
+    tags: list[Tag] | None
+    # number of classes recorded by ``AdaptivePredictor.fit``
+    n_classes: int
+
+
+class AdaptivePredictor(PredictionModule):
+    """
+    Multilabel predictor whose decision threshold adapts to each sample's score range.
+
+    A class is assigned to a sample when its score reaches
+    `r * max(sample_scores) + (1 - r) * min(sample_scores)`. The coefficient `r` is
+    picked in `fit` from `search_space` as the candidate with the best `multilabel_score`.
+    """
+
+    metadata_dict_name = "metadata.json"
+    n_classes: int
+    _r: float
+    tags: list[Tag] | None
+    name = "adaptive"
+
+    def __init__(self, search_space: list[float] | None = None) -> None:
+        """
+        Arguments
+        ---
+        - `search_space`: candidate values of `r`; `default_search_space` is used when `None`
+        """
+        self.search_space = search_space if search_space is not None else default_search_space
+
+    @classmethod
+    def from_context(cls, context: Context, search_space: list[float] | None = None) -> Self:
+        """
+        Build the predictor from an optimization context.
+
+        `context` is part of the signature but is not read here; only `search_space` is forwarded.
+        """
+        return cls(
+            search_space=search_space,
+        )
+
+    def fit(
+        self,
+        scores: npt.NDArray[Any],
+        labels: list[LabelType],
+        tags: list[Tag] | None = None,
+    ) -> None:
+        """
+        Select the coefficient `r` that maximizes `multilabel_score` on the given data.
+
+        Arguments
+        ---
+        - `scores`: score matrix, indexed as (sample, class) by `predict`
+        - `labels`: multilabel targets, one list per sample
+        - `tags`: optional tags forwarded to `multilabel_predict`
+
+        Raises
+        ---
+        `WrongClassificationError` if the first label is not a list (i.e. the data is not multilabel)
+        """
+        self.tags = tags
+        first_label = labels[0]
+        if not isinstance(first_label, list):
+            # adjacent literals keep source indentation and line breaks out of the runtime message
+            msg = (
+                "AdaptivePredictor is not designed to perform multiclass classification, "
+                "consider using other predictor algorithms"
+            )
+            raise WrongClassificationError(msg)
+        self.n_classes = len(first_label)
+
+        metrics_list = [
+            multilabel_score(labels, multilabel_predict(scores, r, self.tags)) for r in self.search_space
+        ]
+
+        # np.argmax returns the first maximum, so ties go to the earliest candidate in `search_space`
+        self._r = float(self.search_space[np.argmax(metrics_list)])
+
+    def predict(self, scores: npt.NDArray[Any]) -> npt.NDArray[Any]:
+        """
+        Predict labels for `scores` with the coefficient found by `fit` (or restored by `load`).
+
+        Raises
+        ---
+        `InvalidNumClassesError` if `scores.shape[1]` differs from `self.n_classes`
+        """
+        if scores.shape[1] != self.n_classes:
+            msg = "Provided scores number don't match with number of classes which predictor was trained on."
+            raise InvalidNumClassesError(msg)
+        return multilabel_predict(scores, self._r, self.tags)
+
+    def dump(self, path: str) -> None:
+        """Write `r`, `tags` and `n_classes` as JSON to the `metadata_dict_name` file inside `path`."""
+        dump_dir = Path(path)
+
+        # NOTE(review): `tags` are handed to `json.dump` as `Tag` objects while `load` rebuilds them
+        # with `Tag(**tag)` - confirm that `Tag` serializes to a JSON object
+        # kept on the instance as well, the way the other predictors' `dump` methods do
+        self.metadata = AdaptivePredictorDumpMetadata(r=self._r, tags=self.tags, n_classes=self.n_classes)
+
+        with (dump_dir / self.metadata_dict_name).open("w") as file:
+            json.dump(self.metadata, file, indent=4)
+
+    def load(self, path: str) -> None:
+        """Restore `_r`, `n_classes` and `tags` from the `metadata_dict_name` file inside `path`."""
+        dump_dir = Path(path)
+
+        with (dump_dir / self.metadata_dict_name).open() as file:
+            metadata: AdaptivePredictorDumpMetadata = json.load(file)
+
+        self._r = metadata["r"]
+        self.n_classes = metadata["n_classes"]
+        raw_tags = metadata["tags"]
+        # `tags` is stored as null when the predictor was fitted without tags; iterating it would raise TypeError
+        self.tags = None if raw_tags is None else [Tag(**tag) for tag in raw_tags]  # type: ignore[arg-type]
+        self.metadata = metadata
+
+
+def get_adapted_threshes(r: float, scores: npt.NDArray[Any]) -> npt.NDArray[Any]:
+    """
+    Compute one threshold per row of `scores`.
+
+    Each threshold is the row maximum weighted by `r` plus the row minimum weighted by `1 - r`.
+
+    Arguments
+    ---
+    - `r`: weight given to the row maximum
+    - `scores`: array reduced along axis 1
+
+    Return
+    ---
+    array with one threshold per row
+    """
+    row_max = np.max(scores, axis=1)
+    row_min = np.min(scores, axis=1)
+    upper_part = r * row_max
+    lower_part = (1 - r) * row_min
+    return upper_part + lower_part  # type: ignore[no-any-return]
+
+
+def multilabel_predict(scores: npt.NDArray[Any], r: float, tags: list[Tag] | None) -> npt.NDArray[Any]:
+    """
+    Turn scores into binary labels using the per-sample threshold from `get_adapted_threshes`.
+
+    Arguments
+    ---
+    - `scores`: score matrix compared row by row against its own threshold
+    - `r`: coefficient forwarded to `get_adapted_threshes`
+    - `tags`: when non-empty, the result is post-processed with `apply_tags`
+
+    Return
+    ---
+    integer array of 0/1 labels with the same shape as `scores`
+    """
+    # thresholds become a column so each row of `scores` is compared with its own value
+    column_thresh = get_adapted_threshes(r, scores)[:, np.newaxis]
+    # a score equal to the threshold counts as a positive
+    predictions = np.greater_equal(scores, column_thresh).astype(int)
+    if not tags:
+        return predictions
+    return apply_tags(predictions, scores, tags)
+
+
+def multilabel_score(y_true: list[LabelType], y_pred: npt.NDArray[Any]) -> float:
+    """
+    Weighted F1 score of `y_pred` against `y_true`.
+
+    Both inputs are first passed through `transform`.
+
+    Arguments
+    ---
+    - `y_true`: ground-truth labels
+    - `y_pred`: predicted labels
+
+    Return
+    ---
+    value returned by `sklearn.metrics.f1_score` with `average="weighted"`
+    """
+    y_true_, y_pred_ = transform(y_true, y_pred)
+
+    # f1_score expects (y_true, y_pred); the weighted average takes its per-class support
+    # from the first argument, so the order affects the result
+    return f1_score(y_true_, y_pred_, average="weighted")  # type: ignore[no-any-return]
@@ -1,3 +1,5 @@
+import json
+from pathlib import Path
 from typing import Any
 
 import numpy as np
@@ -6,14 +8,19 @@
 
 from autointent import Context
 from autointent.context.data_handler import Tag
-from autointent.custom_types import LabelType
+from autointent.custom_types import BaseMetadataDict, LabelType
 
 from .base import PredictionModule
+from .utils import InvalidNumClassesError, WrongClassificationError
+
+
+class ArgmaxPredictorDumpMetadata(BaseMetadataDict):
+    """Metadata written by ``ArgmaxPredictor.dump`` and read by ``ArgmaxPredictor.load``."""
+
+    # number of classes recorded by ``ArgmaxPredictor.fit``
+    n_classes: int
 
 
 class ArgmaxPredictor(PredictionModule):
-    metadata = {}  # noqa: RUF012
     name = "argmax"
+    n_classes: int
 
     def __init__(self) -> None:
         pass
@@ -28,13 +35,31 @@ def fit(
         labels: list[LabelType],
         tags: list[Tag] | None = None,
     ) -> None:
-        pass
+        multilabel = isinstance(labels[0], list)
+        if multilabel:
+            msg = "ArgmaxPredictor is compatible with single-label classifiction only"
+            raise WrongClassificationError(msg)
+        self.n_classes = len(set(labels).difference([-1]))
 
     def predict(self, scores: npt.NDArray[Any]) -> npt.NDArray[Any]:
+        """
+        Return, for every row of `scores`, the index of its highest value.
+
+        Raises `InvalidNumClassesError` if `scores.shape[1]` differs from `self.n_classes`.
+        """
+        if scores.shape[1] != self.n_classes:
+            msg = "Provided scores number don't match with number of classes which predictor was trained on."
+            raise InvalidNumClassesError(msg)
         return np.argmax(scores, axis=1)  # type: ignore[no-any-return]
 
+    def dump(self, path: str) -> None:
+        """Store `n_classes` in `self.metadata` and write it as JSON to the `metadata_dict_name` file inside `path`."""
+        self.metadata = ArgmaxPredictorDumpMetadata(n_classes=self.n_classes)
+
+        dump_dir = Path(path)
+
+        with (dump_dir / self.metadata_dict_name).open("w") as file:
+            json.dump(self.metadata, file, indent=4)
+
     def load(self, path: str) -> None:
+        """Read the `metadata_dict_name` JSON file inside `path` and restore `n_classes` and `metadata` from it."""
-        pass
+        dump_dir = Path(path)
 
-    def dump(self, path: str) -> None:
-        pass
+        with (dump_dir / self.metadata_dict_name).open() as file:
+            metadata: ArgmaxPredictorDumpMetadata = json.load(file)
+
+        self.n_classes = metadata["n_classes"]
+        self.metadata = metadata
@@ -58,40 +58,3 @@ def get_prediction_evaluation_data(
         return_scores = np.concatenate([scores, oos_scores])
 
     return labels.tolist(), return_scores
-
-
-def apply_tags(labels: npt.NDArray[Any], scores: npt.NDArray[Any], tags: list[Tag]) -> npt.NDArray[Any]:
-    """
-    this function is intended to be used with multilabel predictor
-
-    If some intent classes have common tag (i.e. they are mutually exclusive) \
-    and were assigned to one sample, leave only that class that has the highest score.
-
-    Arguments
-    ---
-    - `labels`: np.ndarray of shape (n_samples, n_classes) with binary labels
-    - `scores`: np.ndarray of shape (n_samples, n_classes) with float values from 0..1
-    - `tags`: list of Tags
-
-    Return
-    ---
-    np.ndarray of shape (n_samples, n_classes) with binary labels
-    """
-
-    n_samples, _ = labels.shape
-    res = np.copy(labels)
-
-    for i in range(n_samples):
-        sample_labels = labels[i].astype(bool)
-        sample_scores = scores[i]
-
-        for tag in tags:
-            if any(sample_labels[idx] for idx in tag.intent_ids):
-                # Find the index of the class with the highest score among the tagged indices
-                max_score_index = max(tag.intent_ids, key=lambda idx: sample_scores[idx])
-                # Set all other tagged indices to 0 in the res
-                for idx in tag.intent_ids:
-                    if idx != max_score_index:
-                        res[i, idx] = 0
-
-    return res
@@ -12,17 +12,20 @@
 from autointent.metrics.converter import transform
 
 from .base import PredictionModule
+from .utils import InvalidNumClassesError, WrongClassificationError
 
 default_search_space = np.linspace(0, 1, num=100)
 
 
 class JinoosPredictorDumpMetadata(BaseMetadataDict):
+    """Metadata written by ``JinoosPredictor.dump`` and read by ``JinoosPredictor.load``."""
+
+    # threshold selected by ``JinoosPredictor.fit``
     thresh: float
+    # number of classes recorded by ``JinoosPredictor.fit``
+    n_classes: int
 
 
 class JinoosPredictor(PredictionModule):
     thresh: float
     name = "jinoos"
+    n_classes: int
 
     def __init__(
         self,
@@ -45,6 +48,12 @@ def fit(
         """
         TODO: use dev split instead of test split
         """
+        multilabel = isinstance(labels[0], list)
+        if multilabel:
+            msg = "JinoosPredictor is compatible with single-label classification only"
+            raise WrongClassificationError(msg)
+        self.n_classes = len(set(labels).difference([-1]))
+
         pred_classes, best_scores = _predict(scores)
 
         metrics_list: list[float] = []
@@ -56,11 +65,14 @@ def fit(
         self.thresh = float(self.search_space[np.argmax(metrics_list)])
 
     def predict(self, scores: npt.NDArray[Any]) -> npt.NDArray[Any]:
+        """
+        Check the width of `scores`, then pass the output of `_predict` together with `self.thresh` to `_detect_oos`.
+
+        Raises `InvalidNumClassesError` if `scores.shape[1]` differs from `self.n_classes`.
+        """
+        if scores.shape[1] != self.n_classes:
+            msg = "Provided scores number don't match with number of classes which predictor was trained on."
+            raise InvalidNumClassesError(msg)
         pred_classes, best_scores = _predict(scores)
         return _detect_oos(pred_classes, best_scores, self.thresh)
 
     def dump(self, path: str) -> None:
-        self.metadata = JinoosPredictorDumpMetadata(thresh=self.thresh)
+        self.metadata = JinoosPredictorDumpMetadata(thresh=self.thresh, n_classes=self.n_classes)
 
         dump_dir = Path(path)
 
@@ -75,6 +87,7 @@ def load(self, path: str) -> None:
 
         self.thresh = metadata["thresh"]
         self.metadata = metadata
+        self.n_classes = metadata["n_classes"]
 
 
 def _predict(scores: npt.NDArray[np.float64]) -> tuple[npt.NDArray[np.int64], npt.NDArray[np.float64]]: