Commit 66fecba

Merge branch 'dev' of https://github.com/deeppavlov/AutoIntent into my-fix-branch
2 parents: c23860f + 13d8c40

41 files changed: +1480 additions, -199 deletions

.gitignore

Lines changed: 2 additions & 0 deletions
@@ -182,3 +182,5 @@ vector_db*
 *.db
 *.sqlite
 /wandb
+model_output/
+my.py

.vscode/settings.json

Lines changed: 6 additions & 1 deletion
@@ -8,5 +8,10 @@
             "*.yaml",
             "!*/.github/*/*.yaml"
         ]
-    }
+    },
+    "python.testing.pytestArgs": [
+        "."
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true
 }

README.md

Lines changed: 16 additions & 1 deletion
@@ -7,7 +7,7 @@ Auto ML for intent classification.

 Documentation: [deeppavlov.github.io/AutoIntent](https://deeppavlov.github.io/AutoIntent/).

-> The project is under active development.
+The project is under active development.

 ## Installation

@@ -35,6 +35,21 @@ pipeline.fit(dataset)
 pipeline.predict(["show me my latest transactions"])
 ```

+## Cite
+
+If you find our work useful, please cite our EMNLP 2025 [paper](https://arxiv.org/abs/2509.21138):
+```
+@misc{alekseev2025autointentautomltextclassification,
+      title={AutoIntent: AutoML for Text Classification},
+      author={Ilya Alekseev and Roman Solomatin and Darina Rustamova and Denis Kuznetsov},
+      year={2025},
+      eprint={2509.21138},
+      archivePrefix={arXiv},
+      primaryClass={cs.CL},
+      url={https://arxiv.org/abs/2509.21138},
+}
+```
+
 ## Disclaimer

 This project is in development phase. Bugs and breaking changes are expected. Contributions and feedback are welcome! See [CONTRIBUTING.md](./CONTRIBUTING.md).

docs/optimizer_config.schema.json

Lines changed: 1 addition & 8 deletions
@@ -266,12 +266,6 @@
         "description": "Whether to use embeddings caching.",
         "title": "Use Cache",
         "type": "boolean"
-      },
-      "freeze": {
-        "default": true,
-        "description": "Whether to freeze the model parameters.",
-        "title": "Freeze",
-        "type": "boolean"
       }
     },
     "title": "EmbedderConfig",
@@ -578,8 +572,7 @@
       "query_prompt": null,
       "passage_prompt": null,
       "similarity_fn_name": "cosine",
-      "use_cache": true,
-      "freeze": true
+      "use_cache": true
     }
   },
   "cross_encoder_config": {

src/autointent/_dump_tools/main.py

Lines changed: 3 additions & 0 deletions
@@ -4,6 +4,7 @@

 import numpy as np
 import numpy.typing as npt
+import torch

 from autointent.configs import CrossEncoderConfig, EmbedderConfig
 from autointent.context.optimization_info import Artifact
@@ -108,6 +109,8 @@ def dump(
                 simple_attrs[key] = val
             elif isinstance(val, np.ndarray):
                 arrays[key] = val
+            elif isinstance(val, torch.Tensor):
+                arrays[key] = val.cpu().numpy()
             else:
                 # Use the appropriate dumper for complex objects
                 Dumper._dump_single_object(key, val, path, exists_ok, raise_errors)
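
The new branch lets the dumper persist `torch.Tensor` attributes the same way as NumPy arrays by first moving them to CPU. A standalone sketch of that conversion (the helper name is hypothetical; only the `val.cpu().numpy()` call comes from the diff):

```python
import numpy as np
import torch


def to_numpy(val: torch.Tensor | np.ndarray) -> np.ndarray:
    """Normalize a value to a NumPy array so it can be stored with the other arrays."""
    if isinstance(val, torch.Tensor):
        # .cpu() handles CUDA/MPS tensors; .numpy() then exposes the data as a NumPy array
        return val.cpu().numpy()
    return val


arrays: dict[str, np.ndarray] = {
    "weights": to_numpy(torch.randn(3, 4)),
    "scores": to_numpy(np.ones(5)),
}
print({key: value.shape for key, value in arrays.items()})  # {'weights': (3, 4), 'scores': (5,)}
```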

src/autointent/_dump_tools/unit_dumpers.py

Lines changed: 6 additions & 6 deletions
@@ -21,7 +21,7 @@
 )

 from autointent import Embedder, Ranker, VectorIndex
-from autointent._wrappers import BaseTorchModuleWithVocab
+from autointent._wrappers import BaseTorchModule
 from autointent.schemas import TagsList

 from .base import BaseObjectDumper, ModuleSimpleAttributes
@@ -276,11 +276,11 @@ def check_isinstance(cls, obj: Any) -> bool:  # noqa: ANN401
         return isinstance(obj, PreTrainedTokenizer | PreTrainedTokenizerFast)


-class TorchModelDumper(BaseObjectDumper[BaseTorchModuleWithVocab]):
+class TorchModelDumper(BaseObjectDumper[BaseTorchModule]):
     dir_or_file_name = "torch_models"

     @staticmethod
-    def dump(obj: BaseTorchModuleWithVocab, path: Path, exists_ok: bool) -> None:
+    def dump(obj: BaseTorchModule, path: Path, exists_ok: bool) -> None:
         path.mkdir(parents=True, exist_ok=exists_ok)
         class_info = {
             "module": obj.__class__.__module__,
@@ -291,16 +291,16 @@ def dump(obj: BaseTorchModuleWithVocab, path: Path, exists_ok: bool) -> None:
         obj.dump(path)

     @staticmethod
-    def load(path: Path, **kwargs: Any) -> BaseTorchModuleWithVocab:  # noqa: ANN401, ARG004
+    def load(path: Path, **kwargs: Any) -> BaseTorchModule:  # noqa: ANN401, ARG004
         with (path / "class_info.json").open("r") as f:
             class_info = json.load(f)
         module = importlib.import_module(class_info["module"])
-        model_class: BaseTorchModuleWithVocab = getattr(module, class_info["name"])
+        model_class: BaseTorchModule = getattr(module, class_info["name"])
         return model_class.load(path)

     @classmethod
     def check_isinstance(cls, obj: Any) -> bool:  # noqa: ANN401
-        return isinstance(obj, BaseTorchModuleWithVocab)
+        return isinstance(obj, BaseTorchModule)


 class CatBoostDumper(BaseObjectDumper[CatBoostClassifier]):
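
`TorchModelDumper` now handles any `BaseTorchModule`, not only vocab-based modules. It records the class's import path in `class_info.json`, delegates the weight I/O to the model's own `dump`, and on load re-imports the class and calls its `load` classmethod. A simplified sketch of that dynamic-import round trip (helper names are hypothetical; the file name and lookup keys come from the diff):

```python
import importlib
import json
from pathlib import Path


def save_class_info(obj: object, path: Path) -> None:
    # Record where the class is defined so it can be re-imported at load time.
    class_info = {"module": obj.__class__.__module__, "name": obj.__class__.__name__}
    with (path / "class_info.json").open("w") as f:
        json.dump(class_info, f)


def resolve_class(path: Path) -> type:
    # Re-import the class named in class_info.json; the caller then invokes its load().
    with (path / "class_info.json").open("r") as f:
        class_info = json.load(f)
    module = importlib.import_module(class_info["module"])
    return getattr(module, class_info["name"])
```

Restoring a model then looks like `resolve_class(path).load(path)`, mirroring the dumper's `load` shown above.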

src/autointent/_wrappers/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -2,5 +2,6 @@
 from .embedder import Embedder
 from .vector_index import VectorIndex
 from .base_torch_module import BaseTorchModuleWithVocab
+from .base_torch_module import BaseTorchModule

-__all__ = ["BaseTorchModuleWithVocab", "Embedder", "Ranker", "VectorIndex"]
+__all__ = ["BaseTorchModule", "BaseTorchModuleWithVocab", "Embedder", "Ranker", "VectorIndex"]

src/autointent/_wrappers/base_torch_module.py

Lines changed: 51 additions & 42 deletions
@@ -13,10 +13,52 @@
 from autointent.configs import VocabConfig


-class BaseTorchModuleWithVocab(nn.Module, ABC):
+class BaseTorchModule(nn.Module, ABC):
+    @abstractmethod
+    def forward(self, text: torch.Tensor) -> torch.Tensor:
+        """Compute sentence embeddings for given text.
+
+        Args:
+            text: torch tensor of shape (B, T), token ids
+
+        Returns:
+            embeddings of shape (B, H)
+        """
+
+    @abstractmethod
+    def dump(self, path: Path) -> None:
+        """Dump torch module to disk.
+
+        This method encapsulates all the logic of dumping module's weights and
+        hyperparameters required for initialization from disk and nice inference.
+
+        Args:
+            path: path in file system
+        """
+
+    @classmethod
+    @abstractmethod
+    def load(cls, path: Path, device: str | None = None) -> Self:
+        """Load torch module from disk.
+
+        This method loads all weights and hyperparameters required for
+        initialization from disk and inference.
+
+        Args:
+            path: path in file system
+            device: torch notation for CPU, CUDA, MPS, etc. By default, it is inferred automatically.
+        """
+
+    @property
+    def device(self) -> torch.device:
+        """Torch device object where this module resides."""
+        return next(self.parameters()).device
+
+
+class BaseTorchModuleWithVocab(BaseTorchModule, ABC):
     def __init__(
         self,
-        embed_dim: int,
+        embed_dim: int | None = None,
         vocab_config: VocabConfig | None = None,
     ) -> None:
         super().__init__()
@@ -34,6 +76,9 @@ def __init__(

     def set_vocab(self, vocab: dict[str, Any]) -> None:
         """Save vocabulary into module's attributes and initialize embeddings matrix."""
+        if self.embed_dim is None:
+            msg = "embed_dim must be set to initialize embeddings"
+            raise ValueError(msg)
         self.vocab_config.vocab = vocab
         self.embedding = nn.Embedding(
             num_embeddings=len(self.vocab_config.vocab),
@@ -43,6 +88,10 @@ def set_vocab(self, vocab: dict[str, Any]) -> None:

     def build_vocab(self, utterances: list[str]) -> None:
         """Build vocabulary from training utterances."""
+        if self.embed_dim is None:
+            msg = "embed_dim must be set to initialize embeddings"
+            raise ValueError(msg)
+
         if self.vocab_config.vocab is not None:
             msg = "Vocab is already built."
             raise RuntimeError(msg)
@@ -80,43 +129,3 @@ def text_to_indices(self, utterances: list[str]) -> list[list[int]]:
             seq = seq + [self.vocab_config.padding_idx] * (self.vocab_config.max_seq_length - len(seq))
             sequences.append(seq)
         return sequences
-
-    @abstractmethod
-    def forward(self, text: torch.Tensor) -> torch.Tensor:
-        """Compute sentence embeddings for given text.
-
-        Args:
-            text: torch tensor of shape (B, T), token ids
-
-        Returns:
-            embeddings of shape (B, H)
-        """
-
-    @abstractmethod
-    def dump(self, path: Path) -> None:
-        """Dump torch module to disk.
-
-        This method encapsulates all the logic of dumping module's weights and
-        hyperparameters required for initialization from disk and nice inference.
-
-        Args:
-            path: path in file system
-        """
-
-    @classmethod
-    @abstractmethod
-    def load(cls, path: Path, device: str | None = None) -> Self:
-        """Load torch module from disk.
-
-        This method loads all weights and hyperparameters required for
-        initialization from disk and inference.
-
-        Args:
-            path: path in file system
-            device: torch notation for CPU, CUDA, MPS, etc. By default, it is inferred automatically.
-        """
-
-    @property
-    def device(self) -> torch.device:
-        """Torch device object where this module resides."""
-        return next(self.parameters()).device
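
With the abstract interface factored out into `BaseTorchModule`, models that do not need a vocabulary can still plug into the dump/load machinery. A minimal illustrative subclass, assuming only the interface shown above (`MeanPoolEncoder` and its on-disk layout are hypothetical, not part of the repository):

```python
import json
from pathlib import Path
from typing import Self  # Python 3.11+; use typing_extensions.Self otherwise

import torch
from torch import nn

from autointent._wrappers import BaseTorchModule


class MeanPoolEncoder(BaseTorchModule):
    """Hypothetical encoder: embeds token ids and mean-pools them into sentence vectors."""

    def __init__(self, vocab_size: int, embed_dim: int) -> None:
        super().__init__()
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.embedding = nn.Embedding(vocab_size, embed_dim)

    def forward(self, text: torch.Tensor) -> torch.Tensor:
        # (B, T) token ids -> (B, H) sentence embeddings
        return self.embedding(text).mean(dim=1)

    def dump(self, path: Path) -> None:
        path.mkdir(parents=True, exist_ok=True)
        with (path / "hparams.json").open("w") as f:
            json.dump({"vocab_size": self.vocab_size, "embed_dim": self.embed_dim}, f)
        torch.save(self.state_dict(), path / "weights.pt")

    @classmethod
    def load(cls, path: Path, device: str | None = None) -> Self:
        with (path / "hparams.json").open("r") as f:
            hparams = json.load(f)
        model = cls(**hparams)
        model.load_state_dict(torch.load(path / "weights.pt", map_location=device or "cpu"))
        return model if device is None else model.to(device)
```

Because `TorchModelDumper` records the class's import path in `class_info.json` and defers to the model's own `dump`/`load`, a module like this would round-trip without any vocab-specific state.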
