Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .github/workflows/reusable-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ on:
required: true
type: string
description: 'Command to run tests'
extras:
required: false
type: string
default: ''
description: 'Space-separated --extra flags (e.g., "--extra transformers --extra peft")'

jobs:
test:
Expand Down Expand Up @@ -39,7 +44,7 @@ jobs:
- name: Install dependencies for Python ${{ matrix.python-version }}
run: |
uv python pin ${{ matrix.python-version }}
uv sync --group test
uv sync --group test ${{ inputs.extras }}
- name: Run tests
run: |
Expand Down
15 changes: 15 additions & 0 deletions .github/workflows/test-embedder.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Workflow that runs the embedder test suite through the repository's
# reusable test workflow.
name: test embedder

# Triggered on pushes to the `dev` branch and on pull requests.
on:
push:
branches:
- dev
pull_request:

jobs:
test:
# The job body lives in the reusable workflow; this file only supplies inputs.
uses: ./.github/workflows/reusable-test.yaml
with:
# Restrict the run to tests/embedder/; `-n auto` is the pytest-xdist parallel option.
test_command: pytest -n auto tests/embedder/
# Extra flags handed to the reusable workflow's `extras` input.
extras: --extra sentence-transformers

1 change: 1 addition & 0 deletions .github/workflows/test-inference.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ jobs:
uses: ./.github/workflows/reusable-test.yaml
with:
test_command: pytest -n auto tests/pipeline/test_inference.py
extras: --extra catboost --extra peft --extra transformers --extra sentence-transformers
1 change: 1 addition & 0 deletions .github/workflows/test-optimization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ jobs:
uses: ./.github/workflows/reusable-test.yaml
with:
test_command: pytest -n auto tests/pipeline/test_optimization.py
extras: --extra catboost --extra peft --extra transformers --extra sentence-transformers
1 change: 1 addition & 0 deletions .github/workflows/test-presets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ jobs:
uses: ./.github/workflows/reusable-test.yaml
with:
test_command: pytest -n auto tests/pipeline/test_presets.py
extras: --extra catboost --extra peft --extra transformers --extra sentence-transformers
47 changes: 47 additions & 0 deletions .github/workflows/test-scorers.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Workflow that runs the scoring-module tests against several optional
# dependency groups. Unlike the other test workflows it defines its own job
# instead of calling the reusable test workflow, because the installed extra
# varies per matrix entry.
name: test scorers

# Triggered on pushes to the `dev` branch and on pull requests.
on:
push:
branches:
- dev
pull_request:

jobs:
test:
runs-on: ${{ matrix.os }}
strategy:
# Keep the remaining matrix jobs running when one of them fails.
fail-fast: false
# Ubuntu x three Python versions x four dependency groups ...
matrix:
os: [ ubuntu-latest ]
python-version: [ "3.10", "3.11", "3.12" ]
dependency-group: [ "base", "transformers", "peft", "catboost" ]
# ... plus one extra Windows job (Python 3.10, no optional extra).
include:
- os: windows-latest
python-version: "3.10"
dependency-group: "base"

steps:
- name: Checkout code
uses: actions/checkout@v4

# Reuse the Hugging Face cache directory between runs; the key depends only on the runner OS.
- name: Cache Hugging Face
id: cache-hf
uses: actions/cache@v4
with:
path: ~/.cache/huggingface
key: ${{ runner.os }}-hf

# uv is pinned to a fixed version.
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
version: "0.8.8"

# Installs the `test` group; `--extra <dependency-group>` is appended for
# every group except "base", which expands to an empty string.
- name: Install dependencies for Python ${{ matrix.python-version }}
run: |
uv python pin ${{ matrix.python-version }}
uv sync --group test ${{ matrix.dependency-group != 'base' && format('--extra {0}', matrix.dependency-group) || '' }}

# Only tests under tests/modules/scoring/ are executed here.
- name: Run scorer tests
run: |
uv run pytest -n auto tests/modules/scoring/

2 changes: 1 addition & 1 deletion .github/workflows/typing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- name: Install dependencies
run: |
uv lock
uv sync --group typing
uv sync --group typing --extra peft --extra sentence-transformers
- name: Run mypy
run: uv run mypy src/autointent
2 changes: 1 addition & 1 deletion .github/workflows/unit-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ jobs:
test:
uses: ./.github/workflows/reusable-test.yaml
with:
test_command: pytest -n auto --ignore=tests/nodes --ignore=tests/pipeline
test_command: pytest -n auto --ignore=tests/modules/scoring/ --ignore=tests/pipeline --ignore=tests/embedder
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ dependencies = [
[project.optional-dependencies]
catboost = ["catboost (>=1.2.8,<2.0.0)"]
peft = ["peft (>= 0.10.0, !=0.15.0, !=0.15.1, <1.0.0)"]
transformers = ["transformers (>=4.49.0,<5.0.0)"]
transformers = [
"transformers[torch] (>=4.49.0,<5.0.0)",
]
sentence-transformers = ["sentence-transformers (>=3,<4)"]
dspy = [
"dspy (>=2.6.5,<3.0.0)",
Expand Down
9 changes: 5 additions & 4 deletions src/autointent/_wrappers/embedder/hashing_vectorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def get_hash(self) -> int:
hasher.update(self.config.norm if self.config.norm is not None else "None")
hasher.update(self.config.binary)
hasher.update(self.config.dtype)
return hasher.hexdigest()
return int(hasher.hexdigest(), 16)

@overload
def embed(
Expand Down Expand Up @@ -97,7 +97,7 @@ def embed(
"""
# Transform texts to sparse matrix, then convert to dense
embeddings_sparse = self._vectorizer.transform(utterances)
embeddings = embeddings_sparse.toarray().astype(np.float32)
embeddings: npt.NDArray[np.float32] = embeddings_sparse.toarray().astype(np.float32)

if return_tensors:
return torch.from_numpy(embeddings)
Expand All @@ -115,7 +115,8 @@ def similarity(
Returns:
Similarity matrix with shape (n_samples, m_samples).
"""
return cosine_similarity(embeddings1, embeddings2).astype(np.float32)
similarity_matrix: npt.NDArray[np.float32] = cosine_similarity(embeddings1, embeddings2).astype(np.float32)
return similarity_matrix

def dump(self, path: Path) -> None:
"""Save the backend state to disk.
Expand Down Expand Up @@ -157,7 +158,7 @@ def load(cls, path: Path) -> "HashingVectorizerEmbeddingBackend":
logger.debug("Loaded HashingVectorizer backend from %s", path)
return instance

def train(self, utterances: list[str], labels: list[int], config) -> None: # noqa: ANN001
def train(self, utterances: list[str], labels: list[int], config) -> None: # noqa: ANN001 # type: ignore[no-untyped-def]
"""Train the backend.

HashingVectorizer is stateless and doesn't support training.
Expand Down
5 changes: 3 additions & 2 deletions tests/modules/decision/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from autointent.context.data_handler import DataHandler
from autointent.modules import KNNScorer
from tests.conftest import get_test_embedder_config


@pytest.fixture
Expand All @@ -12,7 +13,7 @@ def multiclass_fit_data(dataset):
knn_params = {
"k": 3,
"weights": "distance",
"embedder_config": "sergeyzh/rubert-tiny-turbo",
"embedder_config": get_test_embedder_config(),
}
scorer = KNNScorer(**knn_params)

Expand All @@ -29,7 +30,7 @@ def multilabel_fit_data(dataset):
knn_params = {
"k": 3,
"weights": "distance",
"embedder_config": "sergeyzh/rubert-tiny-turbo",
"embedder_config": get_test_embedder_config(),
}
scorer = KNNScorer(**knn_params)

Expand Down
25 changes: 25 additions & 0 deletions tests/modules/scoring/test_bert.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
import pytest

from autointent import Pipeline
from autointent.context.data_handler import DataHandler
from autointent.modules import BertScorer

Expand Down Expand Up @@ -115,3 +116,27 @@ def test_bert_cache_clearing(dataset):
# Should raise exception after clearing cache
with pytest.raises(RuntimeError):
scorer.predict(test_data)


def test_bert_in_pipeline(dataset):
    """Fit a scoring + decision pipeline that uses the ``bert`` module and predict once."""
    # Single scoring candidate: tiny BERT checkpoint, one epoch, batch size 8.
    bert_candidate = {
        "module_name": "bert",
        "classification_model_config": [{"model_name": "prajjwal1/bert-tiny"}],
        "num_train_epochs": [1],
        "batch_size": [8],
    }
    scoring_node = {
        "node_type": "scoring",
        "target_metric": "scoring_roc_auc",
        "search_space": [bert_candidate],
    }
    decision_node = {
        "node_type": "decision",
        "target_metric": "decision_accuracy",
        "search_space": [{"module_name": "argmax"}],
    }

    pipe = Pipeline.from_search_space([scoring_node, decision_node])
    pipe.fit(dataset)

    # One input utterance must yield exactly one prediction.
    outputs = pipe.predict(["test utterance"])
    assert len(outputs) == 1
37 changes: 32 additions & 5 deletions tests/modules/scoring/test_catboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
import pytest

from autointent import Pipeline
from autointent.context.data_handler import DataHandler
from autointent.modules import CatBoostScorer
from tests.conftest import get_test_embedder_config
Expand All @@ -17,6 +18,7 @@ def test_catboost_scorer_dump_load(dataset):
data_handler = DataHandler(dataset)

scorer_original = CatBoostScorer(
embedder_config=get_test_embedder_config(),
iterations=50,
learning_rate=0.05,
depth=6,
Expand Down Expand Up @@ -82,11 +84,11 @@ def test_catboost_prediction_multilabel(dataset):
predictions,
np.array(
[
[0.41777172, 0.5278134, 0.41807876, 0.4174544],
[0.40775846, 0.46434019, 0.42728555, 0.43836945],
[0.4207232, 0.49201536, 0.42798494, 0.41541217],
[0.46765036, 0.45065999, 0.49705517, 0.45052473],
[0.41694272, 0.54160408, 0.40944069, 0.41674984],
[0.37150982, 0.5935175, 0.36279131, 0.37357718],
[0.37309364, 0.53746911, 0.38326219, 0.39884488],
[0.37744044, 0.56529594, 0.37456834, 0.38646843],
[0.41484185, 0.48539558, 0.41669755, 0.42929345],
[0.38344306, 0.58516115, 0.37940454, 0.39640789],
]
),
rtol=0.01,
Expand Down Expand Up @@ -132,6 +134,7 @@ def test_catboost_cache_clearing(dataset):
"""Test that the transformer model properly handles cache clearing."""
data_handler = DataHandler(dataset)
scorer = CatBoostScorer(
embedder_config=get_test_embedder_config(),
iterations=50,
learning_rate=0.05,
depth=6,
Expand All @@ -146,3 +149,27 @@ def test_catboost_cache_clearing(dataset):
scorer.clear_cache()
with pytest.raises(RuntimeError):
scorer.predict(test_data)


def test_catboost_in_pipeline(dataset):
    """Fit a scoring + decision pipeline that uses the ``catboost`` module and predict once."""
    # Single scoring candidate: 50 iterations on embedding features.
    catboost_candidate = {
        "module_name": "catboost",
        "iterations": [50],
        "learning_rate": [0.05],
        "features_type": ["embedding"],
    }
    scoring_node = {
        "node_type": "scoring",
        "target_metric": "scoring_roc_auc",
        "search_space": [catboost_candidate],
    }
    decision_node = {
        "node_type": "decision",
        "target_metric": "decision_accuracy",
        "search_space": [{"module_name": "argmax"}],
    }

    pipe = Pipeline.from_search_space([scoring_node, decision_node])
    pipe.fit(dataset)

    # One input utterance must yield exactly one prediction.
    outputs = pipe.predict(["test utterance"])
    assert len(outputs) == 1
24 changes: 24 additions & 0 deletions tests/modules/scoring/test_cnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
import pytest

from autointent import Pipeline
from autointent.configs import VocabConfig
from autointent.context.data_handler import DataHandler
from autointent.modules.scoring import CNNScorer
Expand Down Expand Up @@ -120,3 +121,26 @@ def test_cnn_scorer_dump_load(dataset):
finally:
# Clean up
shutil.rmtree(temp_dir_path, ignore_errors=True) # workaround for windows permission error


def test_cnn_in_pipeline(dataset):
    """Fit a scoring + decision pipeline that uses the ``cnn`` module and predict once."""
    # Single scoring candidate: small embedding dimension, one training epoch.
    cnn_candidate = {
        "module_name": "cnn",
        "embed_dim": [8],
        "num_train_epochs": [1],
    }
    scoring_node = {
        "node_type": "scoring",
        "target_metric": "scoring_roc_auc",
        "search_space": [cnn_candidate],
    }
    decision_node = {
        "node_type": "decision",
        "target_metric": "decision_accuracy",
        "search_space": [{"module_name": "argmax"}],
    }

    pipe = Pipeline.from_search_space([scoring_node, decision_node])
    pipe.fit(dataset)

    # One input utterance must yield exactly one prediction.
    outputs = pipe.predict(["test utterance"])
    assert len(outputs) == 1
24 changes: 24 additions & 0 deletions tests/modules/scoring/test_description_bi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
import pytest

from autointent import Pipeline
from autointent.context.data_handler import DataHandler
from autointent.modules import BiEncoderDescriptionScorer

Expand Down Expand Up @@ -56,3 +57,26 @@ def test_description_scorer(dataset, expected_prediction, multilabel):
new_scorer = BiEncoderDescriptionScorer.load(temp_dir)
new_predictions = new_scorer.predict(test_utterances)
np.testing.assert_almost_equal(predictions, new_predictions, decimal=5)


def test_description_bi_in_pipeline(dataset):
    """Fit a scoring + decision pipeline that uses the ``description_bi`` module and predict once."""
    # Single scoring candidate: one embedder model and a fixed temperature.
    bi_encoder_candidate = {
        "module_name": "description_bi",
        "embedder_config": [{"model_name": "sergeyzh/rubert-tiny-turbo"}],
        "temperature": [0.3],
    }
    scoring_node = {
        "node_type": "scoring",
        "target_metric": "scoring_roc_auc",
        "search_space": [bi_encoder_candidate],
    }
    decision_node = {
        "node_type": "decision",
        "target_metric": "decision_accuracy",
        "search_space": [{"module_name": "argmax"}],
    }

    pipe = Pipeline.from_search_space([scoring_node, decision_node])
    pipe.fit(dataset)

    # One input utterance must yield exactly one prediction.
    outputs = pipe.predict(["test utterance"])
    assert len(outputs) == 1
24 changes: 24 additions & 0 deletions tests/modules/scoring/test_description_cross.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
import pytest

from autointent import Pipeline
from autointent.context.data_handler import DataHandler
from autointent.modules import CrossEncoderDescriptionScorer

Expand Down Expand Up @@ -64,3 +65,26 @@ def test_description_scorer_cross_encoder(dataset, expected_prediction, multilab
np.testing.assert_almost_equal(predictions, loaded_predictions, decimal=5)

new_scorer.clear_cache()


def test_description_cross_in_pipeline(dataset):
    """Fit a scoring + decision pipeline that uses the ``description_cross`` module and predict once."""
    # Single scoring candidate: one cross-encoder model and a fixed temperature.
    cross_encoder_candidate = {
        "module_name": "description_cross",
        "cross_encoder_config": [{"model_name": "cross-encoder/ms-marco-MiniLM-L6-v2"}],
        "temperature": [0.3],
    }
    scoring_node = {
        "node_type": "scoring",
        "target_metric": "scoring_roc_auc",
        "search_space": [cross_encoder_candidate],
    }
    decision_node = {
        "node_type": "decision",
        "target_metric": "decision_accuracy",
        "search_space": [{"module_name": "argmax"}],
    }

    pipe = Pipeline.from_search_space([scoring_node, decision_node])
    pipe.fit(dataset)

    # One input utterance must yield exactly one prediction.
    outputs = pipe.predict(["test utterance"])
    assert len(outputs) == 1
Loading
Loading