Skip to content
19 changes: 3 additions & 16 deletions hypex/analyzers/aa.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,14 @@

from ..comparators import Chi2Test, KSTest, TTest
from ..dataset import Dataset, ExperimentData, StatisticRole
from ..executor import Executor
from ..experiments.base_complex import IfParamsExperiment, ParamsExperiment
from ..reporters.aa import OneAADictReporter
from ..splitters import AASplitter, AASplitterWithStratification
from ..utils import ID_SPLIT_SYMBOL, BackendsEnum, ExperimentDataEnum
from .abstract import Analyzer


class OneAAStatAnalyzer(Executor):
def _set_value(self, data: ExperimentData, value, key=None) -> ExperimentData:
return data.set_value(ExperimentDataEnum.analysis_tables, self.id, value)

class OneAAStatAnalyzer(Analyzer):
def execute(self, data: ExperimentData) -> ExperimentData:
analysis_tests: list[type] = [TTest, KSTest, Chi2Test]
executor_ids = data.get_ids(
Expand Down Expand Up @@ -68,7 +65,7 @@ def execute(self, data: ExperimentData) -> ExperimentData:
return self._set_value(data, analysis_dataset)


class AAScoreAnalyzer(Executor):
class AAScoreAnalyzer(Analyzer):
AA_SPLITER_CLASS_MAPPING: ClassVar[dict] = {
class_.__name__: class_ for class_ in [AASplitter, AASplitterWithStratification]
}
Expand All @@ -80,16 +77,6 @@ def __init__(self, alpha: float = 0.05, key: str = ""):
self.__feature_weights = {}
self.threshold = 1 - (self.alpha * 1.2)

def _set_value(
self, data: ExperimentData, value: Any, key: Any = None
) -> ExperimentData:
return data.set_value(
ExperimentDataEnum.analysis_tables,
executor_id=self.id,
key=self.key,
value=value,
)

def _analyze_aa_score(
self, data: ExperimentData, score_table: Dataset
) -> ExperimentData:
Expand Down
29 changes: 6 additions & 23 deletions hypex/analyzers/ab.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,15 @@
from typing import Any

from ..comparators import TTest, UTest
from ..dataset import (
Dataset,
ExperimentData,
StatisticRole,
TargetRole,
TreatmentRole,
)
from ..experiments.base import Executor
from ..dataset import (Dataset, ExperimentData, StatisticRole, TargetRole,
TreatmentRole)
from ..extensions.statsmodels import MultiTest, MultitestQuantile
from ..utils import (
ID_SPLIT_SYMBOL,
NAME_BORDER_SYMBOL,
ABNTestMethodsEnum,
BackendsEnum,
ExperimentDataEnum,
)
from ..utils import (ID_SPLIT_SYMBOL, NAME_BORDER_SYMBOL, ABNTestMethodsEnum,
BackendsEnum)
from .abstract import Analyzer


class ABAnalyzer(Executor):
class ABAnalyzer(Analyzer):
def __init__(
self,
multitest_method: ABNTestMethodsEnum | None = None,
Expand All @@ -41,13 +31,6 @@ def __init__(
self.random_state = random_state
super().__init__(key)

def _set_value(self, data: ExperimentData, value, key=None) -> ExperimentData:
return data.set_value(
ExperimentDataEnum.analysis_tables,
self.id + key if key else self.id,
value,
)

def execute_multitest(self, data: ExperimentData, p_values: Dataset, **kwargs):
group_field = data.ds.search_columns(TreatmentRole())[0]
target_fields = data.ds.search_columns(TargetRole(), search_types=[int, float])
Expand Down
16 changes: 16 additions & 0 deletions hypex/analyzers/abstract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from abc import ABC

from ..dataset import ExperimentData
from ..executor import Executor
from ..utils import ExperimentDataEnum


class Analyzer(Executor, ABC):
    """Abstract base for analyzer executors.

    Concrete analyzers inherit the common result-storage behavior: every
    analyzer writes its output into the experiment's analysis tables under
    its own executor id.
    """

    def _set_value(self, data: ExperimentData, value, key=None) -> ExperimentData:
        """Record *value* in the analysis tables of *data*.

        The entry is stored in the ``analysis_tables`` space, keyed by this
        executor's id, with an optional sub-key.
        """
        return data.set_value(
            ExperimentDataEnum.analysis_tables,
            executor_id=self.id,
            value=value,
            key=key,
        )
9 changes: 2 additions & 7 deletions hypex/analyzers/matching.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
from ..dataset.dataset import DatasetAdapter, ExperimentData
from ..dataset.roles import StatisticRole
from ..executor.executor import Executor
from ..operators.operators import MatchingMetrics
from ..utils.enums import ExperimentDataEnum
from .abstract import Analyzer


class MatchingAnalyzer(Executor):
def _set_value(self, data: ExperimentData, value, key=None) -> ExperimentData:
return data.set_value(
ExperimentDataEnum.analysis_tables, self.id, value, key=key
)

class MatchingAnalyzer(Analyzer):
def execute(self, data: ExperimentData):
variables = data.variables[
data.get_one_id(MatchingMetrics, space=ExperimentDataEnum.variables)
Expand Down
56 changes: 19 additions & 37 deletions hypex/comparators/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,14 @@
from abc import ABC, abstractmethod
from typing import Any, Literal

from ..dataset import (
ABCRole,
Dataset,
DatasetAdapter,
ExperimentData,
GroupingRole,
InfoRole,
PreTargetRole,
StatisticRole,
TargetRole,
TempTargetRole,
)
from ..dataset import (ABCRole, Dataset, DatasetAdapter, ExperimentData,
GroupingRole, InfoRole, PreTargetRole, StatisticRole,
TargetRole, TempTargetRole)
from ..executor import Calculator
from ..utils import (
NAME_BORDER_SYMBOL,
BackendsEnum,
ExperimentDataEnum,
FromDictTypes,
GroupingDataType,
)
from ..utils.errors import (
AbstractMethodError,
NoColumnsError,
NoRequiredArgumentError,
NotSuitableFieldError,
)
from ..utils import (NAME_BORDER_SYMBOL, BackendsEnum, ExperimentDataEnum,
FromDictTypes, GroupingDataType)
from ..utils.errors import (AbstractMethodError, NoColumnsError,
NoRequiredArgumentError, NotSuitableFieldError)


class Comparator(Calculator, ABC):
Expand Down Expand Up @@ -58,13 +40,11 @@ def _local_extract_dataset(

@classmethod
@abstractmethod
def _inner_function(
cls, data: Dataset, test_data: Dataset | None = None, **kwargs
) -> Any:
def calc(cls, data: Dataset, test_data: Dataset | None = None, **kwargs) -> Any:
raise AbstractMethodError

def _get_fields_data(self, data: ExperimentData) -> dict[str, Dataset]:
tmp_role = True if data.ds.tmp_roles else False
tmp_role = bool(data.ds.tmp_roles)
group_field_data = data.field_data_search(roles=self.grouping_role)
target_fields_data = data.field_data_search(
roles=TempTargetRole() if tmp_role else self.target_roles,
Expand Down Expand Up @@ -96,7 +76,7 @@ def _execute_inner_function(
else f"{compared_data[i][0]}{NAME_BORDER_SYMBOL}{compared_data[i][1].columns[0]}"
)
result[res_name] = DatasetAdapter.to_dataset(
cls._inner_function(
cls.calc(
baseline_data[0 if len(baseline_data) == 1 else i][1],
compared_data[i][1],
**kwargs,
Expand Down Expand Up @@ -165,14 +145,12 @@ def _grouping_data_split(
def _split_ds_into_columns(
data: list[tuple[str, Dataset]],
) -> list[tuple[str, Dataset]]:
result = [
return [
(bucket[0], bucket[1][column])
for bucket in data
for column in bucket[1].columns
]

return result

@staticmethod
def _field_validity_check(
field_data: Dataset,
Expand Down Expand Up @@ -333,12 +311,13 @@ def _split_data_to_buckets(
)
else:
raise ValueError(
f"Wrong compare_by argument passed {compare_by}. It can be only one of the following modes: 'groups', 'columns', 'columns_in_groups', 'cross'."
f"Wrong compare_by argument passed {compare_by}. It can be only one of the following modes: 'groups', "
f"'columns', 'columns_in_groups', 'cross'."
)
return baseline_data, compared_data

@classmethod
def calc(
def _precalc(
cls,
compare_by: (
Literal["groups", "columns", "columns_in_groups", "cross"] | None
Expand Down Expand Up @@ -386,7 +365,10 @@ def execute(self, data: ExperimentData) -> ExperimentData:
)

if len(target_fields_data.columns) == 0:
if data.ds.tmp_roles: # if the column is not suitable for the test, then the target will be empty, but if there is a role tempo, then this is normal behavior
if (
data.ds.tmp_roles
): # if the column is not suitable for the test, then the target will be empty,
# but if there is a role tempo, then this is normal behavior
return data
else:
raise NoColumnsError(TargetRole().role_name)
Expand Down Expand Up @@ -425,7 +407,7 @@ def execute(self, data: ExperimentData) -> ExperimentData:
if len(grouping_data[0]) < 1 or len(grouping_data[1]) < 1:
raise NotSuitableFieldError(group_field_data, "Grouping")

compare_result = self.calc(
compare_result = self._precalc(
compare_by=self.compare_by,
target_fields_data=target_fields_data,
baseline_field_data=baseline_field_data,
Expand Down
13 changes: 3 additions & 10 deletions hypex/comparators/comparators.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,7 @@ def search_types(self) -> list[type] | None:
return NUMBER_TYPES_LIST

@classmethod
def _inner_function(
cls,
data: Dataset,
test_data: Dataset | None = None,
**kwargs,
) -> dict:
def calc(cls, data: Dataset, test_data: Dataset | None = None, **kwargs) -> dict:
test_data = cls._check_test_data(test_data)
control_mean = data.mean()
test_mean = test_data.mean()
Expand Down Expand Up @@ -66,9 +61,7 @@ def __init__(
)

@classmethod
def _inner_function(
cls, data: Dataset, test_data: Dataset | None = None, **kwargs
) -> dict:
def calc(cls, data: Dataset, test_data: Dataset | None = None, **kwargs) -> dict:
size_a = len(data)
size_b = len(test_data) if isinstance(test_data, Dataset) else 0

Expand All @@ -82,7 +75,7 @@ def _inner_function(

class PSI(Comparator):
@classmethod
def _inner_function(
def calc(
cls, data: Dataset, test_data: Dataset | None = None, **kwargs
) -> dict[str, float]:
test_data = cls._check_test_data(test_data=test_data)
Expand Down
13 changes: 4 additions & 9 deletions hypex/comparators/distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,8 @@
from copy import deepcopy
from typing import Any, Sequence

from ..dataset import (
ABCRole,
Dataset,
ExperimentData,
FeatureRole,
GroupingRole,
TargetRole,
)
from ..dataset import (ABCRole, Dataset, ExperimentData, FeatureRole,
GroupingRole, TargetRole)
from ..executor import Calculator
from ..extensions.scipy_linalg import CholeskyExtension, InverseExtension
from ..utils import ExperimentDataEnum, NotSuitableFieldError
Expand Down Expand Up @@ -113,7 +107,8 @@ def execute(self, data: ExperimentData) -> ExperimentData:
)
if (
not target_fields and data.ds.tmp_roles
): # if the column is not suitable for the test, then the target will be empty, but if there is a role tempo, then this is normal behavior
): # if the column is not suitable for the test, then the target will be empty, but if there is a role
# tempo, then this is normal behavior
return data
if group_field[0] in data.groups: # TODO: to recheck if this is a correct check
grouping_data = list(data.groups[group_field[0]].items())
Expand Down
24 changes: 6 additions & 18 deletions hypex/comparators/hypothesis_testing.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
from __future__ import annotations

from ..dataset import Dataset
from ..extensions.scipy_stats import (
Chi2TestExtension,
KSTestExtension,
TTestExtension,
UTestExtension,
)
from ..extensions.scipy_stats import (Chi2TestExtension, KSTestExtension,
TTestExtension, UTestExtension)
from ..utils.constants import NUMBER_TYPES_LIST
from .abstract import StatHypothesisTesting

Expand All @@ -17,9 +13,7 @@ def search_types(self) -> list[type] | None:
return NUMBER_TYPES_LIST

@classmethod
def _inner_function(
cls, data: Dataset, test_data: Dataset | None = None, **kwargs
) -> Dataset:
def calc(cls, data: Dataset, test_data: Dataset | None = None, **kwargs) -> Dataset:
return TTestExtension(kwargs.get("reliability", 0.05)).calc(
data, other=test_data, **kwargs
)
Expand All @@ -31,9 +25,7 @@ def search_types(self) -> list[type] | None:
return NUMBER_TYPES_LIST

@classmethod
def _inner_function(
cls, data: Dataset, test_data: Dataset | None = None, **kwargs
) -> Dataset:
def calc(cls, data: Dataset, test_data: Dataset | None = None, **kwargs) -> Dataset:
return KSTestExtension(kwargs.get("reliability", 0.05)).calc(
data, other=test_data, **kwargs
)
Expand All @@ -45,9 +37,7 @@ def search_types(self) -> list[type] | None:
return NUMBER_TYPES_LIST

@classmethod
def _inner_function(
cls, data: Dataset, test_data: Dataset | None = None, **kwargs
) -> Dataset:
def calc(cls, data: Dataset, test_data: Dataset | None = None, **kwargs) -> Dataset:
return UTestExtension(kwargs.get("reliability", 0.05)).calc(
data, other=test_data, **kwargs
)
Expand All @@ -59,9 +49,7 @@ def search_types(self) -> list[type] | None:
return [str]

@classmethod
def _inner_function(
cls, data: Dataset, test_data: Dataset | None = None, **kwargs
) -> Dataset:
def calc(cls, data: Dataset, test_data: Dataset | None = None, **kwargs) -> Dataset:
return Chi2TestExtension(reliability=kwargs.get("reliability", 0.05)).calc(
data, other=test_data, **kwargs
)
Loading