Skip to content
19 changes: 3 additions & 16 deletions hypex/analyzers/aa.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,14 @@

from ..comparators import Chi2Test, KSTest, TTest
from ..dataset import Dataset, ExperimentData, StatisticRole
from ..executor import Executor
from ..experiments.base_complex import IfParamsExperiment, ParamsExperiment
from ..reporters.aa import OneAADictReporter
from ..splitters import AASplitter, AASplitterWithStratification
from ..utils import ID_SPLIT_SYMBOL, BackendsEnum, ExperimentDataEnum
from .abstract import Analyzer


class OneAAStatAnalyzer(Executor):
def _set_value(self, data: ExperimentData, value, key=None) -> ExperimentData:
return data.set_value(ExperimentDataEnum.analysis_tables, self.id, value)

class OneAAStatAnalyzer(Analyzer):
def execute(self, data: ExperimentData) -> ExperimentData:
analysis_tests: list[type] = [TTest, KSTest, Chi2Test]
executor_ids = data.get_ids(
Expand Down Expand Up @@ -68,7 +65,7 @@ def execute(self, data: ExperimentData) -> ExperimentData:
return self._set_value(data, analysis_dataset)


class AAScoreAnalyzer(Executor):
class AAScoreAnalyzer(Analyzer):
AA_SPLITER_CLASS_MAPPING: ClassVar[dict] = {
class_.__name__: class_ for class_ in [AASplitter, AASplitterWithStratification]
}
Expand All @@ -80,16 +77,6 @@ def __init__(self, alpha: float = 0.05, key: str = ""):
self.__feature_weights = {}
self.threshold = 1 - (self.alpha * 1.2)

def _set_value(
self, data: ExperimentData, value: Any, key: Any = None
) -> ExperimentData:
return data.set_value(
ExperimentDataEnum.analysis_tables,
executor_id=self.id,
key=self.key,
value=value,
)

def _analyze_aa_score(
self, data: ExperimentData, score_table: Dataset
) -> ExperimentData:
Expand Down
29 changes: 6 additions & 23 deletions hypex/analyzers/ab.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,15 @@
from typing import Any

from ..comparators import TTest, UTest
from ..dataset import (
Dataset,
ExperimentData,
StatisticRole,
TargetRole,
TreatmentRole,
)
from ..experiments.base import Executor
from ..dataset import (Dataset, ExperimentData, StatisticRole, TargetRole,
TreatmentRole)
from ..extensions.statsmodels import MultiTest, MultitestQuantile
from ..utils import (
ID_SPLIT_SYMBOL,
NAME_BORDER_SYMBOL,
ABNTestMethodsEnum,
BackendsEnum,
ExperimentDataEnum,
)
from ..utils import (ID_SPLIT_SYMBOL, NAME_BORDER_SYMBOL, ABNTestMethodsEnum,
BackendsEnum)
from .abstract import Analyzer


class ABAnalyzer(Executor):
class ABAnalyzer(Analyzer):
def __init__(
self,
multitest_method: ABNTestMethodsEnum | None = None,
Expand All @@ -41,13 +31,6 @@ def __init__(
self.random_state = random_state
super().__init__(key)

def _set_value(self, data: ExperimentData, value, key=None) -> ExperimentData:
return data.set_value(
ExperimentDataEnum.analysis_tables,
self.id + key if key else self.id,
value,
)

def execute_multitest(self, data: ExperimentData, p_values: Dataset, **kwargs):
group_field = data.ds.search_columns(TreatmentRole())[0]
target_fields = data.ds.search_columns(TargetRole(), search_types=[int, float])
Expand Down
16 changes: 16 additions & 0 deletions hypex/analyzers/abstract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from abc import ABC

from ..dataset import ExperimentData
from ..executor import Executor
from ..utils import ExperimentDataEnum


class Analyzer(Executor, ABC):
    """Abstract base for analyzer executors.

    Concrete analyzers inherit the common result-storage behavior: every
    analyzer writes its output into the experiment's analysis tables under
    its own executor id.
    """

    def _set_value(self, data: ExperimentData, value, key=None) -> ExperimentData:
        """Record *value* in the analysis tables of *data*.

        The entry is stored in the ``analysis_tables`` space, keyed by this
        executor's id, with an optional sub-key.
        """
        return data.set_value(
            ExperimentDataEnum.analysis_tables,
            executor_id=self.id,
            value=value,
            key=key,
        )
9 changes: 2 additions & 7 deletions hypex/analyzers/matching.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
from ..dataset.dataset import DatasetAdapter, ExperimentData
from ..dataset.roles import StatisticRole
from ..executor.executor import Executor
from ..operators.operators import MatchingMetrics
from ..utils.enums import ExperimentDataEnum
from .abstract import Analyzer


class MatchingAnalyzer(Executor):
def _set_value(self, data: ExperimentData, value, key=None) -> ExperimentData:
return data.set_value(
ExperimentDataEnum.analysis_tables, self.id, value, key=key
)

class MatchingAnalyzer(Analyzer):
def execute(self, data: ExperimentData):
variables = data.variables[
data.get_one_id(MatchingMetrics, space=ExperimentDataEnum.variables)
Expand Down
56 changes: 19 additions & 37 deletions hypex/comparators/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,14 @@
from abc import ABC, abstractmethod
from typing import Any, Literal

from ..dataset import (
ABCRole,
Dataset,
DatasetAdapter,
ExperimentData,
GroupingRole,
InfoRole,
PreTargetRole,
StatisticRole,
TargetRole,
TempTargetRole,
)
from ..dataset import (ABCRole, Dataset, DatasetAdapter, ExperimentData,
GroupingRole, InfoRole, PreTargetRole, StatisticRole,
TargetRole, TempTargetRole)
from ..executor import Calculator
from ..utils import (
NAME_BORDER_SYMBOL,
BackendsEnum,
ExperimentDataEnum,
FromDictTypes,
GroupingDataType,
)
from ..utils.errors import (
AbstractMethodError,
NoColumnsError,
NoRequiredArgumentError,
NotSuitableFieldError,
)
from ..utils import (NAME_BORDER_SYMBOL, BackendsEnum, ExperimentDataEnum,
FromDictTypes, GroupingDataType)
from ..utils.errors import (AbstractMethodError, NoColumnsError,
NoRequiredArgumentError, NotSuitableFieldError)


class Comparator(Calculator, ABC):
Expand Down Expand Up @@ -58,13 +40,11 @@ def _local_extract_dataset(

@classmethod
@abstractmethod
def _inner_function(
cls, data: Dataset, test_data: Dataset | None = None, **kwargs
) -> Any:
def calc(cls, data: Dataset, test_data: Dataset | None = None, **kwargs) -> Any:
raise AbstractMethodError

def _get_fields_data(self, data: ExperimentData) -> dict[str, Dataset]:
tmp_role = True if data.ds.tmp_roles else False
tmp_role = bool(data.ds.tmp_roles)
group_field_data = data.field_data_search(roles=self.grouping_role)
target_fields_data = data.field_data_search(
roles=TempTargetRole() if tmp_role else self.target_roles,
Expand Down Expand Up @@ -96,7 +76,7 @@ def _execute_inner_function(
else f"{compared_data[i][0]}{NAME_BORDER_SYMBOL}{compared_data[i][1].columns[0]}"
)
result[res_name] = DatasetAdapter.to_dataset(
cls._inner_function(
cls.calc(
baseline_data[0 if len(baseline_data) == 1 else i][1],
compared_data[i][1],
**kwargs,
Expand Down Expand Up @@ -165,14 +145,12 @@ def _grouping_data_split(
def _split_ds_into_columns(
data: list[tuple[str, Dataset]],
) -> list[tuple[str, Dataset]]:
result = [
return [
(bucket[0], bucket[1][column])
for bucket in data
for column in bucket[1].columns
]

return result

@staticmethod
def _field_validity_check(
field_data: Dataset,
Expand Down Expand Up @@ -333,12 +311,13 @@ def _split_data_to_buckets(
)
else:
raise ValueError(
f"Wrong compare_by argument passed {compare_by}. It can be only one of the following modes: 'groups', 'columns', 'columns_in_groups', 'cross'."
f"Wrong compare_by argument passed {compare_by}. It can be only one of the following modes: 'groups', "
f"'columns', 'columns_in_groups', 'cross'."
)
return baseline_data, compared_data

@classmethod
def calc(
def _precalc(
cls,
compare_by: (
Literal["groups", "columns", "columns_in_groups", "cross"] | None
Expand Down Expand Up @@ -386,7 +365,10 @@ def execute(self, data: ExperimentData) -> ExperimentData:
)

if len(target_fields_data.columns) == 0:
if data.ds.tmp_roles: # if the column is not suitable for the test, then the target will be empty, but if there is a role tempo, then this is normal behavior
if (
data.ds.tmp_roles
): # if the column is not suitable for the test, then the target will be empty,
# but if there is a role tempo, then this is normal behavior
return data
else:
raise NoColumnsError(TargetRole().role_name)
Expand Down Expand Up @@ -425,7 +407,7 @@ def execute(self, data: ExperimentData) -> ExperimentData:
if len(grouping_data[0]) < 1 or len(grouping_data[1]) < 1:
raise NotSuitableFieldError(group_field_data, "Grouping")

compare_result = self.calc(
compare_result = self._precalc(
compare_by=self.compare_by,
target_fields_data=target_fields_data,
baseline_field_data=baseline_field_data,
Expand Down
13 changes: 3 additions & 10 deletions hypex/comparators/comparators.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,7 @@ def search_types(self) -> list[type] | None:
return NUMBER_TYPES_LIST

@classmethod
def _inner_function(
cls,
data: Dataset,
test_data: Dataset | None = None,
**kwargs,
) -> dict:
def calc(cls, data: Dataset, test_data: Dataset | None = None, **kwargs) -> dict:
test_data = cls._check_test_data(test_data)
control_mean = data.mean()
test_mean = test_data.mean()
Expand Down Expand Up @@ -66,9 +61,7 @@ def __init__(
)

@classmethod
def _inner_function(
cls, data: Dataset, test_data: Dataset | None = None, **kwargs
) -> dict:
def calc(cls, data: Dataset, test_data: Dataset | None = None, **kwargs) -> dict:
size_a = len(data)
size_b = len(test_data) if isinstance(test_data, Dataset) else 0

Expand All @@ -82,7 +75,7 @@ def _inner_function(

class PSI(Comparator):
@classmethod
def _inner_function(
def calc(
cls, data: Dataset, test_data: Dataset | None = None, **kwargs
) -> dict[str, float]:
test_data = cls._check_test_data(test_data=test_data)
Expand Down
13 changes: 4 additions & 9 deletions hypex/comparators/distances.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,8 @@
from copy import deepcopy
from typing import Any, Sequence

from ..dataset import (
ABCRole,
Dataset,
ExperimentData,
FeatureRole,
GroupingRole,
TargetRole,
)
from ..dataset import (ABCRole, Dataset, ExperimentData, FeatureRole,
GroupingRole, TargetRole)
from ..executor import Calculator
from ..extensions.scipy_linalg import CholeskyExtension, InverseExtension
from ..utils import ExperimentDataEnum, NotSuitableFieldError
Expand Down Expand Up @@ -113,7 +107,8 @@ def execute(self, data: ExperimentData) -> ExperimentData:
)
if (
not target_fields and data.ds.tmp_roles
): # if the column is not suitable for the test, then the target will be empty, but if there is a role tempo, then this is normal behavior
): # if the column is not suitable for the test, then the target will be empty, but if there is a role
# tempo, then this is normal behavior
return data
if group_field[0] in data.groups: # TODO: to recheck if this is a correct check
grouping_data = list(data.groups[group_field[0]].items())
Expand Down
24 changes: 6 additions & 18 deletions hypex/comparators/hypothesis_testing.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
from __future__ import annotations

from ..dataset import Dataset
from ..extensions.scipy_stats import (
Chi2TestExtension,
KSTestExtension,
TTestExtension,
UTestExtension,
)
from ..extensions.scipy_stats import (Chi2TestExtension, KSTestExtension,
TTestExtension, UTestExtension)
from ..utils.constants import NUMBER_TYPES_LIST
from .abstract import StatHypothesisTesting

Expand All @@ -17,9 +13,7 @@ def search_types(self) -> list[type] | None:
return NUMBER_TYPES_LIST

@classmethod
def _inner_function(
cls, data: Dataset, test_data: Dataset | None = None, **kwargs
) -> Dataset:
def calc(cls, data: Dataset, test_data: Dataset | None = None, **kwargs) -> Dataset:
return TTestExtension(kwargs.get("reliability", 0.05)).calc(
data, other=test_data, **kwargs
)
Expand All @@ -31,9 +25,7 @@ def search_types(self) -> list[type] | None:
return NUMBER_TYPES_LIST

@classmethod
def _inner_function(
cls, data: Dataset, test_data: Dataset | None = None, **kwargs
) -> Dataset:
def calc(cls, data: Dataset, test_data: Dataset | None = None, **kwargs) -> Dataset:
return KSTestExtension(kwargs.get("reliability", 0.05)).calc(
data, other=test_data, **kwargs
)
Expand All @@ -45,9 +37,7 @@ def search_types(self) -> list[type] | None:
return NUMBER_TYPES_LIST

@classmethod
def _inner_function(
cls, data: Dataset, test_data: Dataset | None = None, **kwargs
) -> Dataset:
def calc(cls, data: Dataset, test_data: Dataset | None = None, **kwargs) -> Dataset:
return UTestExtension(kwargs.get("reliability", 0.05)).calc(
data, other=test_data, **kwargs
)
Expand All @@ -59,9 +49,7 @@ def search_types(self) -> list[type] | None:
return [str]

@classmethod
def _inner_function(
cls, data: Dataset, test_data: Dataset | None = None, **kwargs
) -> Dataset:
def calc(cls, data: Dataset, test_data: Dataset | None = None, **kwargs) -> Dataset:
return Chi2TestExtension(reliability=kwargs.get("reliability", 0.05)).calc(
data, other=test_data, **kwargs
)
Loading