2 changes: 1 addition & 1 deletion docs/source/user_guide/modeling_gcm/model_evaluation.rst
@@ -81,7 +81,7 @@ the chain structure example X→Y→Z again:

If non-root node and the data is categorical:
A functional causal model based on a classifier, i.e., X_i = f(PA_i, N_i).
Here, N_i follows a uniform distribution on [0, 1] and is used to randomly sample a class (category) using the conditional probability distribution produced by a classification model.Here, different model classes are evaluated using the (negative) F1 score and the best performing model class is selected.
Here, N_i follows a uniform distribution on [0, 1] and is used to randomly sample a class (category) using the conditional probability distribution produced by a classification model. Here, different model classes are evaluated using the log loss metric and the best performing model class is selected.

In total, 3 nodes were analyzed:

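As a rough illustration of the sampling step described in this paragraph: the uniform noise N_i can be turned into a class by inverting the cumulative conditional probabilities. This is a minimal sketch, not the dowhy implementation, and the helper name sample_class is hypothetical.

import numpy as np

def sample_class(class_probabilities, noise):
    # class_probabilities: conditional probabilities for one sample, summing to 1.
    # noise: a draw from a uniform distribution on [0, 1], playing the role of N_i.
    cumulative = np.cumsum(class_probabilities)
    return int(np.searchsorted(cumulative, noise))

# With probabilities [0.2, 0.5, 0.3], a noise value of 0.65 falls into the
# second class's interval [0.2, 0.7), so class index 1 is returned.
print(sample_class(np.array([0.2, 0.5, 0.3]), 0.65))  # -> 1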
46 changes: 24 additions & 22 deletions dowhy/gcm/auto.py
@@ -11,7 +11,6 @@
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
from sklearn.preprocessing import MultiLabelBinarizer

from dowhy.gcm import config
from dowhy.gcm.causal_mechanisms import AdditiveNoiseModel, ClassifierFCM, DiscreteAdditiveNoiseModel
@@ -30,6 +29,7 @@
)
from dowhy.gcm.ml.classification import (
create_ada_boost_classifier,
create_decision_tree_classifier,
create_extra_trees_classifier,
create_gaussian_nb_classifier,
create_knn_classifier,
@@ -55,8 +55,9 @@
from dowhy.graph import get_ordered_predecessors, is_root_node

_LIST_OF_POTENTIAL_CLASSIFIERS_GOOD = [
partial(create_logistic_regression_classifier, max_iter=10000),
create_hist_gradient_boost_classifier,
partial(create_logistic_regression_classifier, max_iter=10000),
create_decision_tree_classifier,
]
_LIST_OF_POTENTIAL_REGRESSORS_GOOD = [
create_linear_regressor,
@@ -152,9 +153,8 @@ def __str__(self):
summary_strings.append(
"A functional causal model based on a classifier, i.e., X_i = f(PA_i, N_i).\n"
"Here, N_i follows a uniform distribution on [0, 1] and is used to randomly sample a "
"class (category) using the conditional probability distribution produced by a "
"classification model."
"Here, different model classes are evaluated using the (negative) F1 score and the best"
"class (category) using the conditional probability distribution produced by a classification model. "
"Here, different model classes are evaluated using the log loss metric and the best"
" performing model class is selected."
)
summary_strings.append("\nIn total, %d nodes were analyzed:" % len(list(self._nodes)))
@@ -223,7 +223,7 @@ def assign_causal_mechanisms(
A functional causal model based on a classifier, i.e., X_i = f(PA_i, N_i).
Here, N_i follows a uniform distribution on [0, 1] and is used to randomly sample a class (category) using the
conditional probability distribution produced by a classification model. Here, different model classes are evaluated
using the (negative) F1 score and the best performing model class is selected.
using the log loss metric and the best performing model class is selected.

The current model zoo is:

@@ -528,20 +528,13 @@ def find_best_model(
metric_name = "given"

if metric is None:
metric_name = "(negative) F1"
if is_classification_problem:
metric = lambda y_true, y_preds: -metrics.f1_score(
y_true, y_preds, average="macro", zero_division=0
) # Higher score is better
metric_name = "log loss"
metric = metrics.log_loss # Lower score is better (better calibrated probabilities)
else:
metric_name = "mean squared error (MSE)"
metric = metrics.mean_squared_error

labelBinarizer = None
if is_classification_problem:
labelBinarizer = MultiLabelBinarizer()
labelBinarizer.fit(Y)

if is_classification_problem:
if len(np.unique(Y)) == 1:
raise ValueError(
@@ -559,20 +552,29 @@ def estimate_average_score(prediction_model_factory: Callable[[], PredictionMode

with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
for train_indices, test_indices in kfolds:
if is_classification_problem and len(np.unique(Y[train_indices[:max_samples_per_split]])) == 1:
continue
if is_classification_problem:
unique_training_labels = np.unique(Y[train_indices[:max_samples_per_split]])
unique_test_labels = np.unique(Y[test_indices[:max_samples_per_split]])
if len(unique_training_labels) == 1 or len(unique_test_labels) == 1:
continue

model_instance = prediction_model_factory()
model_instance.fit(X[train_indices[:max_samples_per_split]], Y[train_indices[:max_samples_per_split]])

y_true = Y[test_indices[:max_samples_per_split]]
y_pred = model_instance.predict(X[test_indices[:max_samples_per_split]])
if labelBinarizer is not None:
y_true = labelBinarizer.transform(y_true)
y_pred = labelBinarizer.transform(y_pred)

average_result.append(metric(y_true, y_pred))
if is_classification_problem:
# For classification, use probabilities for log loss calculation
y_pred_proba = model_instance.predict_probabilities(X[test_indices[:max_samples_per_split]])
# Convert string labels to label indices for log_loss
label_to_idx = {label: idx for idx, label in enumerate(unique_test_labels)}
y_true_indices = np.array([label_to_idx[label] for label in y_true.flatten()])
average_result.append(metric(y_true_indices, y_pred_proba))
else:
y_pred = model_instance.predict(X[test_indices[:max_samples_per_split]])
average_result.append(metric(y_true, y_pred))

if len(average_result) == 0:
return float("inf")
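For intuition on the metric switch above, here is a simplified, self-contained sketch of the selection idea: score each candidate classifier by its mean cross-validated log loss on predicted probabilities and keep the one with the lowest score. It uses plain sklearn estimators instead of dowhy's prediction-model wrappers and omits the subsampling and label-presence checks from find_best_model, so it is illustrative only.

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold
from sklearn.tree import DecisionTreeClassifier


def mean_cv_log_loss(make_model, X, y, n_splits=5):
    scores = []
    for train_idx, test_idx in StratifiedKFold(n_splits=n_splits).split(X, y):
        model = make_model()
        model.fit(X[train_idx], y[train_idx])
        # log_loss scores probabilities; passing labels=model.classes_ keeps the
        # column order of predict_proba aligned with the true labels.
        proba = model.predict_proba(X[test_idx])
        scores.append(log_loss(y[test_idx], proba, labels=model.classes_))
    return float(np.mean(scores))


rng = np.random.default_rng(0)
X = rng.normal(size=(300, 4))
y = (X[:, 0] + X[:, 1] > 0).astype(str)

candidates = {
    "logistic regression": lambda: LogisticRegression(max_iter=10000),
    "decision tree": lambda: DecisionTreeClassifier(),
}
scores = {name: mean_cv_log_loss(factory, X, y) for name, factory in candidates.items()}
print(min(scores, key=scores.get), scores)  # the lower (better calibrated) log loss wins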
5 changes: 5 additions & 0 deletions dowhy/gcm/ml/classification.py
@@ -6,6 +6,7 @@
from packaging import version
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeClassifier

from dowhy.gcm.ml.prediction_model import PredictionModel

@@ -107,3 +108,7 @@ def create_polynom_logistic_regression_classifier(
PolynomialFeatures(degree=degree, include_bias=False), LogisticRegression(**kwargs_logistic_regression)
)
)


def create_decision_tree_classifier() -> SklearnClassificationModel:
return SklearnClassificationModel(DecisionTreeClassifier())
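A short usage sketch of the new factory. This is illustrative only and assumes numeric features; the wrapper API (fit, predict_probabilities, sklearn_model) follows the members already used elsewhere in this diff.

import numpy as np

from dowhy.gcm.ml.classification import create_decision_tree_classifier

X = np.random.normal(size=(200, 3))
y = (X[:, 0] > 0).astype(str)

model = create_decision_tree_classifier()
model.fit(X, y)
print(model.predict_probabilities(X[:2]))  # class probabilities for the first two samples
print(type(model.sklearn_model).__name__)  # DecisionTreeClassifier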
104 changes: 97 additions & 7 deletions tests/gcm/test_auto.py
@@ -9,6 +9,7 @@
from sklearn.linear_model import ElasticNetCV, LassoCV, LinearRegression, LogisticRegression, RidgeCV
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier

from dowhy import gcm
from dowhy.gcm import (
@@ -40,19 +41,84 @@ def _generate_non_linear_regression_data():


def _generate_linear_classification_data():
X = np.random.normal(0, 1, (1000, 5))
X = np.random.normal(0, 1, (100, 5))
Y = (np.sum(X * np.random.uniform(-5, 5, X.shape[1]), axis=1) > 0).astype(str)

return X, Y


def _generate_non_classification_data():
def _generate_non_linear_classification_data():
X = np.random.normal(0, 1, (1000, 5))
Y = (np.sum(np.exp(X), axis=1) > np.median(np.sum(np.exp(X), axis=1))).astype(str)

return X, Y


def _generate_linear_multiclass_classification_data_with_mixed_features():
"""Generate multi-class classification data with mixed categorical and numerical features (linear relationship)."""
n_samples = 100

# Numerical features
num_feat1 = np.random.normal(0, 1, n_samples)
num_feat2 = np.random.normal(0, 1, n_samples)

# Categorical features
cat_feat1 = np.random.choice(["TypeA", "TypeB"], n_samples)
cat_feat2 = np.random.choice(["Group1", "Group2", "Group3"], n_samples)

# Create target variable based on linear combination of features
# Convert categorical to numerical for decision making
cat1_numeric = np.where(cat_feat1 == "TypeA", 1, -1)
cat2_numeric = np.where(cat_feat2 == "Group1", 2, np.where(cat_feat2 == "Group2", 0, -2))

# Linear combination to determine class
decision_value = 2 * num_feat1 + 1.5 * num_feat2 + 0.8 * cat1_numeric + 0.5 * cat2_numeric

# Convert to 3 classes
Y = np.where(decision_value > 1, "Class_A", np.where(decision_value > -1, "Class_B", "Class_C"))

# Combine features
X = np.column_stack([num_feat1, num_feat2, cat_feat1, cat_feat2])

return X, Y


def _generate_non_linear_multiclass_classification_data_with_mixed_features():
"""Generate multi-class classification data with mixed categorical and numerical features (non-linear relationship)."""
n_samples = 1000

# Numerical features
num_feat1 = np.random.normal(0, 1, n_samples)
num_feat2 = np.random.normal(0, 1, n_samples)

# Categorical features
cat_feat1 = np.random.choice(["TypeA", "TypeB"], n_samples)
cat_feat2 = np.random.choice(["Group1", "Group2", "Group3"], n_samples)

# Create target variable based on non-linear combination of features
# Convert categorical to numerical for decision making
cat1_numeric = np.where(cat_feat1 == "TypeA", 1, -1)
cat2_numeric = np.where(cat_feat2 == "Group1", 2, np.where(cat_feat2 == "Group2", 0, -2))

# Non-linear combination: use exponentials and products
decision_value = (
np.exp(num_feat1 * 0.5)
+ np.sin(num_feat2 * 2)
+ num_feat1 * num_feat2 * 0.3
+ cat1_numeric * np.exp(num_feat2 * 0.2)
+ cat2_numeric * np.cos(num_feat1)
)

# Convert to 3 classes based on percentiles
p33, p67 = np.percentile(decision_value, [33, 67])
Y = np.where(decision_value > p67, "Class_A", np.where(decision_value > p33, "Class_B", "Class_C"))

# Combine features
X = np.column_stack([num_feat1, num_feat2, cat_feat1, cat_feat2])

return X, Y


@flaky(max_runs=3)
def test_given_linear_regression_problem_when_auto_assign_causal_models_with_good_quality_returns_linear_model():
X, Y = _generate_linear_regression_data()
@@ -148,7 +214,7 @@ def test_given_linear_classification_problem_when_auto_assign_causal_models_with

@flaky(max_runs=3)
def test_given_non_linear_classification_problem_when_auto_assign_causal_models_with_good_quality_returns_non_linear_model():
X, Y = _generate_non_classification_data()
X, Y = _generate_non_linear_classification_data()

causal_model = ProbabilisticCausalModel(
nx.DiGraph([("X0", "Y"), ("X1", "Y"), ("X2", "Y"), ("X3", "Y"), ("X4", "Y")])
Expand All @@ -164,7 +230,7 @@ def test_given_non_linear_classification_problem_when_auto_assign_causal_models_

@flaky(max_runs=3)
def test_given_non_linear_classification_problem_when_auto_assign_causal_models_with_better_quality_returns_non_linear_model():
X, Y = _generate_non_classification_data()
X, Y = _generate_non_linear_classification_data()

causal_model = ProbabilisticCausalModel(
nx.DiGraph([("X0", "Y"), ("X1", "Y"), ("X2", "Y"), ("X3", "Y"), ("X4", "Y")])
@@ -384,7 +450,7 @@ def test_given_continuous_data_when_print_auto_summary_then_returns_expected_for

If non-root node and the data is categorical:
A functional causal model based on a classifier, i.e., X_i = f(PA_i, N_i).
Here, N_i follows a uniform distribution on [0, 1] and is used to randomly sample a class (category) using the conditional probability distribution produced by a classification model.Here, different model classes are evaluated using the (negative) F1 score and the best performing model class is selected.
Here, N_i follows a uniform distribution on [0, 1] and is used to randomly sample a class (category) using the conditional probability distribution produced by a classification model. Here, different model classes are evaluated using the log loss metric and the best performing model class is selected.

In total, 6 nodes were analyzed:

@@ -459,7 +525,7 @@ def test_given_categorical_data_when_print_auto_summary_then_returns_expected_fo

If non-root node and the data is categorical:
A functional causal model based on a classifier, i.e., X_i = f(PA_i, N_i).
Here, N_i follows a uniform distribution on [0, 1] and is used to randomly sample a class (category) using the conditional probability distribution produced by a classification model.Here, different model classes are evaluated using the (negative) F1 score and the best performing model class is selected.
Here, N_i follows a uniform distribution on [0, 1] and is used to randomly sample a class (category) using the conditional probability distribution produced by a classification model. Here, different model classes are evaluated using the log loss metric and the best performing model class is selected.

In total, 6 nodes were analyzed:

@@ -483,7 +549,7 @@ def test_given_categorical_data_when_print_auto_summary_then_returns_expected_fo
in summary_string
)
assert "This represents the causal relationship as Y := f(X0,X1,X2,X3,X4,N)." in summary_string
assert "For the model selection, the following models were evaluated on the (negative) F1 metric:" in summary_string
assert "For the model selection, the following models were evaluated on the log loss metric:" in summary_string
assert (
"""===Note===
Note, based on the selected auto assignment quality, the set of evaluated models changes.
@@ -602,3 +668,27 @@ def test_given_missing_data_mixed_numerical_and_categorical_when_auto_assign_mec

# Just check if it doesn't raise errors.
gcm.intrinsic_causal_influence(causal_model, "Z")


@flaky(max_runs=3)
def test_given_linear_multiclass_mixed_features_when_auto_assign_causal_models_with_good_quality_returns_linear_model():
X, Y = _generate_linear_multiclass_classification_data_with_mixed_features()

causal_model = ProbabilisticCausalModel(nx.DiGraph([("X0", "Y"), ("X1", "Y"), ("X2", "Y"), ("X3", "Y")]))
data = {"X" + str(i): X[:, i] for i in range(X.shape[1])}
data.update({"Y": Y})
assign_causal_mechanisms(causal_model, pd.DataFrame(data), quality=AssignmentQuality.GOOD)
assert isinstance(causal_model.causal_mechanism("Y").classifier_model.sklearn_model, LogisticRegression)


@flaky(max_runs=3)
def test_given_non_linear_multiclass_mixed_features_when_auto_assign_causal_models_with_good_quality_returns_non_linear_model():
X, Y = _generate_non_linear_multiclass_classification_data_with_mixed_features()

causal_model = ProbabilisticCausalModel(nx.DiGraph([("X0", "Y"), ("X1", "Y"), ("X2", "Y"), ("X3", "Y")]))
data = {"X" + str(i): X[:, i] for i in range(X.shape[1])}
data.update({"Y": Y})
assign_causal_mechanisms(causal_model, pd.DataFrame(data), quality=AssignmentQuality.GOOD)
assert isinstance(
causal_model.causal_mechanism("Y").classifier_model.sklearn_model, DecisionTreeClassifier
) or isinstance(causal_model.causal_mechanism("Y").classifier_model.sklearn_model, HistGradientBoostingClassifier)