Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 2 additions & 10 deletions src/fes/datasets/synthetic_dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Dict, Any
from utils import calculate_snr

import numpy as np
from numpy import random
Expand Down Expand Up @@ -145,13 +146,4 @@ def generate_grouped_data(n, m, noise_std, redundancy_rate, features_fill, num_g
y_true = X @ w
y = y_true + np.random.standard_normal((n)) * noise_std

return y, X, w, y_true, features_mask, groups_labels


"""
Support utils
"""


def calculate_snr(y_true, noise_std):
    """Return the mean signal-to-noise ratio in decibels, 20*log10(|y_true / noise_std|)."""
    # Substitute a zero ratio where the noise level is exactly zero before taking the log.
    ratio = np.where(noise_std == 0, 0, y_true / noise_std)
    decibels = 20 * np.log10(np.abs(ratio))
    return decibels.mean()
return y, X, w, y_true, features_mask, groups_labels
11 changes: 11 additions & 0 deletions src/fes/datasets/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import numpy as np



"""
Support utils
"""


def calculate_snr(y_true, noise_std):
    """Mean SNR in dB: the average of 20*log10(|y_true / noise_std|) over all samples."""
    # When noise_std == 0 the ratio is forced to 0 rather than dividing by zero.
    safe_ratio = np.where(noise_std == 0, 0, y_true / noise_std)
    return np.mean(20 * np.log10(np.abs(safe_ratio)))
14 changes: 14 additions & 0 deletions src/fes/pipelines/data_science/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@
from sklearn.metrics import mean_squared_error, r2_score


# Tool to check dimensionality
def assert_shapes(x, x_shape, y, y_shape):
    """Validate x and y against their expected symbolic shape specs.

    Each spec is a sequence whose entries are either concrete sizes or
    string placeholders; every occurrence of the same placeholder must
    resolve to the same concrete dimension across both arrays.
    """
    assert_shape(x, x_shape)
    assert_shape(y, y_shape)

    # Collect the concrete sizes bound to each symbolic dimension name.
    bindings = defaultdict(set)
    for arr, spec in ((x, x_shape), (y, y_shape)):
        for axis, sym in enumerate(spec):
            if isinstance(sym, str):
                bindings[sym].add(arr.shape[axis])
    # Each placeholder must have been bound to exactly one size.
    for sizes in bindings.values():
        assert len(sizes) == 1, (x, x_shape, y, y_shape)


def fit_model(y, X):
"""
Parameters
Expand Down Expand Up @@ -79,3 +92,4 @@ def evaluate_perm_importance(regressor, y, X, w, y_true, features_mask, paramete

print(f"Approximation with {pi_parameters['explanation_rate']} explanation rate:")
print(f"Number of proposed features: {len(features_hat_idx)}, {er_mse:.3f} MSE, {er_r2:.3f} R2", end='\n\n')

10 changes: 8 additions & 2 deletions src/fes/pipelines/data_science/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,19 @@ def perm_importance_pipeline(**kwargs):
[
node(
func=fit_model,
inputs=["y", "X"],
inputs=[
"y", "X"
],
outputs="regressor",
name="fit_model_node",
),
node(
func=evaluate_perm_importance,
inputs=["regressor", "y", "X", "w", "y_true", "features_mask", "parameters"],
inputs=[
"regressor",
"y", "X", "w", "y_true",
"features_mask", "parameters"
],
outputs=None,
name="evaluate_perm_importance_node",
),
Expand Down