# methods/catalog/probe/library/wachter_rip.py
#
# PROBE recourse (Pawelczyk et al., 2023): Wachter-style counterfactual search
# with a probabilistic invalidation-rate constraint. This section provides the
# gradient helpers and the closed-form invalidation-rate estimate used by
# `probe_recourse` below.
# (The package `__init__.py` files simply re-export `Probe` and
# `probe_recourse` with `# flake8: noqa`.)

import datetime
from typing import List, Optional

import numpy as np
import math
import torch
import torch.optim as optim
import torch.distributions.normal as normal_distribution
from torch.distributions.multivariate_normal import MultivariateNormal
from torch import nn

DECISION_THRESHOLD = 0.5

# Mean and variance for rectified normal distribution:
# see in here : http://journal-sfds.fr/article/view/669


def compute_jacobian(inputs, output, num_classes=1):
    """
    Return d(output)/d(inputs).

    :param inputs: tensor with ``requires_grad=True`` (e.g. Batch X Size)
    :param output: tensor computed from ``inputs`` (here: a scalar logit)
    :param num_classes: kept for interface compatibility; not used
    :return: gradient tensor with the same shape as ``inputs``
    """
    assert inputs.requires_grad
    return gradient(output, inputs)


def gradient(y, x, grad_outputs=None):
    """
    Compute dy/dx @ grad_outputs.

    ``create_graph=True`` keeps the result differentiable so the invalidation
    loss can be back-propagated through this gradient.
    """
    if grad_outputs is None:
        # ones_like matches y's dtype/shape/device; a bare int tensor
        # (torch.tensor(1)) can be rejected by autograd for float outputs.
        grad_outputs = torch.ones_like(y)
    grad = torch.autograd.grad(y, [x], grad_outputs=grad_outputs, create_graph=True)[0]
    return grad


def compute_invalidation_rate_closed(torch_model, x, sigma2):
    """
    First-order closed-form invalidation rate under Gaussian perturbations:

        IR(x) = 1 - Phi( logit(x) / (sqrt(sigma2) * ||J(x)||_2) )

    where ``logit(x) = log(p1/p0)`` and ``J`` is its Jacobian w.r.t. ``x``.

    :param torch_model: model whose output supports ``out[0][0]`` / ``out[0][1]``
        class probabilities (assumes a 2-class probability output — TODO confirm)
    :param x: differentiable input tensor (``requires_grad=True``)
    :param sigma2: perturbation variance as a 0-dim tensor (``torch.sqrt`` is
        applied to it directly)
    :return: invalidation rate as a differentiable 0-dim tensor
    """
    prob = torch_model(x)
    logit_x = torch.log(prob[0][1] / prob[0][0])
    jacobian_x = compute_jacobian(x, logit_x, num_classes=1).reshape(-1)
    denom = torch.sqrt(sigma2) * torch.norm(jacobian_x, 2)
    arg = logit_x / denom

    # Evaluate the standard Gaussian cdf at the scaled logit.
    normal = normal_distribution.Normal(loc=0.0, scale=1.0)
    normal_cdf = normal.cdf(arg)

    # Invalidation rate: probability a perturbed input flips the decision.
    ir = 1 - normal_cdf

    return ir
def perturb_sample(x, n_samples, sigma2):
    """
    Draw ``n_samples`` Gaussian perturbations of x, i.e. samples from
    N(x, sigma2 * I).

    :param x: tensor of shape (1, d)
    :param n_samples: number of perturbed copies to draw
    :param sigma2: perturbation variance (scalar)
    :return: tensor of shape (n_samples, d)
    """
    # Stack n_samples copies of x, one per row.
    X = x.repeat(n_samples, 1)
    Sigma = torch.eye(x.shape[1]) * sigma2
    eps = MultivariateNormal(
        loc=torch.zeros(x.shape[1]), covariance_matrix=Sigma
    ).sample((n_samples,))
    return X + eps


def reparametrization_trick(mu, sigma2, n_samples):
    """
    Sample ``n_samples`` points from N(mu, sigma2 * I) via the
    reparametrization trick, so the samples stay differentiable w.r.t. ``mu``.

    :param mu: mean tensor of shape (1, d)
    :param sigma2: variance as a 0-dim tensor (``torch.sqrt`` is applied to it)
    :param n_samples: number of samples
    :return: tensor of shape (n_samples, d)
    """
    std = torch.sqrt(sigma2)
    base = MultivariateNormal(
        loc=torch.zeros(mu.shape[1]), covariance_matrix=torch.eye(mu.shape[1])
    )
    epsilon = base.sample((n_samples,))  # standard Gaussian random noise
    ones = torch.ones_like(epsilon)
    return mu.reshape(-1) * ones + std * epsilon


def compute_invalidation_rate(torch_model, random_samples):
    """
    Monte-Carlo invalidation rate: the fraction of perturbed samples the model
    classifies as the negative class (column-1 score <= 0.5).
    """
    yhat = torch_model(random_samples)[:, 1]
    hat = (yhat > 0.5).float()
    return 1 - torch.mean(hat, 0)


def probe_recourse(
    torch_model,
    x: np.ndarray,
    cat_feature_indices: List[int],
    binary_cat_features: bool = True,
    feature_costs: Optional[List[float]] = None,
    lr: float = 0.07,
    lambda_param: float = 5,
    y_target: Optional[List[float]] = None,
    n_iter: int = 500,
    t_max_min: float = 1.0,
    norm: int = 1,
    clamp: bool = False,
    loss_type: str = "MSE",
    invalidation_target: float = 0.45,
    inval_target_eps: float = 0.005,
    noise_variance: float = 0.01,
) -> np.ndarray:
    """
    Generate a PROBE counterfactual (Wachter-style gradient search with an
    invalidation-rate constraint) for the input instance x.

    Parameters
    ----------
    torch_model: black-box model; called on a (1, d) float tensor, expected to
        return two-class scores indexable as ``out[:, 1]``
    x: factual to explain, shape (1, d)
    cat_feature_indices: positions of categorical features in x
    binary_cat_features: if True, the encoding of x is done by drop_if_binary
    feature_costs: optional per-feature costs
    lr: learning rate for Adam
    lambda_param: weight factor for the cost term
    y_target: target class scores; defaults to [0.45, 0.55] (was a mutable
        default argument — now a None sentinel, same effective default)
    n_iter: maximum number of inner gradient steps
    t_max_min: maximum search time in minutes
    norm: L-norm used to calculate the cost
    clamp: if True, feature values are clamped to (0, 1) after each step
    loss_type: "MSE" or "BCE"
    invalidation_target: target recourse invalidation rate
    inval_target_eps: accepted tolerance around the invalidation target
    noise_variance: sigma^2 of the Gaussian perturbation model

    Returns
    -------
    Counterfactual example as np.ndarray of shape (d,)
    """
    # Deferred import keeps this module importable (and its helpers testable)
    # without the full package on the path.
    from methods.processing import reconstruct_encoding_constraints

    device = "cuda" if torch.cuda.is_available() else "cpu"
    torch.manual_seed(0)
    noise_variance = torch.tensor(noise_variance)

    if y_target is None:
        y_target = [0.45, 0.55]

    if feature_costs is not None:
        # feature_costs is a plain Python list; torch.from_numpy requires an
        # ndarray, so convert first (the original crashed here).
        feature_costs = torch.from_numpy(np.asarray(feature_costs)).float().to(device)

    x = torch.from_numpy(x).float().to(device)
    y_target = torch.tensor(y_target).float().to(device)
    lamb = torch.tensor(lambda_param).float().to(device)

    # x_new is the optimization variable; x_new_enc mirrors it with the
    # categorical encoding constraints of x re-applied (categoricals in {0,1}).
    x_new = x.clone().detach().requires_grad_(True)
    x_new_enc = reconstruct_encoding_constraints(
        x_new, cat_feature_indices, binary_cat_features
    )

    optimizer = optim.Adam([x_new], lr, amsgrad=True)
    softmax = nn.Softmax(dim=-1)

    if loss_type == "MSE":
        loss_fn = torch.nn.MSELoss()
        f_x_new = softmax(torch_model(x_new))[:, 1]
    else:
        loss_fn = torch.nn.BCELoss()
        f_x_new = torch_model(x_new)[:, 1]

    t0 = datetime.datetime.now()
    t_max = datetime.timedelta(minutes=t_max_min)

    costs = []
    ces = []

    random_samples = reparametrization_trick(x_new, noise_variance, n_samples=1000)
    invalidation_rate = compute_invalidation_rate(torch_model, random_samples)

    # Outer loop: keep searching (with a decaying cost weight) until the CF is
    # both valid and robust enough, or the time budget runs out.
    while (f_x_new <= DECISION_THRESHOLD) or (
        invalidation_rate > invalidation_target + inval_target_eps
    ):
        for it in range(n_iter):
            optimizer.zero_grad()
            f_x_new_binary = torch_model(x_new).squeeze(axis=0)

            cost = (
                torch.dist(x_new, x, norm)
                if feature_costs is None
                else torch.norm(feature_costs * (x_new - x), norm)
            )

            # Closed-form invalidation rate — differentiable w.r.t. x_new.
            invalidation_rate_c = compute_invalidation_rate_closed(
                torch_model, x_new, noise_variance
            )

            # Hinge loss: only penalize exceeding the invalidation target.
            # (clamp is autograd-safe, unlike in-place masked assignment.)
            loss_invalidation = torch.clamp(
                invalidation_rate_c - invalidation_target, min=0.0
            )

            loss = (
                3 * loss_invalidation
                + loss_fn(f_x_new_binary, y_target)
                + lamb * cost
            )
            loss.backward()
            optimizer.step()

            # Monte-Carlo estimate of the current invalidation rate.
            random_samples = reparametrization_trick(
                x_new, noise_variance, n_samples=10000
            )
            invalidation_rate = compute_invalidation_rate(torch_model, random_samples)

            if clamp:
                # Clamp the candidate in place, outside the autograd graph.
                # (The original clamped a throwaway clone, which had no effect.)
                with torch.no_grad():
                    x_new.clamp_(0, 1)

            x_new_enc = reconstruct_encoding_constraints(
                x_new, cat_feature_indices, binary_cat_features
            )
            f_x_new = torch_model(x_new)[:, 1]

            if (f_x_new > DECISION_THRESHOLD) and (
                invalidation_rate < invalidation_target + inval_target_eps
            ):
                costs.append(cost)
                # Snapshot the candidate — x_new itself keeps being mutated by
                # the optimizer, so storing a reference would alias it.
                ces.append(x_new.detach().clone())
                break

        # Relax the cost weight so later rounds favour validity/robustness.
        lamb -= 0.10

        if datetime.datetime.now() - t0 > t_max:
            print("Timeout")
            break

    if not ces:
        print("No Counterfactual Explanation Found at that Target Rate - Try Different Target")
    else:
        print("Counterfactual Explanation Found")
        # Pick the cheapest of the accepted candidates.
        costs = torch.tensor([float(c) for c in costs])
        min_idx = int(torch.argmin(costs).numpy())
        x_new_enc = ces[min_idx]

    return x_new_enc.cpu().detach().numpy().squeeze(axis=0)
class Probe(RecourseMethod):
    """
    Implementation of the PROBE framework using Wachter recourse generation
    from Pawelczyk et al. [1]_.

    Parameters
    ----------
    mlmodel : model.MLModel
        Black-Box-Model
    hyperparams : dict
        Dictionary containing hyperparameters; missing keys fall back to
        ``_DEFAULT_HYPERPARAMS``.

    Methods
    -------
    get_counterfactuals:
        Generate counterfactual examples for given factuals.

    .. [1] Martin Pawelczyk,Teresa Datta, Johan Van den Heuvel, Gjergji Kasneci, Himabindu Lakkaraju.2023
       Probabilistically Robust Recourse: Navigating the Trade-offs between Costs and Robustness in Algorithmic Recourse
       https://openreview.net/pdf?id=sC-PmTsiTB(2023).
    """

    # "_optional_" marks a key the caller may omit entirely; the remaining
    # defaults mirror the probe_recourse signature.
    _DEFAULT_HYPERPARAMS = {
        "feature_cost": "_optional_",
        "lr": 0.001,
        "lambda_": 0.01,
        "n_iter": 1000,
        "t_max_min": 1.0,
        "norm": 1,
        "clamp": True,
        "loss_type": "MSE",
        "y_target": [0, 1],
        "binary_cat_features": True,
        "noise_variance": 0.01,
        "invalidation_target": 0.45,
        "inval_target_eps": 0.005,
    }

    def __init__(self, mlmodel, hyperparams):
        super().__init__(mlmodel)

        params = merge_default_parameters(hyperparams, self._DEFAULT_HYPERPARAMS)

        self._feature_costs = params["feature_cost"]
        self._lr = params["lr"]
        self._lambda_param = params["lambda_"]
        self._n_iter = params["n_iter"]
        self._t_max_min = params["t_max_min"]
        self._norm = params["norm"]
        self._clamp = params["clamp"]
        self._loss_type = params["loss_type"]
        self._y_target = params["y_target"]
        self._binary_cat_features = params["binary_cat_features"]
        self._noise_variance = params["noise_variance"]
        self._invalidation_target = params["invalidation_target"]
        self._inval_target_eps = params["inval_target_eps"]

    def get_counterfactuals(self, factuals: pd.DataFrame) -> pd.DataFrame:
        """Generate one counterfactual per row of ``factuals``."""
        # Bring the factuals into the model's feature order first.
        factuals = self._mlmodel.get_ordered_features(factuals)

        encoded_feature_names = self._mlmodel.data.categorical
        cat_features_indices = [
            factuals.columns.get_loc(feature) for feature in encoded_feature_names
        ]

        def _recourse_for_row(row):
            # raw=True hands us a plain ndarray; probe_recourse expects (1, d).
            return probe_recourse(
                self._mlmodel.raw_model,
                row.reshape((1, -1)),
                cat_features_indices,
                binary_cat_features=self._binary_cat_features,
                feature_costs=self._feature_costs,
                lr=self._lr,
                lambda_param=self._lambda_param,
                n_iter=self._n_iter,
                t_max_min=self._t_max_min,
                norm=self._norm,
                clamp=self._clamp,
                loss_type=self._loss_type,
                invalidation_target=self._invalidation_target,
                inval_target_eps=self._inval_target_eps,
                noise_variance=self._noise_variance,
            )

        df_cfs = factuals.apply(_recourse_for_row, raw=True, axis=1)

        df_cfs = check_counterfactuals(self._mlmodel, df_cfs, factuals.index)
        return self._mlmodel.get_ordered_features(df_cfs)
# methods/catalog/probe/reproduce.py
#
# Script reproducing the PROBE table results (Pawelczyk et al., 2023) on the
# CARLA-style data/model catalogs.

import os

import numpy as np
import pandas as pd
import torch
from torch.distributions.multivariate_normal import MultivariateNormal

from data.catalog import DataCatalog
from models.catalog import ModelCatalog
from models.negative_instances import predict_negative_instances
from methods.catalog.probe import Probe


# Per-dataset training hyperparameters for the linear model.
training_params_linear = {
    "adult": {"lr": 0.002, "epochs": 100, "batch_size": 1024},
    "compass": {"lr": 0.002, "epochs": 25, "batch_size": 128},
    "credit": {"lr": 0.002, "epochs": 50, "batch_size": 2048},
}
# Per-dataset training hyperparameters for the MLP ("ann") model.
training_params_ann = {
    "adult": {"lr": 0.002, "epochs": 30, "batch_size": 1024},
    "compass": {"lr": 0.002, "epochs": 25, "batch_size": 25},
    "credit": {"lr": 0.002, "epochs": 50, "batch_size": 2048},
}

training_params = {"linear": training_params_linear, "mlp": training_params_ann}


def expect(model, test_factual, sigma2, invalidation_target):
    """Run PROBE on the given factuals and return the counterfactual frame."""
    hyperparams = {
        "loss_type": "BCE",
        "binary_cat_features": False,
        "invalidation_target": invalidation_target,
        "inval_target_eps": 0.010,
        "noise_variance": sigma2,
        "n_iter": 200,
        "t_max_min": 0.50,
    }
    return Probe(model, hyperparams).get_counterfactuals(test_factual)


def perturb_sample(x, n_samples, sigma2):
    """
    Draw ``n_samples`` samples from N(x, sigma2 * I).

    :param x: tensor of shape (1, d)
    :return: (samples of shape (n_samples, d), covariance matrix Sigma)
    """
    # Stack n_samples copies of x, one per row.
    X = x.repeat(n_samples, 1)

    # Sample zero-mean Gaussian noise with covariance sigma2 * I.
    Sigma = torch.eye(x.shape[1]) * sigma2
    eps = MultivariateNormal(
        loc=torch.zeros(x.shape[1]), covariance_matrix=Sigma
    ).sample((n_samples,))

    return X + eps, Sigma


def run_experiment(cf_method,
                   hidden_width,
                   data_name,
                   model_type,
                   backend,
                   sigma2,
                   invalidation_target,
                   n_cfs=100,
                   n_samples=10000,
                   ):
    """
    Generate recourses for one (method, dataset, model) configuration and print
    recourse accuracy (AR), invalidation rate (AIR) and L1 cost (AC).
    """
    print(
        f"Running experiments with: {cf_method} {data_name} {model_type} {hidden_width}"
    )

    if model_type == 'mlp':
        data = DataCatalog(data_name, model_type='mlp', train_split=0.8)
    else:
        data = DataCatalog(data_name, model_type='linear', train_split=0.8)

    # Validates that this (model, dataset) combination is known; training
    # itself is skipped here — a pretrained model is loaded from the catalog.
    params = training_params[model_type][data_name]  # noqa: F841
    model = ModelCatalog(
        data, model_type, load_online=False, backend=backend
    )

    # Recourse is generated for the first n_cfs negatively classified instances.
    # .copy(): we add a "prediction" column below; mutating the iloc slice
    # would trigger pandas' SettingWithCopy behaviour.
    factuals = predict_negative_instances(model, data)
    test_factual = factuals.iloc[:n_cfs].copy()

    if cf_method == 'probe':
        df_cfs = expect(model,
                        test_factual,
                        sigma2=sigma2,
                        invalidation_target=invalidation_target)
    else:
        raise ValueError(f"cf_method {cf_method} not recognized")

    # Empirically estimate each counterfactual's invalidation rate by
    # perturbing it n_samples times and counting decision flips.
    result = []
    cf_predictions = []
    for i, x in df_cfs.iterrows():
        x = torch.Tensor(x).unsqueeze(0)
        X_pert, _ = perturb_sample(x, n_samples, sigma2=sigma2)
        if backend == "pytorch":
            prediction = (model.predict(x).squeeze() > 0.5).int()
            cf_predictions.append(prediction.item())
            delta_M = torch.mean(
                (1 - (model.predict(X_pert).squeeze() > 0.5).int()).float()
            ).item()
        else:
            prediction = (model.predict(x).squeeze() > 0.5).astype(int)
            cf_predictions.append(prediction)
            delta_M = np.mean(
                1 - (model.predict(X_pert).squeeze() > 0.5).astype(int)
            )

        result.append(delta_M)
    df_cfs["prediction"] = cf_predictions

    folder_name = f"{cf_method}_{data_name}_{model_type}_{hidden_width[0]}_sigma2_{sigma2}_intarget_{invalidation_target}"
    # exist_ok avoids the exists()/makedirs() race of the original.
    os.makedirs(f"recourse_invalidation_results/{folder_name}", exist_ok=True)

    factual_predictions = test_factual[data.target]
    test_factual["prediction"] = factual_predictions

    df = pd.DataFrame(result)

    # df holds the average invalidation rate per counterfactual.
    print("Average Recourse Accuracy (AR): ", df_cfs["prediction"].mean())
    print("Average Invalidation Rate (AIR): ", df.mean().values[0])
    # Align factual and counterfactual columns, then compute the L1 cost.
    test_factual = test_factual.drop(columns=['y'])[df_cfs.columns]
    print("test_factual columns ", test_factual.columns)
    print("df_cf columns ", df_cfs.columns)
    cost = np.mean(
        np.abs(
            test_factual.drop(columns=["prediction"]).values
            - df_cfs.drop(columns=["prediction"]).values
        ).sum(axis=1)
    )
    print("Average Cost (AC): ", cost)


if __name__ == "__main__":
    # Recreate the table results for the PROBE method only.
    sigmas2 = [0.01]  # [0.005, 0.01, 0.015, 0.02, 0.025]
    invalidation_targets = [0.35]  # [0.15, 0.20, 0.25, 0.30]
    hidden_widths = [[50]]
    backend = "pytorch"
    methods = ["probe"]  # "arar", "roar", "wachter"
    datasets = ["adult"]  # ["compass", "credit", "adult"]
    models = ["mlp", "linear"]

    n_cfs = 5  # just try 50 for faster results
    n_samples = 10000

    for method in methods:
        if method != 'probe':
            continue
        for model in models:
            for dataset in datasets:
                # Linear models have no hidden layer; [0] is a placeholder.
                widths = hidden_widths if model == "mlp" else [[0]]
                for hidden_width in widths:
                    for sigma2 in sigmas2:
                        if model == "mlp":
                            print(f'Generating recourses for sigma2={sigma2}')
                        for invalidation_target in invalidation_targets:
                            run_experiment(
                                method,
                                hidden_width,
                                dataset,
                                model,
                                backend,
                                n_cfs=n_cfs,
                                n_samples=n_samples,
                                sigma2=sigma2,
                                invalidation_target=invalidation_target)