OpenTabular
diff --git a/‎.gitignore
Lines changed: 1 addition & 1 deletion b/‎.gitignore
Lines changed: 1 addition & 1 deletion
diff --git a/‎README.md
Lines changed: 8 additions & 3 deletions b/‎README.md
Lines changed: 8 additions & 3 deletions
diff --git a/‎docs/api/preprocessing/Preprocessor.rst
Lines changed: 0 additions & 5 deletions b/‎docs/api/preprocessing/Preprocessor.rst
Lines changed: 0 additions & 5 deletions
diff --git a/‎docs/api/preprocessing/index.rst
Lines changed: 0 additions & 20 deletions b/‎docs/api/preprocessing/index.rst
Lines changed: 0 additions & 20 deletions
diff --git a/‎docs/api/utils/Preprocessor.rst
Lines changed: 0 additions & 5 deletions b/‎docs/api/utils/Preprocessor.rst
Lines changed: 0 additions & 5 deletions
diff --git a/‎docs/index.rst
Lines changed: 0 additions & 1 deletion b/‎docs/index.rst
Lines changed: 0 additions & 1 deletion
diff --git a/‎mamba_tabular_summary.pdf
-79.3 KB b/‎mamba_tabular_summary.pdf
-79.3 KB
diff --git a/‎mambular/__init__.py
Lines changed: 1 addition & 2 deletions b/‎mambular/__init__.py
Lines changed: 1 addition & 2 deletions
diff --git a/‎mambular/__version__.py
Lines changed: 1 addition & 1 deletion b/‎mambular/__version__.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎mambular/arch_utils/layer_utils/embedding_layer.py
Lines changed: 19 additions & 0 deletions b/‎mambular/arch_utils/layer_utils/embedding_layer.py
Lines changed: 19 additions & 0 deletions
diff --git a/‎mambular/base_models/modern_nca.py
Lines changed: 6 additions & 4 deletions b/‎mambular/base_models/modern_nca.py
Lines changed: 6 additions & 4 deletions
@@ -173,4 +173,4 @@ docs/_build/doctrees/*
 docs/_build/html/*
 
 
-dev/*
+dev/*
@@ -23,7 +23,7 @@ Mambular is a Python library for tabular deep learning. It includes models that
 
 <h3>⚡ What's New ⚡</h3>
 <ul>
-  <li>New Models: `Tangos`, `AutoInt`, `Trompt`</li>
+  <li>New Models: `Tangos`, `AutoInt`, `Trompt`, `ModernNCA`</li>
   <li>Pretraining optionality for suitable models.</li>
   <li>Individual preprocessing: preprocess each feature differently, use pre-trained models for categorical encoding</li>
   <li>Extract latent representations of tables</li>
@@ -82,6 +82,8 @@ Mambular is a Python package that brings the power of advanced deep learning arc
 | `AutoInt`        | Automatic Feature Interaction Learning via Self-Attentive Neural Networks introduced [here](https://arxiv.org/abs/1810.11921).                      |
 | `Trompt`        | Trompt: Towards a Better Deep Neural Network for Tabular Data introduced [here](https://arxiv.org/abs/2305.18446).                                  |
 | `Tangos`        | Tangos: Regularizing Tabular Neural Networks through Gradient Orthogonalization and Specialization introduced [here](https://openreview.net/pdf?id=n6H86gW8u0d).                                  |
+| `ModernNCA`        | Revisiting Nearest Neighbor for Tabular Data: A Deep Tabular Baseline Two Decades Later introduced [here](https://arxiv.org/abs/2407.03257).                                  |
+| `TabR` | TabR: Tabular Deep Learning Meets Nearest Neighbors in 2023 introduced [here](https://arxiv.org/abs/2307.14338). |
 
 
 
@@ -118,8 +120,11 @@ pip install mamba-ssm
 
 <h2> Preprocessing </h2>
 
-Mambular simplifies data preprocessing with a range of tools designed for easy transformation of tabular data.
-Specify a default method, or a dictionary defining individual preprocessing methods for each feature.
+Mambular uses pretab preprocessing: https://github.com/OpenTabular/PreTab
+
+Hence, data types etc. are detected automatically, and all preprocessing methods from pretab as well as from `sklearn.preprocessing` are available.
+Additionally, you can specify that each feature is preprocessed differently, according to your requirements, by setting the `feature_preprocessing={}` argument during model initialization.
+For an overview of all available methods, see [pretab](https://github.com/OpenTabular/PreTab).
 
 <h3> Data Type Detection and Transformation </h3>
 
 
@@ -31,7 +31,6 @@
 
    api/models/index
    api/base_models/index
-   api/preprocessing/index
    api/data_utils/index
    api/configs/index
 
 
@@ -1,11 +1,10 @@
-from . import base_models, data_utils, models, preprocessing, utils
+from . import base_models, data_utils, models, utils
 from .__version__ import __version__
 
 __all__ = [
     "__version__",
     "base_models",
     "data_utils",
     "models",
-    "preprocessing",
     "utils",
 ]
@@ -17,5 +17,5 @@
 
 # The following line *must* be the last in the module, exactly as formatted:
 
-__version__ = "1.4.0"
+__version__ = "1.5.0"
 
@@ -125,6 +125,8 @@ def __init__(self, num_feature_info, cat_feature_info, emb_feature_info, config)
         if self.layer_norm_after_embedding:
             self.embedding_norm = nn.LayerNorm(self.d_model)
 
+        self.feature_info = (num_feature_info, cat_feature_info, emb_feature_info)
+
     def forward(self, num_features, cat_features, emb_features):
         """Defines the forward pass of the model.
 
@@ -171,6 +173,8 @@ def forward(self, num_features, cat_features, emb_features):
 
         # Process numerical embeddings based on embedding_type
         if self.embedding_type == "plr":
+            # check pre-processing type compatibility with plr
+            self.check_plr_embedding_compatibility(self.feature_info)
             # For PLR, pass all numerical features together
             if num_features is not None:
                 num_features = torch.stack(num_features, dim=1).squeeze(
@@ -226,6 +230,21 @@ def forward(self, num_features, cat_features, emb_features):
             x = self.embedding_dropout(x)
 
         return x
+    
+    def check_plr_embedding_compatibility(self, feature_info:tuple):
+        # List of incompatible preprocessing terms for PLR embedding
+        incompatible_terms = ['ple', 'one-hot', 'polynomial', 'splines', 'sigmoid', 'rbf']
+        
+        # Iterate through each dictionary in the tuple (data)
+        for sub_dict in feature_info:
+            # Iterate through each feature in the current dictionary
+            for feature, properties in sub_dict.items():
+                preprocessing = properties.get('preprocessing', '')
+                
+                # Check for incompatible terms in the preprocessing string
+                for term in incompatible_terms:
+                    if term in preprocessing:
+                        raise ValueError(f"PLR embedding type doesn't work with the '{term}' pre-processing method.\n")
 
 
 class OneHotEncoding(nn.Module):
 
@@ -22,7 +22,7 @@ def __init__(
         self.save_hyperparameters(ignore=["feature_information"])
 
         self.returns_ensemble = False
-        self.uses_nca_candidates = True
+        self.uses_candidates = True
 
         self.T = config.temperature
         self.sample_rate = config.sample_rate
@@ -31,6 +31,7 @@ def __init__(
                 *feature_information,
                 config=config,
             )
+            
             input_dim = np.sum(
                 [len(info) * self.hparams.d_model for info in feature_information]
             )
@@ -75,7 +76,7 @@ def forward(self, *data):
             x = self.post_encoder(x)
         return self.tabular_head(x)
 
-    def nca_train(self, *data, targets, candidate_x, candidate_y):
+    def train_with_candidates(self, *data, targets, candidate_x, candidate_y):
         """NCA-style training forward pass selecting candidates."""
         if self.hparams.use_embeddings:
             x = self.embedding_layer(*data)
@@ -85,6 +86,7 @@ def nca_train(self, *data, targets, candidate_x, candidate_y):
             B, S, D = candidate_x.shape
             candidate_x = candidate_x.reshape(B, S * D)
         else:
+
             x = torch.cat([t for tensors in data for t in tensors], dim=1)
             candidate_x = torch.cat(
                 [t for tensors in candidate_x for t in tensors], dim=1
@@ -129,7 +131,7 @@ def nca_train(self, *data, targets, candidate_x, candidate_y):
 
         return logits
 
-    def nca_validate(self, *data, candidate_x, candidate_y):
+    def validate_with_candidates(self, *data, candidate_x, candidate_y):
         """Validation forward pass with NCA-style candidate selection."""
         if self.hparams.use_embeddings:
             x = self.embedding_layer(*data)
@@ -172,7 +174,7 @@ def nca_validate(self, *data, candidate_x, candidate_y):
 
         return logits
 
-    def nca_predict(self, *data, candidate_x, candidate_y):
+    def predict_with_candidates(self, *data, candidate_x, candidate_y):
         """Prediction forward pass with candidate selection."""
         if self.hparams.use_embeddings:
             x = self.embedding_layer(*data)
Original file line number	Diff line number	Diff line change
`@@ -173,4 +173,4 @@ docs/_build/doctrees/*`
`173`	`173`	`docs/_build/html/*`
`174`	`174`
`175`	`175`
`176`		`-dev/*`
	`176`	`+dev/*`
Original file line number	Diff line number	Diff line change
`@@ -17,5 +17,5 @@`
`17`	`17`
`18`	`18`	`# The following line must be the last in the module, exactly as formatted:`
`19`	`19`
`20`		`-__version__ = "1.4.0"`
	`20`	`+__version__ = "1.5.0"`
`21`	`21`