Quantco
diff --git a/metalearners/_narwhals_utils.py b/metalearners/_narwhals_utils.py
Lines changed: 69 additions & 0 deletions
diff --git a/metalearners/_utils.py b/metalearners/_utils.py
Lines changed: 24 additions & 15 deletions
@@ -0,0 +1,69 @@
+# Copyright (c) QuantCo 2024-2025
+# SPDX-License-Identifier: BSD-3-Clause
+
+from collections.abc import Sequence
+from types import ModuleType
+
+import narwhals.stable.v1 as nw
+import numpy as np
+import pandas as pd
+import polars as pl
+from narwhals.dependencies import is_into_series
+
+from metalearners._typing import Vector
+
+
+def nw_to_dummies(
+    x: nw.Series, categories: Sequence, column_name: str, drop_first: bool = True
+) -> nw.DataFrame:
+    """Turn a vector into a matrix with dummies.
+
+    This operation is also referred to as one-hot-encoding.
+
+    One ``Int8`` indicator column is produced per retained category; each column
+    is named by suffixing ``_{category}`` to the selected column's name. If
+    ``drop_first`` is ``True``, the first entry of ``categories`` gets no column.
+
+    ``x`` is expected to have values which can be cast to integer.
+    ``column_name`` is the column selected from ``x.to_frame()``.
+
+    Raises a ``ValueError`` if fewer than two categories are provided or if ``x``
+    contains a value which isn't part of ``categories``.
+    """
+    if len(categories) < 2:
+        raise ValueError(
+            "categories to be used for nw_to_dummies must have at least two "
+            "distinct values."
+        )
+
+    # Every observed value has to be a known category. A proper-subset comparison
+    # of the two sets would miss inputs such as categories [0, 1] with observed
+    # values {0, 2}, hence the explicit set difference.
+    unexpected_values = set(x.unique()) - set(categories)
+    if unexpected_values:
+        raise ValueError("We observed a value which isn't part of the categories.")
+
+    relevant_categories = categories[1:] if drop_first else categories
+    return x.to_frame().select(
+        [
+            (nw.col(column_name) == cat).cast(nw.Int8).name.suffix(f"_{cat}")
+            for cat in relevant_categories
+        ]
+    )
+
+
+def vector_to_nw(x: Vector, native_namespace: ModuleType | None = None) -> nw.Series:
+    """Wrap a ``Vector`` in a narwhals Series.
+
+    numpy arrays are loaded through ``native_namespace``, which is therefore
+    mandatory for them. Objects recognised by ``is_into_series`` are wrapped
+    directly.
+
+    Raises a ``ValueError`` for a numpy array without ``native_namespace`` and a
+    ``TypeError`` for any other unsupported input.
+    """
+    if not isinstance(x, np.ndarray):
+        if not is_into_series(x):
+            raise TypeError(f"Unexpected type {type(x)} for Vector.")
+        return nw.from_native(x, series_only=True, eager_only=True)
+
+    if native_namespace is None:
+        raise ValueError(
+            "x is a numpy object but no native_namespace was provided to "
+            "load it into narwhals."
+        )
+
+    # narwhals doesn't seem to like 1d numpy arrays: go through a single-column
+    # 2d array, load that as a narwhals DataFrame and pick its only column.
+    single_column_frame = nw.from_numpy(
+        x.reshape(-1, 1), native_namespace=native_namespace
+    )
+    return single_column_frame["column_0"]
+
+
+def infer_native_namespace(df_nw: nw.DataFrame) -> ModuleType:
+    """Return the module backing ``df_nw``: ``pd`` for pandas, ``pl`` for polars.
+
+    Raises a ``TypeError`` for any other implementation.
+    """
+    namespace_by_implementation = {"PANDAS": pd, "POLARS": pl}
+    native_namespace = namespace_by_implementation.get(df_nw.implementation.name)
+    if native_namespace is None:
+        raise TypeError("Couldn't infer native_namespace of matrix.")
+    return native_namespace
+
+
+def stringify_column_names(df_nw: nw.DataFrame) -> nw.DataFrame:
+    """Rename every column of ``df_nw`` to the ``str`` of its current name."""
+    original_names = df_nw.columns
+    renaming = dict(zip(original_names, map(str, original_names)))
+    return df_nw.rename(renaming)
@@ -10,6 +10,7 @@
 import narwhals.stable.v1 as nw
 import numpy as np
 import pandas as pd
+import polars as pl
 import scipy
 from narwhals.dependencies import is_into_dataframe, is_into_series
 from sklearn.base import is_classifier, is_regressor
@@ -258,24 +259,32 @@ def check_probability(p: float, zero_included=False, one_included=False) -> None
         raise ValueError("Probability p must be less than or equal to 1.")
 
 
-def convert_treatment(treatment: Vector) -> np.ndarray:
-    """Convert to ``np.ndarray`` and adapt dtype, if necessary."""
-    if isinstance(treatment, np.ndarray):
-        new_treatment = treatment.copy()
-    elif nw.dependencies.is_into_series(treatment):
-        new_treatment = nw.from_native(
-            treatment, series_only=True, eager_only=True
-        ).to_numpy()  # type: ignore
-    if new_treatment.dtype == bool:
-        return new_treatment.astype(int)
-    if new_treatment.dtype == float and all(x.is_integer() for x in new_treatment):
-        return new_treatment.astype(int)
-
-    if not pd.api.types.is_integer_dtype(new_treatment):
+def adapt_treatment_dtypes(treatment: Vector) -> Vector:
+    """Cast the dtype of treatment to integer, if necessary.
+
+    A treatment which already has an integer dtype is returned unchanged. A
+    boolean treatment, or a float treatment whose every element satisfies
+    ``is_integer()``, is cast to integer.
+
+    polars Series are handled in a dedicated branch; every other input is
+    accessed through its ``dtype`` attribute and ``astype`` method.
+
+    Raises if not possible.
+    """
+    if isinstance(treatment, pl.Series):
+        dtype = treatment.dtype
+        if dtype.is_integer():
+            return treatment
+        # A boolean dtype is recognised via the name of its Python counterpart.
+        if dtype.to_python().__name__ == "bool":
+            return treatment.cast(int)
+        # Element-wise check that no float carries a fractional part.
+        if dtype.is_float() and all(x.is_integer() for x in treatment):
+            return treatment.cast(int)
+        raise TypeError(
+            "Treatment must be boolean, integer or float with integer values."
+        )
+
+    # Everything that isn't a polars Series ends up here; presumably numpy arrays
+    # and pandas Series -- TODO confirm against the definition of Vector.
+    if treatment.dtype == bool:
+        return treatment.astype(int)
+    # NOTE(review): comparing against the builtin ``float`` presumably only
+    # matches 64-bit floats -- confirm whether float32 input should be accepted.
+    if treatment.dtype == float and all(x.is_integer() for x in treatment):
+        return treatment.astype(int)
+    if not pd.api.types.is_integer_dtype(treatment):
         raise TypeError(
             "Treatment must be boolean, integer or float with integer values."
         )
-    return new_treatment
+    return treatment
 
 
 def supports_categoricals(model: _ScikitModel) -> bool: