probabl-ai · Aml-Ismail · Apr 20, 2025 · Apr 22, 2025 · Apr 22, 2025 · Apr 24, 2025
diff --git a/skore/src/skore/sklearn/train_test_split/train_test_split.py b/skore/src/skore/sklearn/train_test_split/train_test_split.py
@@ -137,27 +137,30 @@ class labels.
 
     new_arrays = list(arrays)
     keys = []
+
     if X is not None:
         new_arrays.append(X)
         keys += ["X"]
+
     if y is not None:
         new_arrays.append(y)
         keys += ["y"]
 
-    if as_dict and arrays:
-        raise ValueError(
-            "When as_dict=True, arrays must be passed as keyword arguments.\n"
-            "Example: train_test_split(X=X, y=y, sw=sample_weight, as_dict=True)"
-        )
+    if as_dict and X is None and y is None:
+        if keyword_arrays:
+            new_arrays = list(keyword_arrays.values())
+        else:
+            X, y = (arrays[0], arrays[1]) if len(arrays) >= 2 else (arrays[0], None)
+            new_arrays = [X, y]
+            keys = ["X", "y"]
 
-    if keyword_arrays:
-        if X is None and y is None:
-            arrays = tuple(
-                keyword_arrays.values()
-            )  # if X and y is not passed but other variables
-        keys += list(keyword_arrays.keys())
-        new_arrays += list(keyword_arrays.values())
+    keys += list(keyword_arrays.keys())
+    new_arrays += list(keyword_arrays.values())
 
+    if not new_arrays:
+        raise ValueError("At least one array must be provided")
+
+    # Perform the train-test split using sklearn
     output = sklearn.model_selection.train_test_split(
         *new_arrays,
         test_size=test_size,
@@ -168,7 +171,10 @@ class labels.
     )
 
     if X is None:
-        X = arrays[0] if len(arrays) == 1 else arrays[-2]
+        if arrays:
+            X = arrays[0] if len(arrays) == 1 else arrays[-2]
+        elif keyword_arrays and "X" in keyword_arrays:
+            X = keyword_arrays["X"]
 
     if y is None and len(arrays) >= 2:
         y = arrays[-1]
@@ -183,7 +189,6 @@ class labels.
         y_test = None
 
     ml_task = _find_ml_task(y)
-
     kwargs = dict(
         arrays=new_arrays,
         test_size=test_size,
@@ -198,6 +203,7 @@ class labels.
         ml_task=ml_task,
     )
 
+    # Display any warnings related to train-test split
     from skore import console  # avoid circular import
 
     for warning_class in TRAIN_TEST_SPLIT_WARNINGS:

diff --git a/skore/tests/unit/sklearn/train_test_split/test_train_test_split.py b/skore/tests/unit/sklearn/train_test_split/test_train_test_split.py
@@ -1,6 +1,7 @@
 import warnings
 from datetime import datetime
 
+import numpy as np
 import pandas
 import polars
 import pytest
@@ -183,17 +184,17 @@ def test_train_test_split_kwargs():
 
 
 def test_train_test_split_dict_kwargs():
-    """Passing data without keyword arguments with return_dict=True
-    should raise ValueError."""
+    """Positional X and y with as_dict=True give the X/y train and test keys."""
 
     X = [[1]] * 20
     y = [0] * 10 + [1] * 10
 
-    with pytest.raises(
-        ValueError,
-        match="When as_dict=True, arrays must be passed as keyword arguments",
-    ):
-        train_test_split(X, y, random_state=0, as_dict=True)
+    result = train_test_split(X, y, random_state=0, as_dict=True)
+
+    assert "X_train" in result
+    assert "X_test" in result
+    assert "y_train" in result
+    assert "y_test" in result
 
 
 def test_train_test_split_check_dict():
@@ -221,3 +222,91 @@ def test_train_test_split_check_dict_no_X_no_y():
     output = train_test_split(z=z, random_state=0, as_dict=True)
     keys = output.keys()
     assert list(keys) == ["z_train", "z_test"]
+
+
+def test_train_test_split_as_dict_with_all_keyword_args():
+    """Check the dict output when every array is passed by keyword.
+
+    Each input name must yield a ``<name>_train``/``<name>_test`` entry, and
+    ``test_size=0.2`` on 10 samples must produce an 8/2 split.
+    """
+    n_samples = 10
+    features = np.arange(n_samples).reshape(n_samples, 1)
+    named_inputs = {
+        "X": features,
+        "y": np.arange(n_samples),
+        "sample_weights": np.ones(n_samples),
+    }
+
+    split = train_test_split(
+        **named_inputs, test_size=0.2, as_dict=True, random_state=0
+    )
+
+    expected_keys = {"X_train", "X_test", "y_train", "y_test"}
+    expected_keys |= {"sample_weights_train", "sample_weights_test"}
+    assert set(split.keys()) == expected_keys
+
+    # 10 samples with test_size=0.2 -> 8 train rows, 2 test rows.
+    n_train, n_test = split["X_train"].shape[0], split["X_test"].shape[0]
+    assert n_train == 8
+    assert n_test == 2
+
+
+def test_train_test_split_as_dict_with_multiple_named_inputs():
+    """Check that an extra named array ``z`` is split alongside X and y.
+
+    With ``as_dict=True`` the output must contain a train and a test entry for
+    each of X, y and z; ``test_size=0.5`` on 10 samples gives 5 rows per part.
+    """
+    features = np.arange(10).reshape(10, 1)
+    target = np.arange(10)
+    extra = np.arange(10, 20)
+
+    split = train_test_split(
+        X=features, y=target, z=extra, test_size=0.5, as_dict=True, random_state=42
+    )
+
+    required = {"X_train", "X_test", "y_train", "y_test", "z_train", "z_test"}
+    missing = [name for name in required if name not in split]
+    assert not missing
+
+    for part in ("z_train", "z_test"):
+        assert split[part].shape[0] == 5
+
+
+def test_train_test_split_as_dict_with_mixed_input_types():
+    """Check as_dict=True with X as a nested list and y as a NumPy array."""
+    rows = [[value] for value in range(10)]
+    labels = np.arange(10)
+    split = train_test_split(
+        X=rows, y=labels, test_size=0.3, as_dict=True, random_state=1
+    )
+    assert set(split) == {"X_train", "X_test", "y_train", "y_test"}
+    # test_size=0.3 on 10 samples -> 7 train rows and 3 test rows.
+    assert (len(split["X_train"]), len(split["X_test"])) == (7, 3)
+
+
+def test_train_test_split_only_X():
+    """Check that passing only X with as_dict=True yields both X entries."""
+    split = train_test_split(X=[[1]] * 20, random_state=0, as_dict=True)
+    for expected_key in ("X_train", "X_test"):
+        assert expected_key in split
+
+
+def test_empty_input():
+    """Check that splitting empty positional X and y raises ValueError."""
+    empty_features, empty_target = [], []
+    with pytest.raises(ValueError):
+        train_test_split(empty_features, empty_target)
+
+
+def test_train_test_split_as_dict_with_positioned_args():
+    """Check that positional X and y are accepted together with as_dict=True."""
+    features = [[1]] * 20
+    labels = [0] * 10 + [1] * 10
+
+    split = train_test_split(
+        features, labels, test_size=0.2, as_dict=True, random_state=0
+    )
+
+    assert {"X_train", "X_test", "y_train", "y_test"} <= split.keys()