From 28a929810bb960e2476ab420c7c2d4f174d47e84 Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Fri, 31 Jan 2025 10:16:19 +0100
Subject: [PATCH 01/29] narwhals implementation for from_dataframe and test benchmark

---
 darts/timeseries.py   | 173 ++++++++++++++++++++++++++++++++++++++++++
 narwhals_test_time.py | 123 ++++++++++++++++++++++++++++++
 2 files changed, 296 insertions(+)
 create mode 100644 narwhals_test_time.py

diff --git a/darts/timeseries.py b/darts/timeseries.py
index 4b7940e91f..392727c932 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -46,9 +46,11 @@
 
 import matplotlib.axes
 import matplotlib.pyplot as plt
+import narwhals as nw
 import numpy as np
 import pandas as pd
 import xarray as xr
+from narwhals.typing import DataFrameT
 from pandas.tseries.frequencies import to_offset
 from scipy.stats import kurtosis, skew
 
@@ -745,6 +747,177 @@ def from_dataframe(
             fillna_value=fillna_value,
         )
 
+    @classmethod
+    def from_narwhals_dataframe(
+        cls,
+        df: DataFrameT,
+        time_col: Optional[str] = None,
+        value_cols: Optional[Union[list[str], str]] = None,
+        fill_missing_dates: Optional[bool] = False,
+        freq: Optional[Union[str, int]] = None,
+        fillna_value: Optional[float] = None,
+        static_covariates: Optional[Union[pd.Series, pd.DataFrame]] = None,
+        hierarchy: Optional[dict] = None,
+    ) -> Self:
+        """
+        Build a deterministic TimeSeries instance built from a selection of columns of a DataFrame.
+        One column (or the DataFrame index) has to represent the time,
+        and a list of columns `value_cols` has to represent the values for this time series.
+
+        Parameters
+        ----------
+        df
+            The DataFrame
+        time_col
+            The time column name. If set, the column will be cast to a pandas DatetimeIndex (if it contains
+            timestamps) or a RangeIndex (if it contains integers).
+            If not set, the DataFrame index will be used. In this case the DataFrame must contain an index that is
+            either a pandas DatetimeIndex, a pandas RangeIndex, or a pandas Index that can be converted to a
+            RangeIndex. It is better if the index has no holes; alternatively setting `fill_missing_dates` can in some
+            cases solve these issues (filling holes with NaN, or with the provided `fillna_value` numeric value, if
+            any).
+        value_cols
+            A string or list of strings representing the value column(s) to be extracted from the DataFrame. If set to
+            `None`, the whole DataFrame will be used.
+        fill_missing_dates
+            Optionally, a boolean value indicating whether to fill missing dates (or indices in case of integer index)
+            with NaN values. This requires either a provided `freq` or the possibility to infer the frequency from the
+            provided timestamps. See :meth:`_fill_missing_dates() <TimeSeries._fill_missing_dates>` for more info.
+        freq
+            Optionally, a string or integer representing the frequency of the underlying index. This is useful in order
+            to fill in missing values if some dates are missing and `fill_missing_dates` is set to `True`.
+            If a string, represents the frequency of the pandas DatetimeIndex (see `offset aliases
+            <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_ for more info on
+            supported frequencies).
+            If an integer, represents the step size of the pandas Index or pandas RangeIndex.
+        fillna_value
+            Optionally, a numeric value to fill missing values (NaNs) with.
+        static_covariates
+            Optionally, a set of static covariates to be added to the TimeSeries. Either a pandas Series or a pandas
+            DataFrame. If a Series, the index represents the static variables. The covariates are globally 'applied'
+            to all components of the TimeSeries. If a DataFrame, the columns represent the static variables and the
+            rows represent the components of the uni/multivariate TimeSeries. If a single-row DataFrame, the covariates
+            are globally 'applied' to all components of the TimeSeries. If a multi-row DataFrame, the number of
+            rows must match the number of components of the TimeSeries (in this case, the number of columns in
+            ``value_cols``). This adds control for component-specific static covariates.
+        hierarchy
+            Optionally, a dictionary describing the grouping(s) of the time series. The keys are component names, and
+            for a given component name `c`, the value is a list of component names that `c` "belongs" to. For instance,
+            if there is a `total` component, split both in two divisions `d1` and `d2` and in two regions `r1` and `r2`,
+            and four products `d1r1` (in division `d1` and region `r1`), `d2r1`, `d1r2` and `d2r2`, the hierarchy would
+            be encoded as follows.
+
+            .. highlight:: python
+            .. code-block:: python
+
+                hierarchy={
+                    "d1r1": ["d1", "r1"],
+                    "d1r2": ["d1", "r2"],
+                    "d2r1": ["d2", "r1"],
+                    "d2r2": ["d2", "r2"],
+                    "d1": ["total"],
+                    "d2": ["total"],
+                    "r1": ["total"],
+                    "r2": ["total"]
+                }
+            ..
+            The hierarchy can be used to reconcile forecasts (so that the sums of the forecasts at
+            different levels are consistent), see `hierarchical reconciliation
+            <https://unit8co.github.io/darts/generated_api/darts.dataprocessing.transformers.reconciliation.html>`_.
+
+        Returns
+        -------
+        TimeSeries
+            A univariate or multivariate deterministic TimeSeries constructed from the inputs.
+        """
+        df = nw.from_native(df)
+
+        # get values
+        if value_cols is None:
+            if time_col is not None:
+                series_df = df.drop(time_col)
+            else:
+                series_df = df
+        else:
+            if isinstance(value_cols, str):
+                value_cols = [value_cols]
+            series_df = df[value_cols]
+
+        # get time index
+        if time_col:
+            if time_col not in df.columns:
+                raise_log(AttributeError(f"time_col='{time_col}' is not present."))
+            time_col_vals = df[time_col]
+
+            if time_col_vals.dtype == nw.String:
+                # Try to convert to integers if needed
+                try:
+                    time_col_vals = time_col_vals.cast(nw.Int64)
+                except Exception:
+                    pass
+
+            if time_col_vals.dtype == nw.Int64 or time_col_vals.dtype == np.integer:
+                # We have to check all integers appear only once to have a valid index
+                if time_col_vals.is_duplicated().any():
+                    raise_log(
+                        ValueError(
+                            "The provided integer time index column contains duplicate values."
+                        )
+                    )
+
+                # Temporarily use an integer Index to sort the values, and replace by a
+                # RangeIndex in `TimeSeries.from_xarray()`
+                time_index = time_col_vals.to_list()
+
+            elif time_col_vals.dtype == nw.String:
+                # The integer conversion failed; try datetimes
+                try:
+                    time_index = nw.Datetime(time_col_vals)
+                except Exception:
+                    raise_log(
+                        AttributeError(
+                            "'time_col' is of 'Utf8' dtype but doesn't contain valid timestamps"
+                        )
+                    )
+            elif time_col_vals.dtype == nw.Datetime:
+                time_index = time_col_vals.to_list()
+            else:
+                raise_log(
+                    AttributeError(
+                        "Invalid type of `time_col`: it needs to be of either 'Utf8', 'Datetime' or 'Int64' dtype."
+                    )
+                )
+        else:
+            time_col_vals = nw.maybe_get_index(df)
+            if time_col_vals is None:
+                raise_log(ValueError("No time column or index found in the DataFrame."))
+            # if we are here, the dataframe was pandas
+            raise_if_not(
+                isinstance(time_col_vals, VALID_INDEX_TYPES)
+                or np.issubdtype(time_col_vals.dtype, np.integer),
+                "If time_col is not specified, the DataFrame must be indexed either with "
+                "a DatetimeIndex, a RangeIndex, or an integer Index that can be converted into a RangeIndex",
+                logger,
+            )
+            time_index = time_col_vals.to_list()
+
+        xa = xr.DataArray(
+            series_df.to_numpy()[:, :, np.newaxis],
+            dims=(time_col if time_col else DIMS[0],) + DIMS[-2:],
+            coords={
+                time_col if time_col else DIMS[0]: time_index,
+                DIMS[1]: series_df.columns,
+            },
+            attrs={STATIC_COV_TAG: static_covariates, HIERARCHY_TAG: hierarchy},
+        )
+
+        return cls.from_xarray(
+            xa=xa,
+            fill_missing_dates=fill_missing_dates,
+            freq=freq,
+            fillna_value=fillna_value,
+        )
+
     @classmethod
     def from_group_dataframe(
         cls,
diff --git a/narwhals_test_time.py b/narwhals_test_time.py
new file mode 100644
index 0000000000..de34a90552
--- /dev/null
+++ b/narwhals_test_time.py
@@ -0,0 +1,123 @@
+import time
+import warnings
+from itertools import product
+
+import numpy as np
+import pandas as pd
+
+from darts.timeseries import TimeSeries
+
+# Suppress all warnings
+warnings.filterwarnings("ignore")
+
+
+def create_random_dataframes(
+    num_rows: int = 10,
+    num_columns: int = 3,
+    index: bool = True,
+    start_date: str = "2023-01-01",
+    freq: str = "D",
+) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+    """
+    Create three pandas DataFrames with random data and dates as the index or as a column.
+
+    Parameters:
+    - num_rows (int): The number of rows in the DataFrames.
+    - num_columns (int): The number of columns in the DataFrames.
+    - index (bool): If True, the date is the index of the DataFrame. If False, the date is a column named 'date'.
+    - start_date (str): The start date for the date range (used only if date_format is 'date').
+    - freq (str): The frequency of the date range (used only if date_format is 'date').
+
+    Returns:
+    - tuple: A tuple containing three DataFrames (df_date, df_numpy, df_integer).
+    """
+    # Set a random seed for reproducibility
+    np.random.seed(42)
+
+    # Generate a date range or integer list based on the date_format parameter
+    date_values = pd.date_range(start=start_date, periods=num_rows, freq=freq)
+    integer_values = list(range(1, num_rows + 1))
+    numpy_values = np.array(
+        pd.date_range(start=start_date, periods=num_rows, freq=freq),
+        dtype="datetime64[D]",
+    )
+
+    # Create random data for the DataFrames
+    data = {f"col_{i}": np.random.randn(num_rows) for i in range(num_columns)}
+
+    # Create the DataFrames
+    df_date = pd.DataFrame(data)
+    df_numpy = pd.DataFrame(data)
+    df_integer = pd.DataFrame(data)
+
+    col_names = df_date.columns.values
+
+    # Set the date as index or as a column based on the index parameter
+    if index:
+        df_date.index = date_values
+        df_numpy.index = numpy_values
+        df_integer.index = integer_values
+    else:
+        df_date["date"] = date_values
+        df_numpy["date"] = numpy_values
+        df_integer["date"] = integer_values
+
+    if index:
+        time_col = None
+    else:
+        time_col = "date"
+
+    return [
+        [df_date, col_names, time_col],
+        [df_numpy, col_names, time_col],
+        [df_integer, col_names, time_col],
+    ]
+
+
+def test_dataframes() -> list:
+    test_config = product(
+        [10, 100, 1000, 10000],
+        [10, 100, 500, 1000],
+        [True, False],
+    )
+
+    dataframes_list = [
+        create_random_dataframes(
+            num_rows=num_rows, num_columns=num_columns, index=index
+        )
+        for num_rows, num_columns, index in test_config
+    ]
+
+    return dataframes_list
+
+
+df_list = test_dataframes()
+
+############ PANDAS ############
+pandas_timer = time.time()
+for df_config in df_list:
+    for df, col_names, time_col in df_config:
+        _ = TimeSeries.from_dataframe(
+            df, value_cols=col_names, time_col=time_col, freq=None
+        )
+        df_shuffle = df.sample(frac=1)
+        _ = TimeSeries.from_dataframe(
+            df_shuffle, value_cols=col_names, time_col=time_col, freq=None
+        )
+pandas_timer = time.time() - pandas_timer
+
+############ NARWHALS ############
+narwhals_timer = time.time()
+for df_config in df_list:
+    for df, col_names, time_col in df_config:
+        _ = TimeSeries.from_narwhals_dataframe(
+            df, value_cols=col_names, time_col=time_col, freq=None
+        )
+        df_shuffle = df.sample(frac=1)
+        _ = TimeSeries.from_narwhals_dataframe(
+            df_shuffle, value_cols=col_names, time_col=time_col, freq=None
+        )
+narwhals_timer = time.time() - narwhals_timer
+
+print("pandas processing time: ", pandas_timer)
+print("narwhals processing time: ", narwhals_timer)

From 0041203af98b103b97ed91ecca651a299473e3d1 Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Tue, 4 Feb 2025 09:52:47 +0100
Subject: [PATCH 02/29] changes from MarcoGorelli incorporated

---
 darts/timeseries.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index 39c63ee418..cdd4cc0d18 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -857,7 +857,7 @@ def from_narwhals_dataframe(
                 except Exception:
                     pass
 
-            if time_col_vals.dtype == nw.Int64 or time_col_vals.dtype == np.integer:
+            if time_col_vals.dtype.is_integer():
                 # We have to check all integers appear only once to have a valid index
                 if time_col_vals.is_duplicated().any():
                     raise_log(
@@ -873,7 +873,7 @@ def from_narwhals_dataframe(
             elif time_col_vals.dtype == nw.String:
                 # The integer conversion failed; try datetimes
                 try:
-                    time_index = nw.Datetime(time_col_vals)
+                    time_index = time_col_vals.str.to_datetime()
                 except Exception:
                     raise_log(
                         AttributeError(

From 576e88e20fbd7e4af5713eec58408feeb163b50e Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Thu, 6 Feb 2025 09:27:11 +0100
Subject: [PATCH 03/29] improvement thanks to reviewers

---
 darts/timeseries.py   |  5 ++-
 narwhals_test_time.py | 93 +++++++++++++++++++++++++++++--------------
 2 files changed, 67 insertions(+), 31 deletions(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index cdd4cc0d18..5062696784 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -836,7 +836,8 @@ def from_narwhals_dataframe(
         # get values
         if value_cols is None:
             if time_col is not None:
-                series_df = df.drop(time_col)
+                # series_df = df.drop(time_col)
+                series_df = df.loc[:, df.columns != time_col]
             else:
                 series_df = df
         else:
@@ -869,6 +870,7 @@ def from_narwhals_dataframe(
                 # Temporarily use an integer Index to sort the values, and replace by a
                 # RangeIndex in `TimeSeries.from_xarray()`
                 time_index = time_col_vals.to_list()
+                # time_index = pd.Index(time_col_vals)
 
             elif time_col_vals.dtype == nw.String:
                 # The integer conversion failed; try datetimes
@@ -882,6 +884,7 @@ def from_narwhals_dataframe(
                     )
             elif time_col_vals.dtype == nw.Datetime:
                 time_index = time_col_vals.to_list()
+                # time_index = pd.DatetimeIndex(time_col_vals)
             else:
                 raise_log(
                     AttributeError(
diff --git a/narwhals_test_time.py b/narwhals_test_time.py
index de34a90552..f699b8851d 100644
--- a/narwhals_test_time.py
+++ b/narwhals_test_time.py
@@ -76,8 +76,8 @@ def create_random_dataframes(
 
 def test_dataframes() -> list:
     test_config = product(
-        [10, 100, 1000, 10000],
-        [10, 100, 500, 1000],
+        [10, 100, 1000],
+        [10, 100, 500],
         [True, False],
     )
 
@@ -93,31 +93,64 @@ def test_dataframes() -> list:
 
 df_list = test_dataframes()
 
-############ PANDAS ############
-pandas_timer = time.time()
-for df_config in df_list:
-    for df, col_names, time_col in df_config:
-        _ = TimeSeries.from_dataframe(
-            df, value_cols=col_names, time_col=time_col, freq=None
-        )
-        df_shuffle = df.sample(frac=1)
-        _ = TimeSeries.from_dataframe(
-            df_shuffle, value_cols=col_names, time_col=time_col, freq=None
-        )
-pandas_timer = time.time() - pandas_timer
-
-############ NARWHALS ############
-narwhals_timer = time.time()
-for df_config in df_list:
-    for df, col_names, time_col in df_config:
-        _ = TimeSeries.from_narwhals_dataframe(
-            df, value_cols=col_names, time_col=time_col, freq=None
-        )
-        df_shuffle = df.sample(frac=1)
-        _ = TimeSeries.from_narwhals_dataframe(
-            df_shuffle, value_cols=col_names, time_col=time_col, freq=None
-        )
-narwhals_timer = time.time() - narwhals_timer
-
-print("pandas processing time: ", pandas_timer)
-print("narwhals processing time: ", narwhals_timer)
+num_iter = 5
+pandas_global_timer = 0
+narwhals_global_timer = 0
+
+for _ in range(num_iter):
+    pandas_timer = 0
+    narwhals_timer = 0
+    for df_config in df_list:
+        for df, col_names, time_col in df_config:
+            for i in range(2):
+                # on the second run we shuffle the data
+                if i == 1:
+                    df = df.sample(frac=1)
+
+                # pandas processing time
+                begin = time.time()
+                pandas_timeseries = TimeSeries.from_dataframe(
+                    df, value_cols=col_names, time_col=time_col, freq=None
+                )
+                end = time.time()
+                pandas_timer += end - begin
+
+                # narwhals processing time
+                begin_nw = time.time()
+                narwhals_timeseries = TimeSeries.from_narwhals_dataframe(
+                    df, value_cols=col_names, time_col=time_col, freq=None
+                )
+                end_nw = time.time()
+                narwhals_timer += end_nw - begin_nw
+
+                # Check if the TimeSeries objects are equal
+                try:
+                    assert pandas_timeseries.time_index.equals(
+                        narwhals_timeseries.time_index
+                    )
+                except AssertionError as e:
+                    print(
+                        f"Index assertion failed for DataFrame with columns {col_names} and time_col {time_col}: {e}"
+                    )
+                try:
+                    np.testing.assert_array_almost_equal(
+                        pandas_timeseries.all_values(), narwhals_timeseries.all_values()
+                    )
+                except AssertionError as e:
+                    print(
+                        f"Equal assertion failed for DataFrame with columns {col_names} and time_col {time_col}: {e}"
+                    )
+
+    print("pandas processing time: ", pandas_timer)
+    print("narwhals processing time: ", narwhals_timer, "\n")
+    pandas_global_timer += pandas_timer
+    narwhals_global_timer += narwhals_timer
+
+pandas_global_timer /= num_iter
+narwhals_global_timer /= num_iter
+
+print("Average pandas processing time: ", pandas_global_timer)
+print("Average narwhals processing time: ", narwhals_global_timer)
+
+diff_in_fraction = (-pandas_global_timer + narwhals_global_timer) / pandas_global_timer
+print(f"Average processing time difference: {diff_in_fraction:.2%}")

From dbe2cd9259a0e84ff4ba6e9e84fccda111910ce2 Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Fri, 7 Feb 2025 14:54:50 +0100
Subject: [PATCH 04/29] added comments about slow and fast parts of the code

---
 darts/timeseries.py   | 14 +++++++-------
 narwhals_test_time.py |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index 5062696784..844f2129b7 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -658,7 +658,7 @@ def from_dataframe(
         else:
             if isinstance(value_cols, str):
                 value_cols = [value_cols]
-            series_df = df[value_cols]
+            series_df = df[value_cols]  # slow
 
         # get time index
         if time_col:
@@ -734,14 +734,14 @@ def from_dataframe(
         if series_df.columns.name:
             series_df.columns.name = None
 
-        xa = xr.DataArray(
+        xa = xr.DataArray(  # fast
             series_df.values[:, :, np.newaxis],
             dims=(time_index.name,) + DIMS[-2:],
             coords={time_index.name: time_index, DIMS[1]: series_df.columns},
             attrs={STATIC_COV_TAG: static_covariates, HIERARCHY_TAG: hierarchy},
         )
 
-        return cls.from_xarray(
+        return cls.from_xarray(  # slow
             xa=xa,
             fill_missing_dates=fill_missing_dates,
             freq=freq,
@@ -843,7 +843,7 @@ def from_narwhals_dataframe(
         else:
             if isinstance(value_cols, str):
                 value_cols = [value_cols]
-            series_df = df[value_cols]
+            series_df = df[value_cols]  # quite slow
 
         # get time index
         if time_col:
@@ -903,9 +903,9 @@ def from_narwhals_dataframe(
                 "a DatetimeIndex, a RangeIndex, or an integer Index that can be converted into a RangeIndex",
                 logger,
             )
-            time_index = time_col_vals.to_list()
+            time_index = time_col_vals.to_list()  # slow
 
-        xa = xr.DataArray(
+        xa = xr.DataArray(  # really slow
             series_df.to_numpy()[:, :, np.newaxis],
             dims=(time_col if time_col else DIMS[0],) + DIMS[-2:],
             coords={
@@ -915,7 +915,7 @@ def from_narwhals_dataframe(
             attrs={STATIC_COV_TAG: static_covariates, HIERARCHY_TAG: hierarchy},
         )
 
-        return cls.from_xarray(
+        return cls.from_xarray(  # really slow
             xa=xa,
             fill_missing_dates=fill_missing_dates,
             freq=freq,
diff --git a/narwhals_test_time.py b/narwhals_test_time.py
index f699b8851d..c55f19c231 100644
--- a/narwhals_test_time.py
+++ b/narwhals_test_time.py
@@ -76,8 +76,8 @@ def create_random_dataframes(
 
 def test_dataframes() -> list:
     test_config = product(
-        [10, 100, 1000],
-        [10, 100, 500],
+        [10, 100, 1000, 10000],
+        [10, 100, 500, 1000],
         [True, False],
     )
 

From b2ffc674b3453e634814564784089d3928577228 Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Mon, 10 Feb 2025 15:09:18 +0100
Subject: [PATCH 05/29] using pandas index to avoid .to_list()

---
 darts/timeseries.py   | 13 +++++++------
 narwhals_test_time.py | 17 +++++++++--------
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index 844f2129b7..cb8f8bc57b 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -849,6 +849,8 @@ def from_narwhals_dataframe(
         if time_col:
             if time_col not in df.columns:
                 raise_log(AttributeError(f"time_col='{time_col}' is not present."))
+
+            time_index = pd.Index([])
             time_col_vals = df[time_col]
 
             if time_col_vals.dtype == nw.String:
@@ -869,13 +871,13 @@ def from_narwhals_dataframe(
 
                 # Temporarily use an integer Index to sort the values, and replace by a
                 # RangeIndex in `TimeSeries.from_xarray()`
-                time_index = time_col_vals.to_list()
-                # time_index = pd.Index(time_col_vals)
+                time_index = pd.Index(time_col_vals)
 
             elif time_col_vals.dtype == nw.String:
                 # The integer conversion failed; try datetimes
                 try:
                     time_index = time_col_vals.str.to_datetime()
+                    time_index = pd.DatetimeIndex(time_index)
                 except Exception:
                     raise_log(
                         AttributeError(
@@ -883,8 +885,7 @@ def from_narwhals_dataframe(
                         )
                     )
             elif time_col_vals.dtype == nw.Datetime:
-                time_index = time_col_vals.to_list()
-                # time_index = pd.DatetimeIndex(time_col_vals)
+                time_index = pd.DatetimeIndex(time_col_vals)
             else:
                 raise_log(
                     AttributeError(
@@ -903,9 +904,9 @@ def from_narwhals_dataframe(
                 "a DatetimeIndex, a RangeIndex, or an integer Index that can be converted into a RangeIndex",
                 logger,
             )
-            time_index = time_col_vals.to_list()  # slow
+            time_index = time_col_vals
 
-        xa = xr.DataArray(  # really slow
+        xa = xr.DataArray(
             series_df.to_numpy()[:, :, np.newaxis],
             dims=(time_col if time_col else DIMS[0],) + DIMS[-2:],
             coords={
diff --git a/narwhals_test_time.py b/narwhals_test_time.py
index c55f19c231..799fe4ef68 100644
--- a/narwhals_test_time.py
+++ b/narwhals_test_time.py
@@ -97,7 +97,7 @@ def test_dataframes() -> list:
 pandas_global_timer = 0
 narwhals_global_timer = 0
 
-for _ in range(num_iter):
+for iter in range(num_iter + 1):
     pandas_timer = 0
     narwhals_timer = 0
     for df_config in df_list:
@@ -140,17 +140,18 @@ def test_dataframes() -> list:
                     print(
                         f"Equal assertion failed for DataFrame with columns {col_names} and time_col {time_col}: {e}"
                     )
-
-    print("pandas processing time: ", pandas_timer)
-    print("narwhals processing time: ", narwhals_timer, "\n")
-    pandas_global_timer += pandas_timer
-    narwhals_global_timer += narwhals_timer
+    # throw first iteration away, memory initialization
+    if iter > 0:
+        print(f"pandas processing time: {pandas_timer:.4f}")
+        print(f"narwhals processing time: {narwhals_timer:.4f} \n")
+        pandas_global_timer += pandas_timer
+        narwhals_global_timer += narwhals_timer
 
 pandas_global_timer /= num_iter
 narwhals_global_timer /= num_iter
 
-print("Average pandas processing time: ", pandas_global_timer)
-print("Average narwhals processing time: ", narwhals_global_timer)
+print(f"Average pandas processing time: {pandas_global_timer:.4f}")
+print(f"Average narwhals processing time: {narwhals_global_timer:.4f} \n")
 
 diff_in_fraction = (-pandas_global_timer + narwhals_global_timer) / pandas_global_timer
 print(f"Average processing time difference: {diff_in_fraction:.2%}")

From 79312c91b68ef8546f57cab3639caa62600310a7 Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Mon, 10 Feb 2025 17:43:58 +0100
Subject: [PATCH 06/29] bug fix added

---
 darts/timeseries.py | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index cb8f8bc57b..9ee0c63a42 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -836,8 +836,7 @@ def from_narwhals_dataframe(
         # get values
         if value_cols is None:
             if time_col is not None:
-                # series_df = df.drop(time_col)
-                series_df = df.loc[:, df.columns != time_col]
+                series_df = df.drop(time_col)
             else:
                 series_df = df
         else:
@@ -904,15 +903,31 @@ def from_narwhals_dataframe(
                 "a DatetimeIndex, a RangeIndex, or an integer Index that can be converted into a RangeIndex",
                 logger,
             )
-            time_index = time_col_vals
+            # BUGFIX : force time-index to be timezone naive as xarray doesn't support it
+            # pandas.DataFrame loses the tz information if it's not its index
+            if (
+                isinstance(time_col_vals, pd.DatetimeIndex)
+                and time_col_vals.tz is not None
+            ):
+                logger.warning(
+                    "The provided DatetimeIndex was associated with a timezone, which is currently not supported "
+                    "by xarray. To avoid unexpected behaviour, the tz information was removed. Consider calling "
+                    f"`ts.time_index.tz_localize({time_col_vals.tz})` when exporting the results."
+                    "To plot the series with the right time steps, consider setting the matplotlib.pyplot "
+                    "`rcParams['timezone']` parameter to automatically convert the time axis back to the "
+                    "original timezone."
+                )
+                time_index = time_col_vals.tz_localize(None)
+            else:
+                time_index = time_col_vals
+
+        if not time_index.name:
+            time_index.name = time_col if time_col else DIMS[0]
 
         xa = xr.DataArray(
             series_df.to_numpy()[:, :, np.newaxis],
-            dims=(time_col if time_col else DIMS[0],) + DIMS[-2:],
-            coords={
-                time_col if time_col else DIMS[0]: time_index,
-                DIMS[1]: series_df.columns,
-            },
+            dims=(time_index.name,) + DIMS[-2:],
+            coords={time_index.name: time_index, DIMS[1]: series_df.columns},
             attrs={STATIC_COV_TAG: static_covariates, HIERARCHY_TAG: hierarchy},
         )
 

From b08a74ffa360e028417c8fc73a3e16dcb34026fd Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Tue, 11 Feb 2025 15:52:20 +0100
Subject: [PATCH 07/29] updated test script

---
 narwhals_test_time.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/narwhals_test_time.py b/narwhals_test_time.py
index 799fe4ef68..80fd76323b 100644
--- a/narwhals_test_time.py
+++ b/narwhals_test_time.py
@@ -15,6 +15,7 @@ def create_random_dataframes(
     num_rows: int = 10,
     num_columns: int = 3,
     index: bool = True,
+    col_names_given: bool = True,
     start_date: str = "2023-01-01",
     freq: str = "D",
 ) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
@@ -50,7 +51,10 @@ def create_random_dataframes(
     df_numpy = pd.DataFrame(data)
     df_integer = pd.DataFrame(data)
 
-    col_names = df_date.columns.values
+    if col_names_given:
+        col_names = df_date.columns.values
+    else:
+        col_names = None
 
     # Set the date as index or as a column based on the index parameter
     if index:
@@ -79,13 +83,17 @@ def test_dataframes() -> list:
         [10, 100, 1000, 10000],
         [10, 100, 500, 1000],
         [True, False],
+        [True, False],
     )
 
     dataframes_list = [
         create_random_dataframes(
-            num_rows=num_rows, num_columns=num_columns, index=index
+            num_rows=num_rows,
+            num_columns=num_columns,
+            index=index,
+            col_names_given=col_names_given,
         )
-        for num_rows, num_columns, index in test_config
+        for num_rows, num_columns, index, col_names_given in test_config
     ]
 
     return dataframes_list

From 2425fbe51cdbf80b7c482a76f1a33e17db7b670a Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Wed, 12 Feb 2025 11:56:42 +0100
Subject: [PATCH 08/29] narwhals timeseries added

---
 darts/timeseries.py | 60 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 58 insertions(+), 2 deletions(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index 9ee0c63a42..9732076404 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -50,7 +50,7 @@
 import numpy as np
 import pandas as pd
 import xarray as xr
-from narwhals.typing import DataFrameT
+from narwhals.typing import IntoDataFrame, IntoSeries
 from pandas.tseries.frequencies import to_offset
 from scipy.stats import kurtosis, skew
 
@@ -751,7 +751,7 @@ def from_dataframe(
     @classmethod
     def from_narwhals_dataframe(
         cls,
-        df: DataFrameT,
+        df: IntoDataFrame,
         time_col: Optional[str] = None,
         value_cols: Optional[Union[list[str], str]] = None,
         fill_missing_dates: Optional[bool] = False,
@@ -1204,6 +1204,62 @@ def from_series(
             static_covariates=static_covariates,
         )
 
+    @classmethod
+    def from_narwhals_series(
+        cls,
+        pd_series: IntoSeries,
+        fill_missing_dates: Optional[bool] = False,
+        freq: Optional[Union[str, int]] = None,
+        fillna_value: Optional[float] = None,
+        static_covariates: Optional[Union[pd.Series, pd.DataFrame]] = None,
+    ) -> Self:
+        """
+        Build a univariate deterministic series from a pandas Series.
+
+        The series must contain an index that is either a pandas DatetimeIndex, a pandas RangeIndex, or a pandas Index
+        that can be converted into a RangeIndex. It is better if the index has no holes; alternatively setting
+        `fill_missing_dates` can in some cases solve these issues (filling holes with NaN, or with the provided
+        `fillna_value` numeric value, if any).
+
+        Parameters
+        ----------
+        pd_series
+            The pandas Series instance.
+        fill_missing_dates
+            Optionally, a boolean value indicating whether to fill missing dates (or indices in case of integer index)
+            with NaN values. This requires either a provided `freq` or the possibility to infer the frequency from the
+            provided timestamps. See :meth:`_fill_missing_dates() <TimeSeries._fill_missing_dates>` for more info.
+        freq
+            Optionally, a string or integer representing the frequency of the underlying index. This is useful in order
+            to fill in missing values if some dates are missing and `fill_missing_dates` is set to `True`.
+            If a string, represents the frequency of the pandas DatetimeIndex (see `offset aliases
+            <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_ for more info on
+            supported frequencies).
+            If an integer, represents the step size of the pandas Index or pandas RangeIndex.
+        fillna_value
+            Optionally, a numeric value to fill missing values (NaNs) with.
+        static_covariates
+            Optionally, a set of static covariates to be added to the TimeSeries. Either a pandas Series or a
+            single-row pandas DataFrame. If a Series, the index represents the static variables. If a DataFrame, the
+            columns represent the static variables and the single row represents the univariate TimeSeries component.
+
+        Returns
+        -------
+        TimeSeries
+            A univariate and deterministic TimeSeries constructed from the inputs.
+        """
+        nw_series = nw.from_native(pd_series, allow_series=True)
+        df = nw_series.to_frame()
+        return cls.from_dataframe(
+            df,
+            time_col=None,
+            value_cols=None,
+            fill_missing_dates=fill_missing_dates,
+            freq=freq,
+            fillna_value=fillna_value,
+            static_covariates=static_covariates,
+        )
+
     @classmethod
     def from_times_and_values(
         cls,

From 36300f2849c8460e763a606f7cd4024b57fe6fd5 Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Fri, 14 Feb 2025 13:14:27 +0100
Subject: [PATCH 09/29] from_series changed, names changed

---
 darts/timeseries.py | 239 +-------------------------------------------
 1 file changed, 2 insertions(+), 237 deletions(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index 9732076404..877d849a9d 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -570,186 +570,6 @@ def from_csv(
 
     @classmethod
     def from_dataframe(
-        cls,
-        df: pd.DataFrame,
-        time_col: Optional[str] = None,
-        value_cols: Optional[Union[list[str], str]] = None,
-        fill_missing_dates: Optional[bool] = False,
-        freq: Optional[Union[str, int]] = None,
-        fillna_value: Optional[float] = None,
-        static_covariates: Optional[Union[pd.Series, pd.DataFrame]] = None,
-        hierarchy: Optional[dict] = None,
-    ) -> Self:
-        """
-        Build a deterministic TimeSeries instance built from a selection of columns of a DataFrame.
-        One column (or the DataFrame index) has to represent the time,
-        and a list of columns `value_cols` has to represent the values for this time series.
-
-        Parameters
-        ----------
-        df
-            The DataFrame
-        time_col
-            The time column name. If set, the column will be cast to a pandas DatetimeIndex (if it contains
-            timestamps) or a RangeIndex (if it contains integers).
-            If not set, the DataFrame index will be used. In this case the DataFrame must contain an index that is
-            either a pandas DatetimeIndex, a pandas RangeIndex, or a pandas Index that can be converted to a
-            RangeIndex. It is better if the index has no holes; alternatively setting `fill_missing_dates` can in some
-            cases solve these issues (filling holes with NaN, or with the provided `fillna_value` numeric value, if
-            any).
-        value_cols
-            A string or list of strings representing the value column(s) to be extracted from the DataFrame. If set to
-            `None`, the whole DataFrame will be used.
-        fill_missing_dates
-            Optionally, a boolean value indicating whether to fill missing dates (or indices in case of integer index)
-            with NaN values. This requires either a provided `freq` or the possibility to infer the frequency from the
-            provided timestamps. See :meth:`_fill_missing_dates() <TimeSeries._fill_missing_dates>` for more info.
-        freq
-            Optionally, a string or integer representing the frequency of the underlying index. This is useful in order
-            to fill in missing values if some dates are missing and `fill_missing_dates` is set to `True`.
-            If a string, represents the frequency of the pandas DatetimeIndex (see `offset aliases
-            <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_ for more info on
-            supported frequencies).
-            If an integer, represents the step size of the pandas Index or pandas RangeIndex.
-        fillna_value
-            Optionally, a numeric value to fill missing values (NaNs) with.
-        static_covariates
-            Optionally, a set of static covariates to be added to the TimeSeries. Either a pandas Series or a pandas
-            DataFrame. If a Series, the index represents the static variables. The covariates are globally 'applied'
-            to all components of the TimeSeries. If a DataFrame, the columns represent the static variables and the
-            rows represent the components of the uni/multivariate TimeSeries. If a single-row DataFrame, the covariates
-            are globally 'applied' to all components of the TimeSeries. If a multi-row DataFrame, the number of
-            rows must match the number of components of the TimeSeries (in this case, the number of columns in
-            ``value_cols``). This adds control for component-specific static covariates.
-        hierarchy
-            Optionally, a dictionary describing the grouping(s) of the time series. The keys are component names, and
-            for a given component name `c`, the value is a list of component names that `c` "belongs" to. For instance,
-            if there is a `total` component, split both in two divisions `d1` and `d2` and in two regions `r1` and `r2`,
-            and four products `d1r1` (in division `d1` and region `r1`), `d2r1`, `d1r2` and `d2r2`, the hierarchy would
-            be encoded as follows.
-
-            .. highlight:: python
-            .. code-block:: python
-
-                hierarchy={
-                    "d1r1": ["d1", "r1"],
-                    "d1r2": ["d1", "r2"],
-                    "d2r1": ["d2", "r1"],
-                    "d2r2": ["d2", "r2"],
-                    "d1": ["total"],
-                    "d2": ["total"],
-                    "r1": ["total"],
-                    "r2": ["total"]
-                }
-            ..
-            The hierarchy can be used to reconcile forecasts (so that the sums of the forecasts at
-            different levels are consistent), see `hierarchical reconciliation
-            <https://unit8co.github.io/darts/generated_api/darts.dataprocessing.transformers.reconciliation.html>`_.
-
-        Returns
-        -------
-        TimeSeries
-            A univariate or multivariate deterministic TimeSeries constructed from the inputs.
-        """
-
-        # get values
-        if value_cols is None:
-            series_df = df.loc[:, df.columns != time_col]
-        else:
-            if isinstance(value_cols, str):
-                value_cols = [value_cols]
-            series_df = df[value_cols]  # slow
-
-        # get time index
-        if time_col:
-            if time_col not in df.columns:
-                raise_log(AttributeError(f"time_col='{time_col}' is not present."))
-
-            time_index = pd.Index([])
-            time_col_vals = df[time_col]
-
-            if np.issubdtype(time_col_vals.dtype, object):
-                # Try to convert to integers if needed
-                try:
-                    time_col_vals = time_col_vals.astype(int)
-                except ValueError:
-                    pass
-
-            if np.issubdtype(time_col_vals.dtype, np.integer):
-                # We have to check all integers appear only once to have a valid index
-                raise_if(
-                    time_col_vals.duplicated().any(),
-                    "The provided integer time index column contains duplicate values.",
-                )
-
-                # Temporarily use an integer Index to sort the values, and replace by a
-                # RangeIndex in `TimeSeries.from_xarray()`
-                time_index = pd.Index(time_col_vals)
-
-            elif np.issubdtype(time_col_vals.dtype, object):
-                # The integer conversion failed; try datetimes
-                try:
-                    time_index = pd.DatetimeIndex(time_col_vals)
-                except ValueError:
-                    raise_log(
-                        AttributeError(
-                            "'time_col' is of 'object' dtype but doesn't contain valid timestamps"
-                        )
-                    )
-            elif np.issubdtype(time_col_vals.dtype, np.datetime64):
-                time_index = pd.DatetimeIndex(time_col_vals)
-            else:
-                raise_log(
-                    AttributeError(
-                        "Invalid type of `time_col`: it needs to be of either 'str', 'datetime' or 'int' dtype."
-                    )
-                )
-            time_index.name = time_col
-        else:
-            raise_if_not(
-                isinstance(df.index, VALID_INDEX_TYPES)
-                or np.issubdtype(df.index.dtype, np.integer),
-                "If time_col is not specified, the DataFrame must be indexed either with "
-                "a DatetimeIndex, a RangeIndex, or an integer Index that can be converted into a RangeIndex",
-                logger,
-            )
-            # BUGFIX : force time-index to be timezone naive as xarray doesn't support it
-            # pandas.DataFrame loses the tz information if it's not its index
-            if isinstance(df.index, pd.DatetimeIndex) and df.index.tz is not None:
-                logger.warning(
-                    "The provided DatetimeIndex was associated with a timezone, which is currently not supported "
-                    "by xarray. To avoid unexpected behaviour, the tz information was removed. Consider calling "
-                    f"`ts.time_index.tz_localize({df.index.tz})` when exporting the results."
-                    "To plot the series with the right time steps, consider setting the matplotlib.pyplot "
-                    "`rcParams['timezone']` parameter to automatically convert the time axis back to the "
-                    "original timezone."
-                )
-                time_index = df.index.tz_localize(None)
-            else:
-                time_index = df.index
-
-        if not time_index.name:
-            time_index.name = time_col if time_col else DIMS[0]
-
-        if series_df.columns.name:
-            series_df.columns.name = None
-
-        xa = xr.DataArray(  # fast
-            series_df.values[:, :, np.newaxis],
-            dims=(time_index.name,) + DIMS[-2:],
-            coords={time_index.name: time_index, DIMS[1]: series_df.columns},
-            attrs={STATIC_COV_TAG: static_covariates, HIERARCHY_TAG: hierarchy},
-        )
-
-        return cls.from_xarray(  # slow
-            xa=xa,
-            fill_missing_dates=fill_missing_dates,
-            freq=freq,
-            fillna_value=fillna_value,
-        )
-
-    @classmethod
-    def from_narwhals_dataframe(
         cls,
         df: IntoDataFrame,
         time_col: Optional[str] = None,
@@ -1151,61 +971,6 @@ def from_group(static_cov_vals, group):
 
     @classmethod
     def from_series(
-        cls,
-        pd_series: pd.Series,
-        fill_missing_dates: Optional[bool] = False,
-        freq: Optional[Union[str, int]] = None,
-        fillna_value: Optional[float] = None,
-        static_covariates: Optional[Union[pd.Series, pd.DataFrame]] = None,
-    ) -> Self:
-        """
-        Build a univariate deterministic series from a pandas Series.
-
-        The series must contain an index that is either a pandas DatetimeIndex, a pandas RangeIndex, or a pandas Index
-        that can be converted into a RangeIndex. It is better if the index has no holes; alternatively setting
-        `fill_missing_dates` can in some cases solve these issues (filling holes with NaN, or with the provided
-        `fillna_value` numeric value, if any).
-
-        Parameters
-        ----------
-        pd_series
-            The pandas Series instance.
-        fill_missing_dates
-            Optionally, a boolean value indicating whether to fill missing dates (or indices in case of integer index)
-            with NaN values. This requires either a provided `freq` or the possibility to infer the frequency from the
-            provided timestamps. See :meth:`_fill_missing_dates() <TimeSeries._fill_missing_dates>` for more info.
-        freq
-            Optionally, a string or integer representing the frequency of the underlying index. This is useful in order
-            to fill in missing values if some dates are missing and `fill_missing_dates` is set to `True`.
-            If a string, represents the frequency of the pandas DatetimeIndex (see `offset aliases
-            <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_ for more info on
-            supported frequencies).
-            If an integer, represents the step size of the pandas Index or pandas RangeIndex.
-        fillna_value
-            Optionally, a numeric value to fill missing values (NaNs) with.
-        static_covariates
-            Optionally, a set of static covariates to be added to the TimeSeries. Either a pandas Series or a
-            single-row pandas DataFrame. If a Series, the index represents the static variables. If a DataFrame, the
-            columns represent the static variables and the single row represents the univariate TimeSeries component.
-
-        Returns
-        -------
-        TimeSeries
-            A univariate and deterministic TimeSeries constructed from the inputs.
-        """
-        df = pd.DataFrame(pd_series)
-        return cls.from_dataframe(
-            df,
-            time_col=None,
-            value_cols=None,
-            fill_missing_dates=fill_missing_dates,
-            freq=freq,
-            fillna_value=fillna_value,
-            static_covariates=static_covariates,
-        )
-
-    @classmethod
-    def from_narwhals_series(
         cls,
         pd_series: IntoSeries,
         fill_missing_dates: Optional[bool] = False,
@@ -1224,7 +989,7 @@ def from_narwhals_series(
         Parameters
         ----------
         pd_series
-            The pandas Series instance.
+            A Series instance.
         fill_missing_dates
             Optionally, a boolean value indicating whether to fill missing dates (or indices in case of integer index)
             with NaN values. This requires either a provided `freq` or the possibility to infer the frequency from the
@@ -1248,7 +1013,7 @@ def from_narwhals_series(
         TimeSeries
             A univariate and deterministic TimeSeries constructed from the inputs.
         """
-        nw_series = nw.from_native(pd_series, allow_series=True)
+        nw_series = nw.from_native(pd_series, series_only=True)
         df = nw_series.to_frame()
         return cls.from_dataframe(
             df,

From ba01df1245090b60762f6afa1435a83d27f9ac75 Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Fri, 14 Feb 2025 13:22:27 +0100
Subject: [PATCH 10/29] changelog updated

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d7df8c0969..b69b2c2a33 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
 
 **Improved**
 
+- Implemented the `from_dataframe()` and `from_series()` methods with [Narwhals](https://github.com/narwhals-dev/narwhals), a compatibility layer between dataframe libraries. From now on, Darts can transform pandas, polars, arrow and many other dataframes into `TimeSeries`. [#2661](https://github.com/unit8co/darts/pull/2661) by [Jules Authier](https://github.com/authierj).
 - New model: `StatsForecastAutoTBATS`. This model offers the [AutoTBATS](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#autotbats) model from Nixtla's `statsforecasts` library. [#2611](https://github.com/unit8co/darts/pull/2611) by [He Weilin](https://github.com/cnhwl).
 - Added the `title` attribute to `TimeSeries.plot()`. This allows to set a title for the plot. [#2639](https://github.com/unit8co/darts/pull/2639) by [Jonathan Koch](https://github.com/jonathankoch99).
 - Added parameter `component_wise` to `show_anomalies()` to separately plot each component in multivariate series. [#2544](https://github.com/unit8co/darts/pull/2544) by [He Weilin](https://github.com/cnhwl).

From 2e39269a1713d75b5ac36643bd12b24700f85baa Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Mon, 17 Feb 2025 15:40:19 +0100
Subject: [PATCH 11/29] small improvement

---
 darts/timeseries.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index 877d849a9d..b771b22a5b 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -695,8 +695,8 @@ def from_dataframe(
             elif time_col_vals.dtype == nw.String:
                 # The integer conversion failed; try datetimes
                 try:
-                    time_index = time_col_vals.str.to_datetime()
-                    time_index = pd.DatetimeIndex(time_index)
+                    # time_index = time_col_vals.str.to_datetime()
+                    time_index = pd.DatetimeIndex(time_col_vals)
                 except Exception:
                     raise_log(
                         AttributeError(

From 1a9a266247187b3c09c1d21c9bb22d993f6d5a0d Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Mon, 17 Feb 2025 17:01:18 +0100
Subject: [PATCH 12/29] clean test scripts added

---
 from_df_timing.py     | 174 +++++++++++++++++++++++++++++++++++++++++
 from_df_timing_col.py | 176 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 350 insertions(+)
 create mode 100644 from_df_timing.py
 create mode 100644 from_df_timing_col.py

diff --git a/from_df_timing.py b/from_df_timing.py
new file mode 100644
index 0000000000..823e018105
--- /dev/null
+++ b/from_df_timing.py
@@ -0,0 +1,174 @@
+import argparse
+import json
+import time
+import warnings
+from itertools import product
+
+import numpy as np
+import pandas as pd
+from tqdm import tqdm
+
+from darts.timeseries import TimeSeries
+
+# Suppress all warnings
+warnings.filterwarnings("ignore")
+
+
+def test_from_dataframe(f_name: str):
+    return getattr(TimeSeries, f_name)
+
+
+def create_random_dataframes(
+    num_rows: int = 10,
+    num_columns: int = 3,
+    index: bool = True,
+    col_names_given: bool = True,
+    start_date: str = "1900-01-01",
+    freq: str = "D",
+) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+    """
+    Create three pandas DataFrames with random data and dates as the index or as a column.
+
+    Parameters:
+    - num_rows (int): The number of rows in the DataFrames.
+    - num_columns (int): The number of columns in the DataFrames.
+    - index (bool): If True, the date is the index of the DataFrame. If False, the date is a column named 'date'.
+    - start_date (str): The start date of the generated date range (not used for the integer time axis).
+    - freq (str): The frequency of the generated date range (not used for the integer time axis).
+
+    Returns:
+    - list: Three `[df, col_names, time_col]` entries, one each for df_date, df_numpy and df_integer.
+    """
+    # Set a random seed for reproducibility
+    np.random.seed(42)
+
+    # Generate the three time axes: pandas datetimes, integers and numpy datetime64 values
+    date_values = pd.date_range(start=start_date, periods=num_rows, freq=freq)
+    integer_values = list(range(1, num_rows + 1))
+    numpy_values = np.array(
+        pd.date_range(start=start_date, periods=num_rows, freq=freq),
+        dtype="datetime64[D]",
+    )
+
+    # Create random data for the DataFrames
+    data = {f"col_{i}": np.random.randn(num_rows) for i in range(num_columns)}
+
+    # Create the DataFrames
+    df_date = pd.DataFrame(data)
+    df_numpy = pd.DataFrame(data)
+    df_integer = pd.DataFrame(data)
+
+    if col_names_given:
+        col_names = df_date.columns.values
+    else:
+        col_names = None
+
+    # Set the date as index or as a column based on the index parameter
+    if index:
+        df_date.index = date_values
+        df_numpy.index = numpy_values
+        df_integer.index = integer_values
+    else:
+        df_date["date"] = date_values
+        df_numpy["date"] = numpy_values
+        df_integer["date"] = integer_values
+
+    if index:
+        time_col = None
+    else:
+        time_col = "date"
+
+    return [
+        [df_date, col_names, time_col],
+        [df_numpy, col_names, time_col],
+        [df_integer, col_names, time_col],
+    ]
+
+
+def test_dataframes() -> list:
+    test_config = product(
+        [10, 100, 1000, 10000, 100000],
+        [100],
+        [True, False],
+        [True, False],
+    )
+
+    dataframes_list = [
+        create_random_dataframes(
+            num_rows=num_rows,
+            num_columns=num_columns,
+            index=index,
+            col_names_given=col_names_given,
+        )
+        for num_rows, num_columns, index, col_names_given in test_config
+    ]
+
+    return dataframes_list
+
+
+def calculate_processing_time(
+    f_name: str,
+    num_iter: int,
+    save_path="/Users/julesauthier/Documents/darts/from_df_times/data/",
+):
+    df_list = test_dataframes()
+    df_func = test_from_dataframe(f_name)
+
+    # Initialize dictionaries to store processing times
+    times = {}
+
+    # Initialize the progress bar
+    total_iterations = (
+        len(df_list) * 2 * 3
+    )  # 2 iterations per dataframe configuration, 3 df per config
+    progress_bar = tqdm(total=total_iterations, desc="Processing DataFrames")
+
+    for df_config in df_list:
+        for df, col_names, time_col in df_config:
+            num_rows = len(df)
+            dict_entry = str(num_rows)
+
+            for i in range(2):
+                # on the second run we shuffle the data
+                if i == 1:
+                    df = df.sample(frac=1)
+                    dict_entry += "_shuffled"
+
+                begin = time.time()
+                for _ in range(num_iter):
+                    _ = df_func(df, value_cols=col_names, time_col=time_col, freq=None)
+                end = time.time()
+                timer = (end - begin) / num_iter
+
+                if dict_entry not in times:
+                    times[dict_entry] = timer
+                else:
+                    times[dict_entry] += timer
+
+                # Update the progress bar
+                progress_bar.update(1)
+
+    file_name = f_name + "_avg_time_" + str(num_iter) + "_iter.json"
+
+    # Store the accumulated per-call timings in a single JSON file
+    with open(save_path + file_name, "w") as f:
+        json.dump(times, f, indent=4)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="The function to test and the number of iter can "
+    )
+    parser.add_argument(
+        "--f_name", type=str, default="from_dataframe", help="method to time"
+    )
+    parser.add_argument(
+        "--n_iter", type=int, default=100, help="number of function call"
+    )
+
+    args = parser.parse_args()
+
+    f_name = args.f_name
+    n_iter = args.n_iter
+
+    calculate_processing_time(f_name, n_iter)
diff --git a/from_df_timing_col.py b/from_df_timing_col.py
new file mode 100644
index 0000000000..4487654ea4
--- /dev/null
+++ b/from_df_timing_col.py
@@ -0,0 +1,176 @@
+import argparse
+import json
+import time
+import warnings
+from itertools import product
+
+import numpy as np
+import pandas as pd
+from tqdm import tqdm
+
+from darts.timeseries import TimeSeries
+
+# Suppress all warnings
+warnings.filterwarnings("ignore")
+
+
+def test_from_dataframe(f_name: str):
+    return getattr(TimeSeries, f_name)
+
+
+def create_random_dataframes(
+    num_rows: int = 10,
+    num_columns: int = 3,
+    index: bool = True,
+    col_names_given: bool = True,
+    start_date: str = "1900-01-01",
+    freq: str = "D",
+) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+    """
+    Create three pandas DataFrames with random data and dates as the index or as a column.
+
+    Parameters:
+    - num_rows (int): The number of rows in the DataFrames.
+    - num_columns (int): The number of columns in the DataFrames.
+    - index (bool): If True, the date is the index of the DataFrame. If False, the date is a column named 'date'.
+    - start_date (str): The start date of the generated date range (not used for the integer time axis).
+    - freq (str): The frequency of the generated date range (not used for the integer time axis).
+
+    Returns:
+    - list: Three `[df, col_names, time_col]` entries, one each for df_date, df_numpy and df_integer.
+    """
+    # Set a random seed for reproducibility
+    np.random.seed(42)
+
+    # Generate the three time axes: pandas datetimes, integers and numpy datetime64 values
+    date_values = pd.date_range(start=start_date, periods=num_rows, freq=freq)
+    integer_values = list(range(1, num_rows + 1))
+    numpy_values = np.array(
+        pd.date_range(start=start_date, periods=num_rows, freq=freq),
+        dtype="datetime64[D]",
+    )
+
+    # Create random data for the DataFrames
+    data = {f"col_{i}": np.random.randn(num_rows) for i in range(num_columns)}
+
+    # Create the DataFrames
+    df_date = pd.DataFrame(data)
+    df_numpy = pd.DataFrame(data)
+    df_integer = pd.DataFrame(data)
+
+    if col_names_given:
+        col_names = df_date.columns.values
+    else:
+        col_names = None
+
+    # Set the date as index or as a column based on the index parameter
+    if index:
+        df_date.index = date_values
+        df_numpy.index = numpy_values
+        df_integer.index = integer_values
+    else:
+        df_date["date"] = date_values
+        df_numpy["date"] = numpy_values
+        df_integer["date"] = integer_values
+
+    if index:
+        time_col = None
+    else:
+        time_col = "date"
+
+    return [
+        [df_date, col_names, time_col],
+        [df_numpy, col_names, time_col],
+        [df_integer, col_names, time_col],
+    ]
+
+
+def test_dataframes() -> list:
+    test_config = product(
+        [1000],
+        [1, 10, 100, 1000],
+        [True, False],
+        [True, False],
+    )
+
+    dataframes_list = [
+        create_random_dataframes(
+            num_rows=num_rows,
+            num_columns=num_columns,
+            index=index,
+            col_names_given=col_names_given,
+        )
+        for num_rows, num_columns, index, col_names_given in test_config
+    ]
+
+    return dataframes_list
+
+
+def calculate_processing_time(
+    f_name: str,
+    num_iter: int,
+    save_path="/Users/julesauthier/Documents/darts/from_df_times/data/",
+):
+    df_list = test_dataframes()
+    df_func = test_from_dataframe(f_name)
+
+    # Initialize dictionaries to store processing times
+    times = {}
+
+    # Initialize the progress bar
+    total_iterations = (
+        len(df_list) * 2 * 3
+    )  # 2 iterations per dataframe configuration, 3 df per config
+    progress_bar = tqdm(total=total_iterations, desc="Processing DataFrames")
+
+    for df_config in df_list:
+        for df, col_names, time_col in df_config:
+            num_cols = df.shape[1]
+            if num_cols > 1 and (num_cols % 2 == 1 or num_cols == 2):
+                num_cols -= 1
+            dict_entry = str(num_cols)
+
+            for i in range(2):
+                # on the second run we shuffle the data
+                if i == 1:
+                    df = df.sample(frac=1)
+                    dict_entry += "_shuffled"
+
+                begin = time.time()
+                for _ in range(num_iter):
+                    _ = df_func(df, value_cols=col_names, time_col=time_col, freq=None)
+                end = time.time()
+                timer = (end - begin) / num_iter
+
+                if dict_entry not in times:
+                    times[dict_entry] = timer
+                else:
+                    times[dict_entry] += timer
+
+                # Update the progress bar
+                progress_bar.update(1)
+
+    file_name = f_name + "_avg_time_cols_" + str(num_iter) + "_iter.json"
+
+    # Store the accumulated per-call timings in a single JSON file
+    with open(save_path + file_name, "w") as f:
+        json.dump(times, f, indent=4)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="The function to test and the number of iter can "
+    )
+    parser.add_argument(
+        "--f_name", type=str, default="from_dataframe", help="method to time"
+    )
+    parser.add_argument(
+        "--n_iter", type=int, default=100, help="number of function call"
+    )
+
+    args = parser.parse_args()
+
+    f_name = args.f_name
+    n_iter = args.n_iter
+
+    calculate_processing_time(f_name, n_iter)

From 2c24a395ed69ea0e52803a3a4e5a5395c406dd6f Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Wed, 19 Feb 2025 10:47:38 +0100
Subject: [PATCH 13/29] BUGFIX added for non_pandas df

---
 darts/timeseries.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index b771b22a5b..3326c41ea1 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -655,10 +655,7 @@ def from_dataframe(
 
         # get values
         if value_cols is None:
-            if time_col is not None:
-                series_df = df.drop(time_col)
-            else:
-                series_df = df
+            series_df = df.drop(time_col) if time_col else df
         else:
             if isinstance(value_cols, str):
                 value_cols = [value_cols]
@@ -669,7 +666,6 @@ def from_dataframe(
             if time_col not in df.columns:
                 raise_log(AttributeError(f"time_col='{time_col}' is not present."))
 
-            time_index = pd.Index([])
             time_col_vals = df[time_col]
 
             if time_col_vals.dtype == nw.String:
@@ -680,14 +676,12 @@ def from_dataframe(
                     pass
 
             if time_col_vals.dtype.is_integer():
-                # We have to check all integers appear only once to have a valid index
                 if time_col_vals.is_duplicated().any():
                     raise_log(
                         ValueError(
                             "The provided integer time index column contains duplicate values."
                         )
                     )
-
                 # Temporarily use an integer Index to sort the values, and replace by a
                 # RangeIndex in `TimeSeries.from_xarray()`
                 time_index = pd.Index(time_col_vals)
@@ -695,7 +689,6 @@ def from_dataframe(
             elif time_col_vals.dtype == nw.String:
                 # The integer conversion failed; try datetimes
                 try:
-                    # time_index = time_col_vals.str.to_datetime()
                     time_index = pd.DatetimeIndex(time_col_vals)
                 except Exception:
                     raise_log(
@@ -704,6 +697,18 @@ def from_dataframe(
                         )
                     )
             elif time_col_vals.dtype == nw.Datetime:
+                # BUGFIX: force the time index to be timezone-naive, as xarray doesn't support tz-aware indexes
+                # pandas.DataFrame loses the tz information if it's not its index
+                if time_col_vals.dtype.time_zone is not None:
+                    logger.warning(
+                        "The provided Datetime data was associated with a timezone, which is currently not supported "
+                        "by xarray. To avoid unexpected behaviour, the tz information was removed. Consider calling "
+                        f"`ts.time_index.tz_localize({time_col_vals.dtype.time_zone})` when exporting the results. "
+                        "To plot the series with the right time steps, consider setting the matplotlib.pyplot "
+                        "`rcParams['timezone']` parameter to automatically convert the time axis back to the "
+                        "original timezone."
+                    )
+                    time_col_vals = time_col_vals.dt.replace_time_zone(None)
                 time_index = pd.DatetimeIndex(time_col_vals)
             else:
                 raise_log(

From 89f23fb68f3dd959914a4d00e18961344dd5aa79 Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Wed, 19 Feb 2025 10:48:01 +0100
Subject: [PATCH 14/29] tests added for polars df

---
 darts/tests/test_timeseries.py | 123 ++++++++++++++++++++++++---------
 1 file changed, 91 insertions(+), 32 deletions(-)

diff --git a/darts/tests/test_timeseries.py b/darts/tests/test_timeseries.py
index 41b04aebd4..d8f1505751 100644
--- a/darts/tests/test_timeseries.py
+++ b/darts/tests/test_timeseries.py
@@ -5,6 +5,7 @@
 
 import numpy as np
 import pandas as pd
+import polars as pl
 import pytest
 import xarray as xr
 from scipy.stats import kurtosis, skew
@@ -2506,7 +2507,16 @@ def test_tail_numeric_time_index(self):
 
 
 class TestTimeSeriesFromDataFrame:
-    def test_from_dataframe_sunny_day(self):
+    def pd_to_backend(self, df, backend, index=False):
+        """Convert pandas `df` to `backend`; `index=True` keeps the index as a column (polars only)."""
+        if backend == "pandas":
+            return df
+        if backend != "polars":
+            raise ValueError(f"Unsupported backend: {backend!r}")
+        return pl.from_pandas(df.reset_index() if index else df)
+
+    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    def test_from_dataframe_sunny_day(self, backend):
         data_dict = {"Time": pd.date_range(start="20180501", end="20200301", freq="MS")}
         data_dict["Values1"] = np.random.uniform(
             low=-10, high=10, size=len(data_dict["Time"])
@@ -2520,40 +2530,55 @@ def test_from_dataframe_sunny_day(self):
         data_pd2["Time"] = data_pd2["Time"].apply(lambda date: str(date))
         data_pd3 = data_pd1.set_index("Time")
 
-        data_darts1 = TimeSeries.from_dataframe(df=data_pd1, time_col="Time")
-        data_darts2 = TimeSeries.from_dataframe(df=data_pd2, time_col="Time")
-        data_darts3 = TimeSeries.from_dataframe(df=data_pd3)
+        data_darts1 = TimeSeries.from_dataframe(
+            df=self.pd_to_backend(data_pd1, backend), time_col="Time"
+        )
+        data_darts2 = TimeSeries.from_dataframe(
+            df=self.pd_to_backend(data_pd2, backend), time_col="Time"
+        )
+        data_darts3 = TimeSeries.from_dataframe(
+            df=self.pd_to_backend(data_pd3, backend, index=True),
+            time_col=None if backend == "pandas" else "Time",
+        )
 
         assert data_darts1 == data_darts2
         assert data_darts1 == data_darts3
 
-    def test_time_col_convert_string_integers(self):
+    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    def test_time_col_convert_string_integers(self, backend):
         expected = np.array(list(range(3, 10)))
         data_dict = {"Time": expected.astype(str)}
         data_dict["Values1"] = np.random.uniform(
             low=-10, high=10, size=len(data_dict["Time"])
         )
         df = pd.DataFrame(data_dict)
-        ts = TimeSeries.from_dataframe(df=df, time_col="Time")
+        ts = TimeSeries.from_dataframe(
+            df=self.pd_to_backend(df, backend), time_col="Time"
+        )
 
         assert set(ts.time_index.values.tolist()) == set(expected)
         assert ts.time_index.dtype == int
         assert ts.time_index.name == "Time"
 
-    def test_time_col_convert_integers(self):
+    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    def test_time_col_convert_integers(self, backend):
         expected = np.array(list(range(10)))
         data_dict = {"Time": expected}
         data_dict["Values1"] = np.random.uniform(
             low=-10, high=10, size=len(data_dict["Time"])
         )
+
         df = pd.DataFrame(data_dict)
-        ts = TimeSeries.from_dataframe(df=df, time_col="Time")
+        ts = TimeSeries.from_dataframe(
+            df=self.pd_to_backend(df, backend), time_col="Time"
+        )
 
         assert set(ts.time_index.values.tolist()) == set(expected)
         assert ts.time_index.dtype == int
         assert ts.time_index.name == "Time"
 
-    def test_fail_with_bad_integer_time_col(self):
+    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    def test_fail_with_bad_integer_time_col(self, backend):
         bad_time_col_vals = np.array([4, 0, 1, 2])
         data_dict = {"Time": bad_time_col_vals}
         data_dict["Values1"] = np.random.uniform(
@@ -2561,9 +2586,12 @@ def test_fail_with_bad_integer_time_col(self):
         )
         df = pd.DataFrame(data_dict)
         with pytest.raises(ValueError):
-            TimeSeries.from_dataframe(df=df, time_col="Time")
+            TimeSeries.from_dataframe(
+                df=self.pd_to_backend(df, backend), time_col="Time"
+            )
 
-    def test_time_col_convert_rangeindex(self):
+    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    def test_time_col_convert_rangeindex(self, backend):
         for expected_l, step in zip([[4, 0, 2, 3, 1], [8, 0, 4, 6, 2]], [1, 2]):
             expected = np.array(expected_l)
             data_dict = {"Time": expected}
@@ -2571,7 +2599,9 @@ def test_time_col_convert_rangeindex(self):
                 low=-10, high=10, size=len(data_dict["Time"])
             )
             df = pd.DataFrame(data_dict)
-            ts = TimeSeries.from_dataframe(df=df, time_col="Time")
+            ts = TimeSeries.from_dataframe(
+                df=self.pd_to_backend(df, backend), time_col="Time"
+            )
 
             # check type (should convert to RangeIndex):
             assert type(ts.time_index) is pd.RangeIndex
@@ -2586,31 +2616,38 @@ def test_time_col_convert_rangeindex(self):
             ]
             assert np.all(ar1 == ar2)
 
-    def test_time_col_convert_datetime(self):
+    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    def test_time_col_convert_datetime(self, backend):
         expected = pd.date_range(start="20180501", end="20200301", freq="MS")
         data_dict = {"Time": expected}
         data_dict["Values1"] = np.random.uniform(
             low=-10, high=10, size=len(data_dict["Time"])
         )
         df = pd.DataFrame(data_dict)
-        ts = TimeSeries.from_dataframe(df=df, time_col="Time")
+        ts = TimeSeries.from_dataframe(
+            df=self.pd_to_backend(df, backend), time_col="Time"
+        )
 
         assert ts.time_index.dtype == "datetime64[ns]"
         assert ts.time_index.name == "Time"
 
-    def test_time_col_convert_datetime_strings(self):
+    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    def test_time_col_convert_datetime_strings(self, backend):
         expected = pd.date_range(start="20180501", end="20200301", freq="MS")
         data_dict = {"Time": expected.values.astype(str)}
         data_dict["Values1"] = np.random.uniform(
             low=-10, high=10, size=len(data_dict["Time"])
         )
         df = pd.DataFrame(data_dict)
-        ts = TimeSeries.from_dataframe(df=df, time_col="Time")
+        ts = TimeSeries.from_dataframe(
+            df=self.pd_to_backend(df, backend), time_col="Time"
+        )
 
         assert ts.time_index.dtype == "datetime64[ns]"
         assert ts.time_index.name == "Time"
 
-    def test_time_col_with_tz(self):
+    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    def test_time_col_with_tz_df(self, backend):
         # numpy and xarray don't support "timezone aware" pd.DatetimeIndex
         # the BUGFIX removes timezone information without conversion
 
@@ -2621,13 +2658,10 @@ def test_time_col_with_tz(self):
         # pd.DataFrame loses the tz information unless it is contained in its index
         # (other columns are silently converted to UTC, with tz attribute set to None)
         df = pd.DataFrame(data=values, index=time_range_MS)
-        ts = TimeSeries.from_dataframe(df=df)
-        assert list(ts.time_index) == list(time_range_MS.tz_localize(None))
-        assert list(ts.time_index.tz_localize("CET")) == list(time_range_MS)
-        assert ts.time_index.tz is None
-
-        serie = pd.Series(data=values, index=time_range_MS)
-        ts = TimeSeries.from_series(pd_series=serie)
+        ts = TimeSeries.from_dataframe(
+            df=self.pd_to_backend(df, backend, index=True),
+            time_col=None if backend == "pandas" else "index",
+        )
         assert list(ts.time_index) == list(time_range_MS.tz_localize(None))
         assert list(ts.time_index.tz_localize("CET")) == list(time_range_MS)
         assert ts.time_index.tz is None
@@ -2643,23 +2677,42 @@ def test_time_col_with_tz(self):
         values = np.random.uniform(low=-10, high=10, size=len(time_range_H))
 
         df = pd.DataFrame(data=values, index=time_range_H)
-        ts = TimeSeries.from_dataframe(df=df)
+        ts = TimeSeries.from_dataframe(
+            df=self.pd_to_backend(df, backend, index=True),
+            time_col=None if backend == "pandas" else "index",
+        )
         assert list(ts.time_index) == list(time_range_H.tz_localize(None))
         assert list(ts.time_index.tz_localize("CET")) == list(time_range_H)
         assert ts.time_index.tz is None
 
-        series = pd.Series(data=values, index=time_range_H)
-        ts = TimeSeries.from_series(pd_series=series)
+        ts = TimeSeries.from_times_and_values(times=time_range_H, values=values)
         assert list(ts.time_index) == list(time_range_H.tz_localize(None))
         assert list(ts.time_index.tz_localize("CET")) == list(time_range_H)
         assert ts.time_index.tz is None
 
-        ts = TimeSeries.from_times_and_values(times=time_range_H, values=values)
+    def test_time_col_with_tz_series(self):
+        time_range_MS = pd.date_range(
+            start="20180501", end="20200301", freq="MS", tz="CET"
+        )
+        values = np.random.uniform(low=-10, high=10, size=len(time_range_MS))
+        serie = pd.Series(data=values, index=time_range_MS)
+        ts = TimeSeries.from_series(pd_series=serie)
+        assert list(ts.time_index) == list(time_range_MS.tz_localize(None))
+        assert list(ts.time_index.tz_localize("CET")) == list(time_range_MS)
+        assert ts.time_index.tz is None
+
+        time_range_H = pd.date_range(
+            start="20200518", end="20200521", freq=freqs["h"], tz="CET"
+        )
+        values = np.random.uniform(low=-10, high=10, size=len(time_range_H))
+        series = pd.Series(data=values, index=time_range_H)
+        ts = TimeSeries.from_series(pd_series=series)
         assert list(ts.time_index) == list(time_range_H.tz_localize(None))
         assert list(ts.time_index.tz_localize("CET")) == list(time_range_H)
         assert ts.time_index.tz is None
 
-    def test_time_col_convert_garbage(self):
+    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    def test_time_col_convert_garbage(self, backend):
         expected = [
             "2312312asdfdw",
             "asdfsdf432sdf",
@@ -2674,9 +2727,12 @@ def test_time_col_convert_garbage(self):
         df = pd.DataFrame(data_dict)
 
         with pytest.raises(AttributeError):
-            TimeSeries.from_dataframe(df=df, time_col="Time")
+            TimeSeries.from_dataframe(
+                df=self.pd_to_backend(df, backend), time_col="Time"
+            )
 
-    def test_df_named_columns_index(self):
+    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    def test_df_named_columns_index(self, backend):
         time_index = generate_index(
             start=pd.Timestamp("2000-01-01"), length=4, freq="D", name="index"
         )
@@ -2686,7 +2742,10 @@ def test_df_named_columns_index(self):
             columns=["y"],
         )
         df.columns.name = "id"
-        ts = TimeSeries.from_dataframe(df)
+        ts = TimeSeries.from_dataframe(
+            df=self.pd_to_backend(df, backend, index=True),
+            time_col=None if backend == "pandas" else "index",
+        )
 
         exp_ts = TimeSeries.from_times_and_values(
             times=time_index,

From de0a32d7d07e67989f03d57b0bb5e5a54ca7013b Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Wed, 19 Feb 2025 11:25:04 +0100
Subject: [PATCH 15/29] polars and narwhals added to dependencies. Ideally,
 polars should be an optional dependency.

---
 requirements/core.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/requirements/core.txt b/requirements/core.txt
index 0245c46194..3081922e93 100644
--- a/requirements/core.txt
+++ b/requirements/core.txt
@@ -1,9 +1,11 @@
 holidays>=0.11.1
 joblib>=0.16.0
 matplotlib>=3.3.0
+narwhals>=1.25.1
 nfoursid>=1.0.0
 numpy>=1.19.0,<2.0.0
 pandas>=1.0.5
+polars>=1.0.0
 pmdarima>=1.8.0
 pyod>=0.9.5
 requests>=2.22.0

From 16bac00cf7f25623808c5492818529255723eff6 Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Thu, 20 Feb 2025 15:25:59 +0100
Subject: [PATCH 16/29] refactoring pd_series and pd_dataframe

---
 darts/timeseries.py | 42 ++++++++++++++----------------------------
 1 file changed, 14 insertions(+), 28 deletions(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index 3326c41ea1..5d2c9a0946 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -1581,18 +1581,15 @@ def pd_series(self, copy=True) -> pd.Series:
         """
         self._assert_univariate()
         self._assert_deterministic()
+
+        data = self._xa[:, 0, 0].values
+        index = self._time_index
+        name = self.components[0]
+
         if copy:
-            return pd.Series(
-                self._xa[:, 0, 0].values.copy(),
-                index=self._time_index.copy(),
-                name=self.components[0],
-            )
+            return pd.Series(data=data.copy(), index=index.copy(), name=name)
         else:
-            return pd.Series(
-                self._xa[:, 0, 0].values,
-                index=self._time_index,
-                name=self.components[0],
-            )
+            return pd.Series(data=data, index=index, name=name)
 
     def pd_dataframe(self, copy=True, suppress_warnings=False) -> pd.DataFrame:
         """
@@ -1613,6 +1610,7 @@ def pd_dataframe(self, copy=True, suppress_warnings=False) -> pd.DataFrame:
         pandas.DataFrame
             The Pandas DataFrame representation of this time series
         """
+
         if not self.is_deterministic:
             if not suppress_warnings:
                 logger.warning(
@@ -1628,32 +1626,20 @@ def pd_dataframe(self, copy=True, suppress_warnings=False) -> pd.DataFrame:
                 "_s".join((comp_name, str(sample_id)))
                 for comp_name, sample_id in itertools.product(comp_name, samples)
             ]
+            data = self._xa.stack(data=(DIMS[1], DIMS[2])).values  # plain ndarray for pandas, not a DataArray
+            index = self._time_index
 
             if copy:
                 return pd.DataFrame(
-                    self._xa.stack(data=(DIMS[1], DIMS[2])).values.copy(),
-                    index=self._time_index.copy(),
-                    columns=df_col_names.copy(),
+                    data=data.copy(), index=index.copy(), columns=df_col_names.copy()
                 )
             else:
-                return pd.DataFrame(
-                    self._xa.stack(data=(DIMS[1], DIMS[2])).values,
-                    index=self._time_index,
-                    columns=df_col_names,
-                )
+                return pd.DataFrame(data=data, index=index, columns=df_col_names)
         else:
             if copy:
-                return pd.DataFrame(
-                    self._xa[:, :, 0].values.copy(),
-                    index=self._time_index.copy(),
-                    columns=self._xa.get_index(DIMS[1]).copy(),
-                )
+                return self._xa[:, :, 0].copy().to_pandas()
             else:
-                return pd.DataFrame(
-                    self._xa[:, :, 0].values,
-                    index=self._time_index,
-                    columns=self._xa.get_index(DIMS[1]),
-                )
+                return self._xa[:, :, 0].to_pandas()
 
     def quantile_df(self, quantile=0.5) -> pd.DataFrame:
         """

From 09509109996d48991edf276e1a1b87a0614c0294 Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Fri, 21 Feb 2025 10:07:21 +0100
Subject: [PATCH 17/29] removed test scripts from git repo

---
 from_df_timing.py     | 174 -----------------------------------------
 from_df_timing_col.py | 176 ------------------------------------------
 2 files changed, 350 deletions(-)
 delete mode 100644 from_df_timing.py
 delete mode 100644 from_df_timing_col.py

diff --git a/from_df_timing.py b/from_df_timing.py
deleted file mode 100644
index 823e018105..0000000000
--- a/from_df_timing.py
+++ /dev/null
@@ -1,174 +0,0 @@
-import argparse
-import json
-import time
-import warnings
-from itertools import product
-
-import numpy as np
-import pandas as pd
-from tqdm import tqdm
-
-from darts.timeseries import TimeSeries
-
-# Suppress all warnings
-warnings.filterwarnings("ignore")
-
-
-def test_from_dataframe(f_name: str):
-    return getattr(TimeSeries, f_name)
-
-
-def create_random_dataframes(
-    num_rows: int = 10,
-    num_columns: int = 3,
-    index: bool = True,
-    col_names_given: bool = True,
-    start_date: str = "1900-01-01",
-    freq: str = "D",
-) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
-    """
-    Create three pandas DataFrames with random data and dates as the index or as a column.
-
-    Parameters:
-    - num_rows (int): The number of rows in the DataFrames.
-    - num_columns (int): The number of columns in the DataFrames.
-    - index (bool): If True, the date is the index of the DataFrame. If False, the date is a column named 'date'.
-    - start_date (str): The start date for the date range (used only if date_format is 'date').
-    - freq (str): The frequency of the date range (used only if date_format is 'date').
-
-    Returns:
-    - tuple: A tuple containing three DataFrames (df_date, df_numpy, df_integer).
-    """
-    # Set a random seed for reproducibility
-    np.random.seed(42)
-
-    # Generate a date range or integer list based on the date_format parameter
-    date_values = pd.date_range(start=start_date, periods=num_rows, freq=freq)
-    integer_values = list(range(1, num_rows + 1))
-    numpy_values = np.array(
-        pd.date_range(start=start_date, periods=num_rows, freq=freq),
-        dtype="datetime64[D]",
-    )
-
-    # Create random data for the DataFrames
-    data = {f"col_{i}": np.random.randn(num_rows) for i in range(num_columns)}
-
-    # Create the DataFrames
-    df_date = pd.DataFrame(data)
-    df_numpy = pd.DataFrame(data)
-    df_integer = pd.DataFrame(data)
-
-    if col_names_given:
-        col_names = df_date.columns.values
-    else:
-        col_names = None
-
-    # Set the date as index or as a column based on the index parameter
-    if index:
-        df_date.index = date_values
-        df_numpy.index = numpy_values
-        df_integer.index = integer_values
-    else:
-        df_date["date"] = date_values
-        df_numpy["date"] = numpy_values
-        df_integer["date"] = integer_values
-
-    if index:
-        time_col = None
-    else:
-        time_col = "date"
-
-    return [
-        [df_date, col_names, time_col],
-        [df_numpy, col_names, time_col],
-        [df_integer, col_names, time_col],
-    ]
-
-
-def test_dataframes() -> list:
-    test_config = product(
-        [10, 100, 1000, 10000, 100000],
-        [100],
-        [True, False],
-        [True, False],
-    )
-
-    dataframes_list = [
-        create_random_dataframes(
-            num_rows=num_rows,
-            num_columns=num_columns,
-            index=index,
-            col_names_given=col_names_given,
-        )
-        for num_rows, num_columns, index, col_names_given in test_config
-    ]
-
-    return dataframes_list
-
-
-def calculate_processing_time(
-    f_name: str,
-    num_iter: int,
-    save_path="/Users/julesauthier/Documents/darts/from_df_times/data/",
-):
-    df_list = test_dataframes()
-    df_func = test_from_dataframe(f_name)
-
-    # Initialize dictionaries to store processing times
-    times = {}
-
-    # Initialize the progress bar
-    total_iterations = (
-        len(df_list) * 2 * 3
-    )  # 2 iterations per dataframe configuration, 3 df per config
-    progress_bar = tqdm(total=total_iterations, desc="Processing DataFrames")
-
-    for df_config in df_list:
-        for df, col_names, time_col in df_config:
-            num_rows = len(df)
-            dict_entry = str(num_rows)
-
-            for i in range(2):
-                # on the second run we shuffle the data
-                if i == 1:
-                    df = df.sample(frac=1)
-                    dict_entry += "_shuffled"
-
-                begin = time.time()
-                for _ in range(num_iter):
-                    _ = df_func(df, value_cols=col_names, time_col=time_col, freq=None)
-                end = time.time()
-                timer = (end - begin) / num_iter
-
-                if dict_entry not in times:
-                    times[dict_entry] = timer
-                else:
-                    times[dict_entry] += timer
-
-                # Update the progress bar
-                progress_bar.update(1)
-
-    file_name = f_name + "_avg_time_" + str(num_iter) + "_iter.json"
-
-    # Store the average times in separate JSON files
-    with open(save_path + file_name, "w") as f:
-        json.dump(times, f, indent=4)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="The function to test and the number of iter can "
-    )
-    parser.add_argument(
-        "--f_name", type=str, default="from_dataframe", help="method to time"
-    )
-    parser.add_argument(
-        "--n_iter", type=int, default=100, help="number of function call"
-    )
-
-    args = parser.parse_args()
-
-    f_name = args.f_name
-    n_iter = args.n_iter
-
-    calculate_processing_time(f_name, n_iter)
diff --git a/from_df_timing_col.py b/from_df_timing_col.py
deleted file mode 100644
index 4487654ea4..0000000000
--- a/from_df_timing_col.py
+++ /dev/null
@@ -1,176 +0,0 @@
-import argparse
-import json
-import time
-import warnings
-from itertools import product
-
-import numpy as np
-import pandas as pd
-from tqdm import tqdm
-
-from darts.timeseries import TimeSeries
-
-# Suppress all warnings
-warnings.filterwarnings("ignore")
-
-
-def test_from_dataframe(f_name: str):
-    return getattr(TimeSeries, f_name)
-
-
-def create_random_dataframes(
-    num_rows: int = 10,
-    num_columns: int = 3,
-    index: bool = True,
-    col_names_given: bool = True,
-    start_date: str = "1900-01-01",
-    freq: str = "D",
-) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
-    """
-    Create three pandas DataFrames with random data and dates as the index or as a column.
-
-    Parameters:
-    - num_rows (int): The number of rows in the DataFrames.
-    - num_columns (int): The number of columns in the DataFrames.
-    - index (bool): If True, the date is the index of the DataFrame. If False, the date is a column named 'date'.
-    - start_date (str): The start date for the date range (used only if date_format is 'date').
-    - freq (str): The frequency of the date range (used only if date_format is 'date').
-
-    Returns:
-    - tuple: A tuple containing three DataFrames (df_date, df_numpy, df_integer).
-    """
-    # Set a random seed for reproducibility
-    np.random.seed(42)
-
-    # Generate a date range or integer list based on the date_format parameter
-    date_values = pd.date_range(start=start_date, periods=num_rows, freq=freq)
-    integer_values = list(range(1, num_rows + 1))
-    numpy_values = np.array(
-        pd.date_range(start=start_date, periods=num_rows, freq=freq),
-        dtype="datetime64[D]",
-    )
-
-    # Create random data for the DataFrames
-    data = {f"col_{i}": np.random.randn(num_rows) for i in range(num_columns)}
-
-    # Create the DataFrames
-    df_date = pd.DataFrame(data)
-    df_numpy = pd.DataFrame(data)
-    df_integer = pd.DataFrame(data)
-
-    if col_names_given:
-        col_names = df_date.columns.values
-    else:
-        col_names = None
-
-    # Set the date as index or as a column based on the index parameter
-    if index:
-        df_date.index = date_values
-        df_numpy.index = numpy_values
-        df_integer.index = integer_values
-    else:
-        df_date["date"] = date_values
-        df_numpy["date"] = numpy_values
-        df_integer["date"] = integer_values
-
-    if index:
-        time_col = None
-    else:
-        time_col = "date"
-
-    return [
-        [df_date, col_names, time_col],
-        [df_numpy, col_names, time_col],
-        [df_integer, col_names, time_col],
-    ]
-
-
-def test_dataframes() -> list:
-    test_config = product(
-        [1000],
-        [1, 10, 100, 1000],
-        [True, False],
-        [True, False],
-    )
-
-    dataframes_list = [
-        create_random_dataframes(
-            num_rows=num_rows,
-            num_columns=num_columns,
-            index=index,
-            col_names_given=col_names_given,
-        )
-        for num_rows, num_columns, index, col_names_given in test_config
-    ]
-
-    return dataframes_list
-
-
-def calculate_processing_time(
-    f_name: str,
-    num_iter: int,
-    save_path="/Users/julesauthier/Documents/darts/from_df_times/data/",
-):
-    df_list = test_dataframes()
-    df_func = test_from_dataframe(f_name)
-
-    # Initialize dictionaries to store processing times
-    times = {}
-
-    # Initialize the progress bar
-    total_iterations = (
-        len(df_list) * 2 * 3
-    )  # 2 iterations per dataframe configuration, 3 df per config
-    progress_bar = tqdm(total=total_iterations, desc="Processing DataFrames")
-
-    for df_config in df_list:
-        for df, col_names, time_col in df_config:
-            num_cols = df.shape[1]
-            if num_cols > 1 and (num_cols % 2 == 1 or num_cols == 2):
-                num_cols -= 1
-            dict_entry = str(num_cols)
-
-            for i in range(2):
-                # on the second run we shuffle the data
-                if i == 1:
-                    df = df.sample(frac=1)
-                    dict_entry += "_shuffled"
-
-                begin = time.time()
-                for _ in range(num_iter):
-                    _ = df_func(df, value_cols=col_names, time_col=time_col, freq=None)
-                end = time.time()
-                timer = (end - begin) / num_iter
-
-                if dict_entry not in times:
-                    times[dict_entry] = timer
-                else:
-                    times[dict_entry] += timer
-
-                # Update the progress bar
-                progress_bar.update(1)
-
-    file_name = f_name + "_avg_time_cols_" + str(num_iter) + "_iter.json"
-
-    # Store the average times in separate JSON files
-    with open(save_path + file_name, "w") as f:
-        json.dump(times, f, indent=4)
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="The function to test and the number of iter can "
-    )
-    parser.add_argument(
-        "--f_name", type=str, default="from_dataframe", help="method to time"
-    )
-    parser.add_argument(
-        "--n_iter", type=int, default=100, help="number of function call"
-    )
-
-    args = parser.parse_args()
-
-    f_name = args.f_name
-    n_iter = args.n_iter
-
-    calculate_processing_time(f_name, n_iter)

From 5afc7210d77fd1fc57d84fbdbc1f53f8f8aad7cd Mon Sep 17 00:00:00 2001
From: Jules Authier <55801833+authierj@users.noreply.github.com>
Date: Fri, 21 Feb 2025 19:59:32 +0100
Subject: [PATCH 18/29] Update CHANGELOG.md

Co-authored-by: Dennis Bader <dennis.bader@gmx.ch>
---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 59d0f2565f..c9f9287295 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,7 +11,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
 
 **Improved**
 
-- Implemented the `from_dataframe()` and `from_series()` methods with [Narwhals](https://github.com/narwhals-dev/narwhals), a compatibility layer between dataframe librairies. From now on, Darts can transform pandas, polars, arrows and many other dataframes into `TimeSeries`. [#2661](https://github.com/unit8co/darts/pull/2661) by [Jules Authier](https://github.com/authierj)
+- `TimeSeries.from_dataframe()` and `from_series()` now support creating `TimeSeries` from additional backends (Polars, PyArrow, ...). We leverage `narwhals` as the compatibility layer between dataframe libraries. See the `narwhals` [documentation](https://narwhals-dev.github.io/narwhals/) for all supported backends. [#2661](https://github.com/unit8co/darts/pull/2661) by [Jules Authier](https://github.com/authierj)
 - Added ONNX support for torch-based models with method `TorchForecastingModel.to_onnx()`. Check out [this example](https://unit8co.github.io/darts/userguide/gpu_and_tpu_usage.html#exporting-model-to-onnx-format-for-inference) from the user guide on how to export and load a model for inference. [#2620](https://github.com/unit8co/darts/pull/2620) by [Antoine Madrona](https://github.com/madtoinou)
 - Made method `ForecastingModel.untrained_model()` public. Use this method to get a new (untrained) model instance created with the same parameters. [#2684](https://github.com/unit8co/darts/pull/2684) by [Timon Erhart](https://github.com/turbotimon)
 - Made it possbile to run the quickstart notebook `00-quickstart.ipynb` locally. [#2691](https://github.com/unit8co/darts/pull/2691) by [Jules Authier](https://github.com/authierj)

From 7877dd6e986faaaf7121be6d40285cd8137416bb Mon Sep 17 00:00:00 2001
From: Jules Authier <55801833+authierj@users.noreply.github.com>
Date: Fri, 21 Feb 2025 20:00:32 +0100
Subject: [PATCH 19/29] Update darts/timeseries.py

Co-authored-by: Dennis Bader <dennis.bader@gmx.ch>
---
 darts/timeseries.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index 5d2c9a0946..8d2c8601c6 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -994,7 +994,10 @@ def from_series(
         Parameters
         ----------
         pd_series
-            A Series instance.
+            The Series, or anything which can be converted to a narwhals Series (e.g. pandas.Series,
+            polars.Series, ...). See the `narwhals documentation
+            <https://narwhals-dev.github.io/narwhals/api-reference/narwhals/#narwhals.from_native>`_ for more
+            information.
         fill_missing_dates
             Optionally, a boolean value indicating whether to fill missing dates (or indices in case of integer index)
             with NaN values. This requires either a provided `freq` or the possibility to infer the frequency from the

From 102a26c7c084380392a6b777271320614ceaeaf3 Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Fri, 21 Feb 2025 20:15:59 +0100
Subject: [PATCH 20/29] easy corrections applied

---
 darts/timeseries.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index 5d2c9a0946..e03a12456d 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -690,10 +690,10 @@ def from_dataframe(
                 # The integer conversion failed; try datetimes
                 try:
                     time_index = pd.DatetimeIndex(time_col_vals)
-                except Exception:
+                except ValueError:
                     raise_log(
                         AttributeError(
-                            "'time_col' is of 'Utf8' dtype but doesn't contain valid timestamps"
+                            "'time_col' is of 'String' dtype but doesn't contain valid timestamps"
                         )
                     )
             elif time_col_vals.dtype == nw.Datetime:
@@ -713,13 +713,20 @@ def from_dataframe(
             else:
                 raise_log(
                     AttributeError(
-                        "Invalid type of `time_col`: it needs to be of either 'Utf8', 'Datetime' or 'Int64' dtype."
+                        "Invalid type of `time_col`: it needs to be of either 'String', 'Datetime' or 'Int' dtype."
                     )
                 )
         else:
             time_col_vals = nw.maybe_get_index(df)
             if time_col_vals is None:
-                raise_log(ValueError("No time column or index found in the DataFrame."))
+                raise_log(
+                    ValueError(
+                        "No time column or index found in the DataFrame. `time_col=None` "
+                        "is only supported for pandas DataFrame which is indexed with one of the "
+                        "supported index types: a DatetimeIndex, a RangeIndex, or an integer "
+                        "Index that can be converted into a RangeIndex.",
+                    ),
+                )
             # if we are here, the dataframe was pandas
             raise_if_not(
                 isinstance(time_col_vals, VALID_INDEX_TYPES)
@@ -756,7 +763,7 @@ def from_dataframe(
             attrs={STATIC_COV_TAG: static_covariates, HIERARCHY_TAG: hierarchy},
         )
 
-        return cls.from_xarray(  # really slow
+        return cls.from_xarray(
             xa=xa,
             fill_missing_dates=fill_missing_dates,
             freq=freq,
@@ -984,7 +991,7 @@ def from_series(
         static_covariates: Optional[Union[pd.Series, pd.DataFrame]] = None,
     ) -> Self:
         """
-        Build a univariate deterministic series from a pandas Series.
+        Build a univariate deterministic TimeSeries from a Series.
 
         The series must contain an index that is either a pandas DatetimeIndex, a pandas RangeIndex, or a pandas Index
         that can be converted into a RangeIndex. It is better if the index has no holes; alternatively setting

From 56a20c1e41162a35e4e914f19b2729a6135d5ae1 Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Thu, 27 Feb 2025 08:17:44 +0100
Subject: [PATCH 21/29] narwhals_test_time removed

---
 narwhals_test_time.py | 165 ------------------------------------------
 1 file changed, 165 deletions(-)
 delete mode 100644 narwhals_test_time.py

diff --git a/narwhals_test_time.py b/narwhals_test_time.py
deleted file mode 100644
index 80fd76323b..0000000000
--- a/narwhals_test_time.py
+++ /dev/null
@@ -1,165 +0,0 @@
-import time
-import warnings
-from itertools import product
-
-import numpy as np
-import pandas as pd
-
-from darts.timeseries import TimeSeries
-
-# Suppress all warnings
-warnings.filterwarnings("ignore")
-
-
-def create_random_dataframes(
-    num_rows: int = 10,
-    num_columns: int = 3,
-    index: bool = True,
-    col_names_given: bool = True,
-    start_date: str = "2023-01-01",
-    freq: str = "D",
-) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
-    """
-    Create three pandas DataFrames with random data and dates as the index or as a column.
-
-    Parameters:
-    - num_rows (int): The number of rows in the DataFrames.
-    - num_columns (int): The number of columns in the DataFrames.
-    - index (bool): If True, the date is the index of the DataFrame. If False, the date is a column named 'date'.
-    - start_date (str): The start date for the date range (used only if date_format is 'date').
-    - freq (str): The frequency of the date range (used only if date_format is 'date').
-
-    Returns:
-    - tuple: A tuple containing three DataFrames (df_date, df_numpy, df_integer).
-    """
-    # Set a random seed for reproducibility
-    np.random.seed(42)
-
-    # Generate a date range or integer list based on the date_format parameter
-    date_values = pd.date_range(start=start_date, periods=num_rows, freq=freq)
-    integer_values = list(range(1, num_rows + 1))
-    numpy_values = np.array(
-        pd.date_range(start=start_date, periods=num_rows, freq=freq),
-        dtype="datetime64[D]",
-    )
-
-    # Create random data for the DataFrames
-    data = {f"col_{i}": np.random.randn(num_rows) for i in range(num_columns)}
-
-    # Create the DataFrames
-    df_date = pd.DataFrame(data)
-    df_numpy = pd.DataFrame(data)
-    df_integer = pd.DataFrame(data)
-
-    if col_names_given:
-        col_names = df_date.columns.values
-    else:
-        col_names = None
-
-    # Set the date as index or as a column based on the index parameter
-    if index:
-        df_date.index = date_values
-        df_numpy.index = numpy_values
-        df_integer.index = integer_values
-    else:
-        df_date["date"] = date_values
-        df_numpy["date"] = numpy_values
-        df_integer["date"] = integer_values
-
-    if index:
-        time_col = None
-    else:
-        time_col = "date"
-
-    return [
-        [df_date, col_names, time_col],
-        [df_numpy, col_names, time_col],
-        [df_integer, col_names, time_col],
-    ]
-
-
-def test_dataframes() -> list:
-    test_config = product(
-        [10, 100, 1000, 10000],
-        [10, 100, 500, 1000],
-        [True, False],
-        [True, False],
-    )
-
-    dataframes_list = [
-        create_random_dataframes(
-            num_rows=num_rows,
-            num_columns=num_columns,
-            index=index,
-            col_names_given=col_names_given,
-        )
-        for num_rows, num_columns, index, col_names_given in test_config
-    ]
-
-    return dataframes_list
-
-
-df_list = test_dataframes()
-
-num_iter = 5
-pandas_global_timer = 0
-narwhals_global_timer = 0
-
-for iter in range(num_iter + 1):
-    pandas_timer = 0
-    narwhals_timer = 0
-    for df_config in df_list:
-        for df, col_names, time_col in df_config:
-            for i in range(2):
-                # on the second run we shuffle the data
-                if i == 1:
-                    df = df.sample(frac=1)
-
-                # pandas processing time
-                begin = time.time()
-                pandas_timeseries = TimeSeries.from_dataframe(
-                    df, value_cols=col_names, time_col=time_col, freq=None
-                )
-                end = time.time()
-                pandas_timer += end - begin
-
-                # narwhals processing time
-                begin_nw = time.time()
-                narwhals_timeseries = TimeSeries.from_narwhals_dataframe(
-                    df, value_cols=col_names, time_col=time_col, freq=None
-                )
-                end_nw = time.time()
-                narwhals_timer += end_nw - begin_nw
-
-                # Check if the TimeSeries objects are equal
-                try:
-                    assert pandas_timeseries.time_index.equals(
-                        narwhals_timeseries.time_index
-                    )
-                except AssertionError as e:
-                    print(
-                        f"Index assertion failed for DataFrame with columns {col_names} and time_col {time_col}: {e}"
-                    )
-                try:
-                    np.testing.assert_array_almost_equal(
-                        pandas_timeseries.all_values(), narwhals_timeseries.all_values()
-                    )
-                except AssertionError as e:
-                    print(
-                        f"Equal assertion failed for DataFrame with columns {col_names} and time_col {time_col}: {e}"
-                    )
-    # throw first iteration away, memory initialization
-    if iter > 0:
-        print(f"pandas processing time: {pandas_timer:.4f}")
-        print(f"narwhals processing time: {narwhals_timer:.4f} \n")
-        pandas_global_timer += pandas_timer
-        narwhals_global_timer += narwhals_timer
-
-pandas_global_timer /= num_iter
-narwhals_global_timer /= num_iter
-
-print(f"Average pandas processing time: {pandas_global_timer:.4f}")
-print(f"Average narwhals processing time: {narwhals_global_timer:.4f} \n")
-
-diff_in_fraction = (-pandas_global_timer + narwhals_global_timer) / pandas_global_timer
-print(f"Average processing time difference: {diff_in_fraction:.2%}")

From f764e198e9732565dc2228dbac4771557764750a Mon Sep 17 00:00:00 2001
From: Jules Authier <55801833+authierj@users.noreply.github.com>
Date: Thu, 27 Feb 2025 08:18:33 +0100
Subject: [PATCH 22/29] Update requirements/core.txt

Co-authored-by: Dennis Bader <dennis.bader@gmx.ch>
---
 requirements/core.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/requirements/core.txt b/requirements/core.txt
index 3081922e93..21abb02049 100644
--- a/requirements/core.txt
+++ b/requirements/core.txt
@@ -5,7 +5,6 @@ narwhals>=1.25.1
 nfoursid>=1.0.0
 numpy>=1.19.0,<2.0.0
 pandas>=1.0.5
-polars>=1.0.0
 pmdarima>=1.8.0
 pyod>=0.9.5
 requests>=2.22.0

From 319a48f7f171155447356372fb300fbcb0ad590e Mon Sep 17 00:00:00 2001
From: Jules Authier <55801833+authierj@users.noreply.github.com>
Date: Thu, 27 Feb 2025 08:40:38 +0100
Subject: [PATCH 23/29] Update darts/timeseries.py

Co-authored-by: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com>
---
 darts/timeseries.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index ff018572e2..cb17052938 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -651,7 +651,7 @@ def from_dataframe(
         TimeSeries
             A univariate or multivariate deterministic TimeSeries constructed from the inputs.
         """
-        df = nw.from_native(df)
+        df = nw.from_native(df, eager_only=True, pass_through=False)
 
         # get values
         if value_cols is None:

From e8925f107e3bd6bd0ba172dcc9325159eca07acd Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Thu, 27 Feb 2025 14:50:34 +0100
Subject: [PATCH 24/29] most corrections added

---
 darts/timeseries.py | 124 +++++++++++++++++++++-----------------------
 1 file changed, 60 insertions(+), 64 deletions(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index ff018572e2..ae353656c7 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -33,6 +33,7 @@
 ``TimeSeries`` documentation for more information on covariates.
 """
 
+import contextlib
 import itertools
 import pickle
 import re
@@ -588,7 +589,10 @@ def from_dataframe(
         Parameters
         ----------
         df
-            The DataFrame
+            The DataFrame, or anything which can be converted to a narwhals DataFrame (e.g. pandas.DataFrame,
+            polars.DataFrame, ...). See the `narwhals documentation
+            <https://narwhals-dev.github.io/narwhals/api-reference/narwhals/#narwhals.from_native>`_ for more
+            information.
         time_col
             The time column name. If set, the column will be cast to a pandas DatetimeIndex (if it contains
             timestamps) or a RangeIndex (if it contains integers).
@@ -651,29 +655,28 @@ def from_dataframe(
         TimeSeries
             A univariate or multivariate deterministic TimeSeries constructed from the inputs.
         """
-        df = nw.from_native(df)
+        df = nw.from_native(df, eager_only=True, pass_through=False)
+        time_zone = None
 
         # get values
         if value_cols is None:
             series_df = df.drop(time_col) if time_col else df
         else:
-            if isinstance(value_cols, str):
+            if isinstance(value_cols, (str, int)):
                 value_cols = [value_cols]
-            series_df = df[value_cols]  # quite slow
+            series_df = df[value_cols]
 
         # get time index
         if time_col:
             if time_col not in df.columns:
                 raise_log(AttributeError(f"time_col='{time_col}' is not present."))
 
-            time_col_vals = df[time_col]
+            time_col_vals = df.get_column(time_col)
 
             if time_col_vals.dtype == nw.String:
                 # Try to convert to integers if needed
-                try:
+                with contextlib.suppress(Exception):
                     time_col_vals = time_col_vals.cast(nw.Int64)
-                except Exception:
-                    pass
 
             if time_col_vals.dtype.is_integer():
                 if time_col_vals.is_duplicated().any():
@@ -686,7 +689,7 @@ def from_dataframe(
                 # RangeIndex in `TimeSeries.from_xarray()`
                 time_index = pd.Index(time_col_vals)
 
-            elif time_col_vals.dtype == nw.String:
+            elif isinstance(time_col_vals.dtype, nw.String):
                 # The integer conversion failed; try datetimes
                 try:
                     time_index = pd.DatetimeIndex(time_col_vals)
@@ -696,18 +699,10 @@ def from_dataframe(
                             "'time_col' is of 'String' dtype but doesn't contain valid timestamps"
                         )
                     )
-            elif time_col_vals.dtype == nw.Datetime:
-                # BUGFIX : force time-index to be timezone naive as xarray doesn't support it
-                # pandas.DataFrame loses the tz information if it's not its index
-                if time_col_vals.dtype.time_zone is not None:
-                    logger.warning(
-                        "The provided Datetime data was associated with a timezone, which is currently not supported "
-                        "by xarray. To avoid unexpected behaviour, the tz information was removed. Consider calling "
-                        f"`ts.time_index.tz_localize({time_col_vals.dtype.time_zone})` when exporting the results."
-                        "To plot the series with the right time steps, consider setting the matplotlib.pyplot "
-                        "`rcParams['timezone']` parameter to automatically convert the time axis back to the "
-                        "original timezone."
-                    )
+            elif isinstance(time_col_vals.dtype, nw.Datetime):
+                # remember time zone here as polars converts to UTC
+                time_zone = time_col_vals.dtype.time_zone
+                if time_zone is not None:
                     time_col_vals = time_col_vals.dt.replace_time_zone(None)
                 time_index = pd.DatetimeIndex(time_col_vals)
             else:
@@ -717,8 +712,8 @@ def from_dataframe(
                     )
                 )
         else:
-            time_col_vals = nw.maybe_get_index(df)
-            if time_col_vals is None:
+            time_index = nw.maybe_get_index(df)
+            if time_index is None:
                 raise_log(
                     ValueError(
                         "No time column or index found in the DataFrame. `time_col=None` "
@@ -728,30 +723,33 @@ def from_dataframe(
                     ),
                 )
             # if we are here, the dataframe was pandas
-            raise_if_not(
-                isinstance(time_col_vals, VALID_INDEX_TYPES)
-                or np.issubdtype(time_col_vals.dtype, np.integer),
-                "If time_col is not specified, the DataFrame must be indexed either with "
-                "a DatetimeIndex, a RangeIndex, or an integer Index that can be converted into a RangeIndex",
-                logger,
-            )
-            # BUGFIX : force time-index to be timezone naive as xarray doesn't support it
-            # pandas.DataFrame loses the tz information if it's not its index
-            if (
-                isinstance(time_col_vals, pd.DatetimeIndex)
-                and time_col_vals.tz is not None
+            if not (
+                isinstance(time_index, VALID_INDEX_TYPES)
+                or np.issubdtype(time_index.dtype, np.integer)
             ):
-                logger.warning(
-                    "The provided DatetimeIndex was associated with a timezone, which is currently not supported "
-                    "by xarray. To avoid unexpected behaviour, the tz information was removed. Consider calling "
-                    f"`ts.time_index.tz_localize({time_col_vals.tz})` when exporting the results."
-                    "To plot the series with the right time steps, consider setting the matplotlib.pyplot "
-                    "`rcParams['timezone']` parameter to automatically convert the time axis back to the "
-                    "original timezone."
+                raise_log(
+                    ValueError(
+                        "If time_col is not specified, the DataFrame must be indexed either with "
+                        "a DatetimeIndex, a RangeIndex, or an integer Index that can be converted into a RangeIndex"
+                    ),
+                    logger,
                 )
-                time_index = time_col_vals.tz_localize(None)
-            else:
-                time_index = time_col_vals
+            if isinstance(time_index, pd.DatetimeIndex):
+                time_zone = time_index.tz
+                if time_zone is not None:
+                    # remove and remember time zone here as pandas converts to UTC
+                    time_index = time_index.tz_localize(None)
+
+        # BUGFIX : force time-index to be timezone naive as xarray doesn't support it
+        if time_zone is not None:
+            logger.warning(
+                "The provided DatetimeIndex was associated with a timezone, which is currently not supported "
+                "by xarray. To avoid unexpected behaviour, the tz information was removed. Consider calling "
+                f"`ts.time_index.tz_localize({time_zone})` when exporting the results."
+                "To plot the series with the right time steps, consider setting the matplotlib.pyplot "
+                "`rcParams['timezone']` parameter to automatically convert the time axis back to the "
+                "original timezone."
+            )
 
         if not time_index.name:
             time_index.name = time_col if time_col else DIMS[0]
@@ -1028,7 +1026,7 @@ def from_series(
         TimeSeries
             A univariate and deterministic TimeSeries constructed from the inputs.
         """
-        nw_series = nw.from_native(pd_series, series_only=True)
+        nw_series = nw.from_native(pd_series, series_only=True, pass_through=False)
         df = nw_series.to_frame()
         return cls.from_dataframe(
             df,
@@ -1597,9 +1595,10 @@ def pd_series(self, copy=True) -> pd.Series:
         name = self.components[0]
 
         if copy:
-            return pd.Series(data=data.copy(), index=index.copy(), name=name)
-        else:
-            return pd.Series(data=data, index=index, name=name)
+            data = data.copy()
+            index = index.copy()
+
+        return pd.Series(data=data, index=index, name=name)
 
     def pd_dataframe(self, copy=True, suppress_warnings=False) -> pd.DataFrame:
         """
@@ -1620,7 +1619,6 @@ def pd_dataframe(self, copy=True, suppress_warnings=False) -> pd.DataFrame:
         pandas.DataFrame
             The Pandas DataFrame representation of this time series
         """
-
         if not self.is_deterministic:
             if not suppress_warnings:
                 logger.warning(
@@ -1632,24 +1630,22 @@ def pd_dataframe(self, copy=True, suppress_warnings=False) -> pd.DataFrame:
 
             comp_name = list(self.components)
             samples = range(self.n_samples)
-            df_col_names = [
+            columns = [
                 "_s".join((comp_name, str(sample_id)))
                 for comp_name, sample_id in itertools.product(comp_name, samples)
             ]
-            data = self._xa.stack(data=(DIMS[1], DIMS[2]))
-            index = self._time_index
-
-            if copy:
-                return pd.DataFrame(
-                    data=data.copy(), index=index.copy(), columns=df_col_names.copy()
-                )
-            else:
-                return pd.DataFrame(data=data, index=index, columns=df_col_names)
+            data = self._xa.stack(data=(DIMS[1], DIMS[2])).values
         else:
-            if copy:
-                return self._xa[:, :, 0].copy().to_pandas()
-            else:
-                return self._xa[:, :, 0].to_pandas()
+            columns = self._xa.get_index(DIMS[1])
+            data = self._xa[:, :, 0].values
+        index = self._time_index
+
+        if copy:
+            columns = columns.copy()
+            data = data.copy()
+            index = index.copy()
+
+        return pd.DataFrame(data=data, index=index, columns=columns)
 
     def quantile_df(self, quantile=0.5) -> pd.DataFrame:
         """

From 11d17c1e7c45036e6f3048043a9d6a369d7491ec Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Thu, 27 Feb 2025 15:17:54 +0100
Subject: [PATCH 25/29] polars tests removed

---
 darts/tests/test_timeseries.py | 37 +++++++++++++++++-----------------
 darts/timeseries.py            |  1 +
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/darts/tests/test_timeseries.py b/darts/tests/test_timeseries.py
index d8f1505751..8245efc194 100644
--- a/darts/tests/test_timeseries.py
+++ b/darts/tests/test_timeseries.py
@@ -5,7 +5,6 @@
 
 import numpy as np
 import pandas as pd
-import polars as pl
 import pytest
 import xarray as xr
 from scipy.stats import kurtosis, skew
@@ -2507,15 +2506,15 @@ def test_tail_numeric_time_index(self):
 
 
 class TestTimeSeriesFromDataFrame:
-    def pd_to_backend(self, df, backend, index=False):
-        if backend == "pandas":
-            return df
-        elif backend == "polars":
-            if index:
-                return pl.from_pandas(df.reset_index())
-            return pl.from_pandas(df)
-
-    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    # def pd_to_backend(self, df, backend, index=False):
+    #     if backend == "pandas":
+    #         return df
+    #     elif backend == "polars":
+    #         if index:
+    #             return pl.from_pandas(df.reset_index())
+    #         return pl.from_pandas(df)
+
+    @pytest.mark.parametrize("backend", "pandas")
     def test_from_dataframe_sunny_day(self, backend):
         data_dict = {"Time": pd.date_range(start="20180501", end="20200301", freq="MS")}
         data_dict["Values1"] = np.random.uniform(
@@ -2544,7 +2543,7 @@ def test_from_dataframe_sunny_day(self, backend):
         assert data_darts1 == data_darts2
         assert data_darts1 == data_darts3
 
-    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    @pytest.mark.parametrize("backend", "pandas")
     def test_time_col_convert_string_integers(self, backend):
         expected = np.array(list(range(3, 10)))
         data_dict = {"Time": expected.astype(str)}
@@ -2560,7 +2559,7 @@ def test_time_col_convert_string_integers(self, backend):
         assert ts.time_index.dtype == int
         assert ts.time_index.name == "Time"
 
-    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    @pytest.mark.parametrize("backend", "pandas")
     def test_time_col_convert_integers(self, backend):
         expected = np.array(list(range(10)))
         data_dict = {"Time": expected}
@@ -2577,7 +2576,7 @@ def test_time_col_convert_integers(self, backend):
         assert ts.time_index.dtype == int
         assert ts.time_index.name == "Time"
 
-    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    @pytest.mark.parametrize("backend", "pandas")
     def test_fail_with_bad_integer_time_col(self, backend):
         bad_time_col_vals = np.array([4, 0, 1, 2])
         data_dict = {"Time": bad_time_col_vals}
@@ -2590,7 +2589,7 @@ def test_fail_with_bad_integer_time_col(self, backend):
                 df=self.pd_to_backend(df, backend), time_col="Time"
             )
 
-    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    @pytest.mark.parametrize("backend", "pandas")
     def test_time_col_convert_rangeindex(self, backend):
         for expected_l, step in zip([[4, 0, 2, 3, 1], [8, 0, 4, 6, 2]], [1, 2]):
             expected = np.array(expected_l)
@@ -2616,7 +2615,7 @@ def test_time_col_convert_rangeindex(self, backend):
             ]
             assert np.all(ar1 == ar2)
 
-    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    @pytest.mark.parametrize("backend", "pandas")
     def test_time_col_convert_datetime(self, backend):
         expected = pd.date_range(start="20180501", end="20200301", freq="MS")
         data_dict = {"Time": expected}
@@ -2631,7 +2630,7 @@ def test_time_col_convert_datetime(self, backend):
         assert ts.time_index.dtype == "datetime64[ns]"
         assert ts.time_index.name == "Time"
 
-    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    @pytest.mark.parametrize("backend", "pandas")
     def test_time_col_convert_datetime_strings(self, backend):
         expected = pd.date_range(start="20180501", end="20200301", freq="MS")
         data_dict = {"Time": expected.values.astype(str)}
@@ -2646,7 +2645,7 @@ def test_time_col_convert_datetime_strings(self, backend):
         assert ts.time_index.dtype == "datetime64[ns]"
         assert ts.time_index.name == "Time"
 
-    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    @pytest.mark.parametrize("backend", "pandas")
     def test_time_col_with_tz_df(self, backend):
         # numpy and xarray don't support "timezone aware" pd.DatetimeIndex
         # the BUGFIX removes timezone information without conversion
@@ -2711,7 +2710,7 @@ def test_time_col_with_tz_series(self):
         assert list(ts.time_index.tz_localize("CET")) == list(time_range_H)
         assert ts.time_index.tz is None
 
-    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    @pytest.mark.parametrize("backend", "pandas")
     def test_time_col_convert_garbage(self, backend):
         expected = [
             "2312312asdfdw",
@@ -2731,7 +2730,7 @@ def test_time_col_convert_garbage(self, backend):
                 df=self.pd_to_backend(df, backend), time_col="Time"
             )
 
-    @pytest.mark.parametrize("backend", ["pandas", "polars"])
+    @pytest.mark.parametrize("backend", "pandas")
     def test_df_named_columns_index(self, backend):
         time_index = generate_index(
             start=pd.Timestamp("2000-01-01"), length=4, freq="D", name="index"
diff --git a/darts/timeseries.py b/darts/timeseries.py
index ae353656c7..dd80f780b1 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -711,6 +711,7 @@ def from_dataframe(
                         "Invalid type of `time_col`: it needs to be of either 'String', 'Datetime' or 'Int' dtype."
                     )
                 )
+            time_index.name = time_col
         else:
             time_index = nw.maybe_get_index(df)
             if time_index is None:

From f9f5aa8f164600fd136080fe5a1f5d251c85fcce Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Thu, 27 Feb 2025 16:09:36 +0100
Subject: [PATCH 26/29] tests corrected

---
 darts/tests/test_timeseries.py | 36 +++++++++++++++++-----------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/darts/tests/test_timeseries.py b/darts/tests/test_timeseries.py
index 8245efc194..f4d52477d3 100644
--- a/darts/tests/test_timeseries.py
+++ b/darts/tests/test_timeseries.py
@@ -2506,15 +2506,15 @@ def test_tail_numeric_time_index(self):
 
 
 class TestTimeSeriesFromDataFrame:
-    # def pd_to_backend(self, df, backend, index=False):
-    #     if backend == "pandas":
-    #         return df
-    #     elif backend == "polars":
-    #         if index:
-    #             return pl.from_pandas(df.reset_index())
-    #         return pl.from_pandas(df)
-
-    @pytest.mark.parametrize("backend", "pandas")
+    def pd_to_backend(self, df, backend, index=False):
+        if backend == "pandas":
+            return df
+        # elif backend == "polars":
+        #     if index:
+        #         return pl.from_pandas(df.reset_index())
+        #     return pl.from_pandas(df)
+
+    @pytest.mark.parametrize("backend", ["pandas"])
     def test_from_dataframe_sunny_day(self, backend):
         data_dict = {"Time": pd.date_range(start="20180501", end="20200301", freq="MS")}
         data_dict["Values1"] = np.random.uniform(
@@ -2543,7 +2543,7 @@ def test_from_dataframe_sunny_day(self, backend):
         assert data_darts1 == data_darts2
         assert data_darts1 == data_darts3
 
-    @pytest.mark.parametrize("backend", "pandas")
+    @pytest.mark.parametrize("backend", ["pandas"])
     def test_time_col_convert_string_integers(self, backend):
         expected = np.array(list(range(3, 10)))
         data_dict = {"Time": expected.astype(str)}
@@ -2559,7 +2559,7 @@ def test_time_col_convert_string_integers(self, backend):
         assert ts.time_index.dtype == int
         assert ts.time_index.name == "Time"
 
-    @pytest.mark.parametrize("backend", "pandas")
+    @pytest.mark.parametrize("backend", ["pandas"])
     def test_time_col_convert_integers(self, backend):
         expected = np.array(list(range(10)))
         data_dict = {"Time": expected}
@@ -2576,7 +2576,7 @@ def test_time_col_convert_integers(self, backend):
         assert ts.time_index.dtype == int
         assert ts.time_index.name == "Time"
 
-    @pytest.mark.parametrize("backend", "pandas")
+    @pytest.mark.parametrize("backend", ["pandas"])
     def test_fail_with_bad_integer_time_col(self, backend):
         bad_time_col_vals = np.array([4, 0, 1, 2])
         data_dict = {"Time": bad_time_col_vals}
@@ -2589,7 +2589,7 @@ def test_fail_with_bad_integer_time_col(self, backend):
                 df=self.pd_to_backend(df, backend), time_col="Time"
             )
 
-    @pytest.mark.parametrize("backend", "pandas")
+    @pytest.mark.parametrize("backend", ["pandas"])
     def test_time_col_convert_rangeindex(self, backend):
         for expected_l, step in zip([[4, 0, 2, 3, 1], [8, 0, 4, 6, 2]], [1, 2]):
             expected = np.array(expected_l)
@@ -2615,7 +2615,7 @@ def test_time_col_convert_rangeindex(self, backend):
             ]
             assert np.all(ar1 == ar2)
 
-    @pytest.mark.parametrize("backend", "pandas")
+    @pytest.mark.parametrize("backend", ["pandas"])
     def test_time_col_convert_datetime(self, backend):
         expected = pd.date_range(start="20180501", end="20200301", freq="MS")
         data_dict = {"Time": expected}
@@ -2630,7 +2630,7 @@ def test_time_col_convert_datetime(self, backend):
         assert ts.time_index.dtype == "datetime64[ns]"
         assert ts.time_index.name == "Time"
 
-    @pytest.mark.parametrize("backend", "pandas")
+    @pytest.mark.parametrize("backend", ["pandas"])
     def test_time_col_convert_datetime_strings(self, backend):
         expected = pd.date_range(start="20180501", end="20200301", freq="MS")
         data_dict = {"Time": expected.values.astype(str)}
@@ -2645,7 +2645,7 @@ def test_time_col_convert_datetime_strings(self, backend):
         assert ts.time_index.dtype == "datetime64[ns]"
         assert ts.time_index.name == "Time"
 
-    @pytest.mark.parametrize("backend", "pandas")
+    @pytest.mark.parametrize("backend", ["pandas"])
     def test_time_col_with_tz_df(self, backend):
         # numpy and xarray don't support "timezone aware" pd.DatetimeIndex
         # the BUGFIX removes timezone information without conversion
@@ -2710,7 +2710,7 @@ def test_time_col_with_tz_series(self):
         assert list(ts.time_index.tz_localize("CET")) == list(time_range_H)
         assert ts.time_index.tz is None
 
-    @pytest.mark.parametrize("backend", "pandas")
+    @pytest.mark.parametrize("backend", ["pandas"])
     def test_time_col_convert_garbage(self, backend):
         expected = [
             "2312312asdfdw",
@@ -2730,7 +2730,7 @@ def test_time_col_convert_garbage(self, backend):
                 df=self.pd_to_backend(df, backend), time_col="Time"
             )
 
-    @pytest.mark.parametrize("backend", "pandas")
+    @pytest.mark.parametrize("backend", ["pandas"])
     def test_df_named_columns_index(self, backend):
         time_index = generate_index(
             start=pd.Timestamp("2000-01-01"), length=4, freq="D", name="index"

From c13cc1d3352571f4e6bae67f4e6e19f575842369 Mon Sep 17 00:00:00 2001
From: Jules Authier <55801833+authierj@users.noreply.github.com>
Date: Fri, 28 Feb 2025 16:28:49 +0100
Subject: [PATCH 27/29] Update darts/timeseries.py

Co-authored-by: Dennis Bader <dennis.bader@gmx.ch>
---
 darts/timeseries.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index dd80f780b1..e1590b22be 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -1000,9 +1000,9 @@ def from_series(
         Parameters
         ----------
         pd_series
-            The Series, or anything which can be converted to a narwhals Series (e.g. pandas.Series, ...)
-            polars.DataFrame, ...). See the `narwhals documentation
-            <https://narwhals-dev.github.io/narwhals/api-reference/narwhals/#narwhals.from_native>`_ for more
+            The Series, or anything which can be converted to a narwhals Series (e.g. pandas.Series, ...). See the 
+            `narwhals documentation 
+            <https://narwhals-dev.github.io/narwhals/api-reference/narwhals/#narwhals.from_native>`_ for more 
             information.
         fill_missing_dates
             Optionally, a boolean value indicating whether to fill missing dates (or indices in case of integer index)

From 370d7619d8bb773fb4ff667df0e1e3d7a8b5852a Mon Sep 17 00:00:00 2001
From: Jules Authier <55801833+authierj@users.noreply.github.com>
Date: Fri, 28 Feb 2025 16:29:35 +0100
Subject: [PATCH 28/29] Update darts/timeseries.py

Co-authored-by: Dennis Bader <dennis.bader@gmx.ch>
---
 darts/timeseries.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index e1590b22be..317a053ad0 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -990,7 +990,7 @@ def from_series(
         static_covariates: Optional[Union[pd.Series, pd.DataFrame]] = None,
     ) -> Self:
         """
-        Build a univariate deterministic TimeSeries from a Series
+        Build a univariate deterministic TimeSeries from a Series.
 
         The series must contain an index that is either a pandas DatetimeIndex, a pandas RangeIndex, or a pandas Index
         that can be converted into a RangeIndex. It is better if the index has no holes; alternatively setting

From 3fa924f429b38b0526e658a3ddcd27ca7169a1b4 Mon Sep 17 00:00:00 2001
From: authierj <jules.authier@unit8.co>
Date: Fri, 28 Feb 2025 16:36:59 +0100
Subject: [PATCH 29/29] default to RangeIndex when no time_col or index is found

---
 darts/timeseries.py | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/darts/timeseries.py b/darts/timeseries.py
index 317a053ad0..2a1b336bcf 100644
--- a/darts/timeseries.py
+++ b/darts/timeseries.py
@@ -715,16 +715,14 @@ def from_dataframe(
         else:
             time_index = nw.maybe_get_index(df)
             if time_index is None:
-                raise_log(
-                    ValueError(
-                        "No time column or index found in the DataFrame. `time_col=None` "
-                        "is only supported for pandas DataFrame which is indexed with one of the "
-                        "supported index types: a DatetimeIndex, a RangeIndex, or an integer "
-                        "Index that can be converted into a RangeIndex.",
-                    ),
+                time_index = pd.RangeIndex(len(df))
+                logger.info(
+                    "No time column specified (`time_col=None`) and no index found in the DataFrame. Defaulting to "
+                    "`pandas.RangeIndex(len(df))`. If this is not desired consider adding a time column "
+                    "to your dataframe and defining `time_col`."
                 )
             # if we are here, the dataframe was pandas
-            if not (
+            elif not (
                 isinstance(time_index, VALID_INDEX_TYPES)
                 or np.issubdtype(time_index.dtype, np.integer)
             ):
@@ -1000,9 +998,9 @@ def from_series(
         Parameters
         ----------
         pd_series
-            The Series, or anything which can be converted to a narwhals Series (e.g. pandas.Series, ...). See the 
-            `narwhals documentation 
-            <https://narwhals-dev.github.io/narwhals/api-reference/narwhals/#narwhals.from_native>`_ for more 
+            The Series, or anything which can be converted to a narwhals Series (e.g. pandas.Series, ...). See the
+            `narwhals documentation
+            <https://narwhals-dev.github.io/narwhals/api-reference/narwhals/#narwhals.from_native>`_ for more
             information.
         fill_missing_dates
             Optionally, a boolean value indicating whether to fill missing dates (or indices in case of integer index)