-
Notifications
You must be signed in to change notification settings - Fork 153
fix: Preserve pandas column name attribute #2363
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. Weβll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
feaad95
f28599f
fc982d3
f557dd5
46c29e7
d5fea77
f3b4cf9
9f3c415
bd7fd09
987a688
7f39ada
0d105a3
13166e7
afaa9ec
2cf041c
984a57b
823293b
788bc1f
1e745d4
e1ef8b5
28795f2
5efaff2
f202c57
e024c54
5387649
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,11 +21,11 @@ | |
from narwhals._pandas_like.utils import check_column_names_are_unique | ||
from narwhals._pandas_like.utils import convert_str_slice_to_int_slice | ||
from narwhals._pandas_like.utils import get_dtype_backend | ||
from narwhals._pandas_like.utils import horizontal_concat | ||
from narwhals._pandas_like.utils import native_to_narwhals_dtype | ||
from narwhals._pandas_like.utils import object_native_to_narwhals_dtype | ||
from narwhals._pandas_like.utils import pivot_table | ||
from narwhals._pandas_like.utils import rename | ||
from narwhals._pandas_like.utils import rename_axis | ||
from narwhals._pandas_like.utils import select_columns_by_name | ||
from narwhals._pandas_like.utils import set_index | ||
from narwhals.dependencies import is_numpy_array_1d | ||
|
@@ -120,6 +120,7 @@ def __init__( | |
validate_backend_version(self._implementation, self._backend_version) | ||
if validate_column_names: | ||
check_column_names_are_unique(native_dataframe.columns) | ||
self._native_columns_name = native_dataframe.columns.name | ||
|
||
@classmethod | ||
def from_arrow(cls, data: IntoArrowTable, /, *, context: _FullContext) -> Self: | ||
|
@@ -251,7 +252,12 @@ def _with_version(self: Self, version: Version) -> Self: | |
|
||
def _with_native(self: Self, df: Any, *, validate_column_names: bool = True) -> Self: | ||
return self.__class__( | ||
df, | ||
rename_axis( | ||
df, | ||
implementation=self._implementation, | ||
backend_version=self._backend_version, | ||
columns=self._native_columns_name, | ||
), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. π€ i'm wondering if we could/should just set else we need to trust pandas' copy-on-write / There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I trust your judgement here - but should I would be concerned as a pandas user? |
||
implementation=self._implementation, | ||
backend_version=self._backend_version, | ||
version=self._version, | ||
|
@@ -504,11 +510,8 @@ def select(self: PandasLikeDataFrame, *exprs: PandasLikeExpr) -> PandasLikeDataF | |
# return empty dataframe, like Polars does | ||
return self._with_native(self.native.__class__(), validate_column_names=False) | ||
new_series = align_series_full_broadcast(*new_series) | ||
df = horizontal_concat( | ||
[s.native for s in new_series], | ||
implementation=self._implementation, | ||
backend_version=self._backend_version, | ||
) | ||
namespace = self.__narwhals_namespace__() | ||
df = namespace._horizontal_concat([s.native for s in new_series]) | ||
return self._with_native(df, validate_column_names=True) | ||
|
||
def drop_nulls( | ||
|
@@ -531,13 +534,7 @@ def with_row_index(self: Self, name: str) -> Self: | |
row_index = namespace._series.from_iterable( | ||
range(len(frame)), context=self, index=frame.index | ||
).alias(name) | ||
return self._with_native( | ||
horizontal_concat( | ||
[row_index.native, frame], | ||
implementation=self._implementation, | ||
backend_version=self._backend_version, | ||
) | ||
) | ||
return self._with_native(namespace._horizontal_concat([row_index.native, frame])) | ||
|
||
def row(self: Self, index: int) -> tuple[Any, ...]: | ||
return tuple(x for x in self.native.iloc[index]) | ||
|
@@ -571,11 +568,8 @@ def with_columns( | |
series = self.native[name] | ||
to_concat.append(series) | ||
to_concat.extend(self._extract_comparand(s) for s in name_columns.values()) | ||
df = horizontal_concat( | ||
to_concat, | ||
implementation=self._implementation, | ||
backend_version=self._backend_version, | ||
) | ||
namespace = self.__narwhals_namespace__() | ||
df = namespace._horizontal_concat(to_concat) | ||
return self._with_native(df, validate_column_names=False) | ||
|
||
def rename(self: Self, mapping: Mapping[str, str]) -> Self: | ||
|
@@ -633,7 +627,12 @@ def collect( | |
import pandas as pd # ignore-banned-import | ||
|
||
return PandasLikeDataFrame( | ||
self.to_pandas(), | ||
rename_axis( | ||
self.to_pandas(), | ||
implementation=self._implementation, | ||
backend_version=self._backend_version, | ||
columns=self._native_columns_name, | ||
), | ||
implementation=Implementation.PANDAS, | ||
backend_version=parse_version(pd), | ||
version=self._version, | ||
|
FBruzzesi marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -488,8 +488,8 @@ def _from_native_impl( # noqa: PLR0915 | |
return DataFrame( | ||
PandasLikeDataFrame( | ||
native_object, | ||
backend_version=parse_version(pd), | ||
implementation=Implementation.PANDAS, | ||
backend_version=parse_version(pd), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Mental sanity for symmetry with other pandas-like and order of the spec π |
||
version=version, | ||
validate_column_names=True, | ||
), | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
from __future__ import annotations | ||
|
||
from typing import TYPE_CHECKING | ||
|
||
import pytest | ||
|
||
import narwhals.stable.v1 as nw | ||
|
||
if TYPE_CHECKING: | ||
from tests.utils import Constructor | ||
|
||
|
||
def test_ops_preserve_column_index_name(constructor: Constructor) -> None: | ||
if not any(x in str(constructor) for x in ("pandas", "modin", "cudf", "dask")): | ||
pytest.skip( | ||
reason="Dataframe columns is a list and do not have a `name` like a pandas Index does" | ||
) | ||
|
||
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]} | ||
df_native = constructor(data) | ||
df_native.columns.name = "foo" # type: ignore[union-attr] | ||
|
||
df = nw.from_native(df_native) | ||
|
||
result = df.with_columns(b=nw.col("a") + 1, c=nw.col("a") * 2).select("c", "b") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wanted to concatenate two methods here, mostly for the sake of it. |
||
|
||
assert result.to_native().columns.name == "foo" # type: ignore[union-attr] | ||
assert result.lazy().collect(backend="pandas").to_native().columns.name == "foo" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Dask
select
andwith_columns
have no issues since we use.assign
method for both!