Skip to content

Commit dfc91a4

Browse files
docs: more args and returns documentation in DataFrame class (#1600)
* more args and returns documentation in DataFrame class * address comment * more documentation
1 parent 0c933ee commit dfc91a4

File tree

1 file changed

+91
-1
lines changed

1 file changed

+91
-1
lines changed

narwhals/dataframe.py

+91-1
Original file line numberDiff line numberDiff line change
@@ -1100,6 +1100,9 @@ def row(self, index: int) -> tuple[Any, ...]:
11001100
Arguments:
11011101
index: Row number.
11021102
1103+
Returns:
1104+
A tuple of the values in the selected row.
1105+
11031106
Notes:
11041107
cuDF doesn't support this method.
11051108
@@ -1133,6 +1136,14 @@ def row(self, index: int) -> tuple[Any, ...]:
11331136
def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Self:
11341137
"""Pipe function call.
11351138
1139+
Arguments:
1140+
function: Function to apply.
1141+
args: Positional arguments to pass to function.
1142+
kwargs: Keyword arguments to pass to function.
1143+
1144+
Returns:
1145+
The original object with the function applied.
1146+
11361147
Examples:
11371148
>>> import polars as pl
11381149
>>> import pandas as pd
@@ -1175,12 +1186,15 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se
11751186
return super().pipe(function, *args, **kwargs)
11761187

11771188
def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self:
1178-
"""Drop null values.
1189+
"""Drop rows that contain null values.
11791190
11801191
Arguments:
11811192
subset: Column name(s) for which null values are considered. If set to None
11821193
(default), use all columns.
11831194
1195+
Returns:
1196+
The original object with the rows containing null values removed.
1197+
11841198
Notes:
11851199
pandas and Polars handle null values differently. Polars distinguishes
11861200
between NaN and Null, whereas pandas doesn't.
@@ -1221,6 +1235,12 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self:
12211235
def with_row_index(self, name: str = "index") -> Self:
12221236
"""Insert column which enumerates rows.
12231237
1238+
Arguments:
1239+
name: The name of the column as a string. The default is "index".
1240+
1241+
Returns:
1242+
The original object with the column added.
1243+
12241244
Examples:
12251245
Construct pandas and polars DataFrames:
12261246
@@ -1264,6 +1284,9 @@ def with_row_index(self, name: str = "index") -> Self:
12641284
def schema(self) -> Schema:
12651285
r"""Get an ordered mapping of column names to their data type.
12661286
1287+
Returns:
1288+
A Narwhals Schema object that maps column names to their data types.
1289+
12671290
Examples:
12681291
>>> import polars as pl
12691292
>>> import pandas as pd
@@ -1300,6 +1323,9 @@ def schema(self) -> Schema:
13001323
def collect_schema(self: Self) -> Schema:
13011324
r"""Get an ordered mapping of column names to their data type.
13021325
1326+
Returns:
1327+
A Narwhals Schema object that maps column names to their data types.
1328+
13031329
Examples:
13041330
>>> import polars as pl
13051331
>>> import pandas as pd
@@ -1337,6 +1363,9 @@ def collect_schema(self: Self) -> Schema:
13371363
def columns(self) -> list[str]:
13381364
"""Get column names.
13391365
1366+
Returns:
1367+
The column names stored in a list.
1368+
13401369
Examples:
13411370
>>> import pandas as pd
13421371
>>> import polars as pl
@@ -1397,6 +1426,9 @@ def rows(
13971426
in the same order as the frame columns. Setting named=True will
13981427
return rows of dictionaries instead.
13991428
1429+
Returns:
1430+
The data as a list of rows.
1431+
14001432
Examples:
14011433
>>> import pandas as pd
14021434
>>> import polars as pl
@@ -1452,6 +1484,9 @@ def iter_rows(
14521484
internally while iterating over the data.
14531485
See https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.iter_rows.html
14541486
1487+
Returns:
1488+
An iterator over the rows of the DataFrame.
1489+
14551490
Notes:
14561491
cuDF doesn't support this method.
14571492
@@ -1561,6 +1596,9 @@ def select(
15611596
**named_exprs: Additional columns to select, specified as keyword arguments.
15621597
The columns will be renamed to the keyword used.
15631598
1599+
Returns:
1600+
The dataframe containing only the selected columns.
1601+
15641602
Examples:
15651603
>>> import pandas as pd
15661604
>>> import polars as pl
@@ -1674,6 +1712,9 @@ def rename(self, mapping: dict[str, str]) -> Self:
16741712
Arguments:
16751713
mapping: Key value pairs that map from old name to new name.
16761714
1715+
Returns:
1716+
The dataframe with the specified columns renamed.
1717+
16771718
Examples:
16781719
>>> import pandas as pd
16791720
>>> import polars as pl
@@ -1716,6 +1757,9 @@ def head(self, n: int = 5) -> Self:
17161757
n: Number of rows to return. If a negative value is passed, return all rows
17171758
except the last `abs(n)`.
17181759
1760+
Returns:
1761+
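A subset of the dataframe of shape (n, n_columns) when `n` is non-negative and no larger than the number of rows; for negative `n`, all rows except the last `abs(n)`.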
1762+
17191763
Examples:
17201764
>>> import pandas as pd
17211765
>>> import polars as pl
@@ -1762,6 +1806,9 @@ def tail(self, n: int = 5) -> Self:
17621806
n: Number of rows to return. If a negative value is passed, return all rows
17631807
except the first `abs(n)`.
17641808
1809+
Returns:
1810+
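A subset of the dataframe of shape (n, n_columns) when `n` is non-negative and no larger than the number of rows; for negative `n`, all rows except the first `abs(n)`.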
1811+
17651812
Examples:
17661813
>>> import pandas as pd
17671814
>>> import polars as pl
@@ -1804,6 +1851,9 @@ def tail(self, n: int = 5) -> Self:
18041851
def drop(self, *columns: str | Iterable[str], strict: bool = True) -> Self:
18051852
"""Remove columns from the dataframe.
18061853
1854+
Returns:
1855+
The dataframe with the specified columns removed.
1856+
18071857
Arguments:
18081858
*columns: Names of the columns that should be removed from the dataframe.
18091859
strict: Validate that all column names exist in the schema and throw an
@@ -1890,6 +1940,9 @@ def unique(
18901940
expensive to compute. Setting this to `True` blocks the possibility
18911941
to run on the streaming engine for Polars.
18921942
1943+
Returns:
1944+
The dataframe with the duplicate rows removed.
1945+
18931946
Examples:
18941947
>>> import pandas as pd
18951948
>>> import polars as pl
@@ -1939,6 +1992,9 @@ def filter(
19391992
Each constraint will behave the same as `nw.col(name).eq(value)`, and will be implicitly
19401993
joined with the other filter conditions using &.
19411994
1995+
Returns:
1996+
The filtered dataframe.
1997+
19421998
Examples:
19431999
>>> import pandas as pd
19442000
>>> import polars as pl
@@ -2153,6 +2209,9 @@ def sort(
21532209
specified per column by passing a sequence of booleans.
21542210
nulls_last: Place null values last.
21552211
2212+
Returns:
2213+
The sorted dataframe.
2214+
21562215
Warning:
21572216
Unlike Polars, it is not possible to specify a sequence of booleans for
21582217
`nulls_last` in order to control per-column behaviour. Instead a single
@@ -2518,6 +2577,9 @@ def is_duplicated(self: Self) -> Series[Any]:
25182577
def is_empty(self: Self) -> bool:
25192578
r"""Check if the dataframe is empty.
25202579
2580+
Returns:
2581+
A boolean indicating whether the dataframe is empty (True) or not (False).
2582+
25212583
Examples:
25222584
>>> import narwhals as nw
25232585
>>> import pandas as pd
@@ -2600,6 +2662,9 @@ def is_unique(self: Self) -> Series[Any]:
26002662
def null_count(self: Self) -> Self:
26012663
r"""Create a new DataFrame that shows the null counts per column.
26022664
2665+
Returns:
2666+
A dataframe of shape (1, n_columns).
2667+
26032668
Notes:
26042669
pandas and Polars handle null values differently. Polars distinguishes
26052670
between NaN and Null, whereas pandas doesn't.
@@ -2651,6 +2716,13 @@ def null_count(self: Self) -> Self:
26512716
def item(self: Self, row: int | None = None, column: int | str | None = None) -> Any:
26522717
r"""Return the DataFrame as a scalar, or return the element at the given row/column.
26532718
2719+
Arguments:
2720+
row: The *n*-th row.
2721+
column: The column selected via an integer or a string (column name).
2722+
2723+
Returns:
2724+
A scalar or the specified element in the dataframe.
2725+
26542726
Notes:
26552727
If row/col not provided, this is equivalent to df[0,0], with a check that the shape is (1,1).
26562728
With row/col, this is equivalent to df[row,col].
@@ -2682,6 +2754,9 @@ def item(self: Self, row: int | None = None, column: int | str | None = None) ->
26822754
def clone(self) -> Self:
26832755
r"""Create a copy of this DataFrame.
26842756
2757+
Returns:
2758+
An identical copy of the original dataframe.
2759+
26852760
Examples:
26862761
>>> import narwhals as nw
26872762
>>> import pandas as pd
@@ -2721,6 +2796,9 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self:
27212796
n: Gather every *n*-th row.
27222797
offset: Starting index.
27232798
2799+
Returns:
2800+
The dataframe containing only the selected rows.
2801+
27242802
Examples:
27252803
>>> import narwhals as nw
27262804
>>> import pandas as pd
@@ -2790,6 +2868,9 @@ def pivot(
27902868
separator: Used as separator/delimiter in generated column names in case of
27912869
multiple `values` columns.
27922870
2871+
Returns:
2872+
A new dataframe.
2873+
27932874
Examples:
27942875
>>> import narwhals as nw
27952876
>>> import pandas as pd
@@ -2841,6 +2922,9 @@ def pivot(
28412922
def to_arrow(self: Self) -> pa.Table:
28422923
r"""Convert to arrow table.
28432924
2925+
Returns:
2926+
A new PyArrow table.
2927+
28442928
Examples:
28452929
>>> import narwhals as nw
28462930
>>> import pandas as pd
@@ -2890,6 +2974,9 @@ def sample(
28902974
seed: Seed for the random number generator. If set to None (default), a random
28912975
seed is generated for each sample operation.
28922976
2977+
Returns:
2978+
A new dataframe.
2979+
28932980
Notes:
28942981
The results may not be consistent across libraries.
28952982
@@ -2956,6 +3043,9 @@ def unpivot(
29563043
variable_name: Name to give to the `variable` column. Defaults to "variable".
29573044
value_name: Name to give to the `value` column. Defaults to "value".
29583045
3046+
Returns:
3047+
The unpivoted dataframe.
3048+
29593049
Notes:
29603050
If you're coming from pandas, this is similar to `pandas.DataFrame.melt`,
29613051
but with `index` replacing `id_vars` and `on` replacing `value_vars`.

0 commit comments

Comments
 (0)