docs: more args and returns documentation in DataFrame class (#1600)

marenwestermann · web-flow · commit dfc91a485c36 · 2024-12-19T13:31:05.000Z
* more args and returns documentation in DataFrame class

* address comment

* more documentation
diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py
@@ -1100,6 +1100,9 @@ def row(self, index: int) -> tuple[Any, ...]:
         Arguments:
             index: Row number.
 
+        Returns:
+            A tuple of the values in the selected row.
+
         Notes:
             cuDF doesn't support this method.
 
@@ -1133,6 +1136,14 @@ def row(self, index: int) -> tuple[Any, ...]:
     def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Self:
         """Pipe function call.
 
+        Arguments:
+            function: Function to apply.
+            args: Positional arguments to pass to function.
+            kwargs: Keyword arguments to pass to function.
+
+        Returns:
+            The original object with the function applied.
+
         Examples:
             >>> import polars as pl
             >>> import pandas as pd
@@ -1175,12 +1186,15 @@ def pipe(self, function: Callable[[Any], Self], *args: Any, **kwargs: Any) -> Se
         return super().pipe(function, *args, **kwargs)
 
     def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self:
-        """Drop null values.
+        """Drop rows that contain null values.
 
         Arguments:
             subset: Column name(s) for which null values are considered. If set to None
                 (default), use all columns.
 
+        Returns:
+            The original object with the rows that contained null values removed.
+
         Notes:
             pandas and Polars handle null values differently. Polars distinguishes
             between NaN and Null, whereas pandas doesn't.
@@ -1221,6 +1235,12 @@ def drop_nulls(self: Self, subset: str | list[str] | None = None) -> Self:
     def with_row_index(self, name: str = "index") -> Self:
         """Insert column which enumerates rows.
 
+        Arguments:
+            name: The name of the column as a string. The default is "index".
+
+        Returns:
+            The original object with the column added.
+
         Examples:
             Construct pandas as polars DataFrames:
 
@@ -1264,6 +1284,9 @@ def with_row_index(self, name: str = "index") -> Self:
     def schema(self) -> Schema:
         r"""Get an ordered mapping of column names to their data type.
 
+        Returns:
+            A Narwhals Schema object mapping column names to their data types.
+
         Examples:
             >>> import polars as pl
             >>> import pandas as pd
@@ -1300,6 +1323,9 @@ def schema(self) -> Schema:
     def collect_schema(self: Self) -> Schema:
         r"""Get an ordered mapping of column names to their data type.
 
+        Returns:
+            A Narwhals Schema object mapping column names to their data types.
+
         Examples:
             >>> import polars as pl
             >>> import pandas as pd
@@ -1337,6 +1363,9 @@ def collect_schema(self: Self) -> Schema:
     def columns(self) -> list[str]:
         """Get column names.
 
+        Returns:
+            The column names stored in a list.
+
         Examples:
             >>> import pandas as pd
             >>> import polars as pl
@@ -1397,6 +1426,9 @@ def rows(
                 in the same order as the frame columns. Setting named=True will
                 return rows of dictionaries instead.
 
+        Returns:
+            The data as a list of rows.
+
         Examples:
             >>> import pandas as pd
             >>> import polars as pl
@@ -1452,6 +1484,9 @@ def iter_rows(
                 internally while iterating over the data.
                 See https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.iter_rows.html
 
+        Returns:
+            An iterator over the rows of the DataFrame.
+
         Notes:
             cuDF doesn't support this method.
 
@@ -1561,6 +1596,9 @@ def select(
             **named_exprs: Additional columns to select, specified as keyword arguments.
                             The columns will be renamed to the keyword used.
 
+        Returns:
+            The dataframe containing only the selected columns.
+
         Examples:
             >>> import pandas as pd
             >>> import polars as pl
@@ -1674,6 +1712,9 @@ def rename(self, mapping: dict[str, str]) -> Self:
         Arguments:
             mapping: Key value pairs that map from old name to new name.
 
+        Returns:
+            The dataframe with the specified columns renamed.
+
         Examples:
             >>> import pandas as pd
             >>> import polars as pl
@@ -1716,6 +1757,9 @@ def head(self, n: int = 5) -> Self:
             n: Number of rows to return. If a negative value is passed, return all rows
                 except the last `abs(n)`.
 
+        Returns:
+            A subset of the dataframe of shape (n, n_columns).
+
         Examples:
             >>> import pandas as pd
             >>> import polars as pl
@@ -1762,6 +1806,9 @@ def tail(self, n: int = 5) -> Self:
             n: Number of rows to return. If a negative value is passed, return all rows
                 except the first `abs(n)`.
 
+        Returns:
+            A subset of the dataframe of shape (n, n_columns).
+
         Examples:
             >>> import pandas as pd
             >>> import polars as pl
@@ -1804,6 +1851,9 @@ def tail(self, n: int = 5) -> Self:
     def drop(self, *columns: str | Iterable[str], strict: bool = True) -> Self:
         """Remove columns from the dataframe.
 
+        Returns:
+            The dataframe with the specified columns removed.
+
         Arguments:
             *columns: Names of the columns that should be removed from the dataframe.
             strict: Validate that all column names exist in the schema and throw an
@@ -1890,6 +1940,9 @@ def unique(
                 expensive to compute. Settings this to `True` blocks the possibility
                 to run on the streaming engine for Polars.
 
+        Returns:
+            The dataframe with the duplicate rows removed.
+
         Examples:
             >>> import pandas as pd
             >>> import polars as pl
@@ -1939,6 +1992,9 @@ def filter(
                 Each constraint will behave the same as `nw.col(name).eq(value)`, and will be implicitly
                 joined with the other filter conditions using &.
 
+        Returns:
+            The filtered dataframe.
+
         Examples:
             >>> import pandas as pd
             >>> import polars as pl
@@ -2153,6 +2209,9 @@ def sort(
                 specified per column by passing a sequence of booleans.
             nulls_last: Place null values last.
 
+        Returns:
+            The sorted dataframe.
+
         Warning:
             Unlike Polars, it is not possible to specify a sequence of booleans for
             `nulls_last` in order to control per-column behaviour. Instead a single
@@ -2518,6 +2577,9 @@ def is_duplicated(self: Self) -> Series[Any]:
     def is_empty(self: Self) -> bool:
         r"""Check if the dataframe is empty.
 
+        Returns:
+            A boolean indicating whether the dataframe is empty (True) or not (False).
+
         Examples:
             >>> import narwhals as nw
             >>> import pandas as pd
@@ -2600,6 +2662,9 @@ def is_unique(self: Self) -> Series[Any]:
     def null_count(self: Self) -> Self:
         r"""Create a new DataFrame that shows the null counts per column.
 
+        Returns:
+            A dataframe of shape (1, n_columns).
+
         Notes:
             pandas and Polars handle null values differently. Polars distinguishes
             between NaN and Null, whereas pandas doesn't.
@@ -2651,6 +2716,13 @@ def null_count(self: Self) -> Self:
     def item(self: Self, row: int | None = None, column: int | str | None = None) -> Any:
         r"""Return the DataFrame as a scalar, or return the element at the given row/column.
 
+        Arguments:
+            row: The *n*-th row.
+            column: The column selected via an integer or a string (column name).
+
+        Returns:
+            A scalar or the specified element in the dataframe.
+
         Notes:
             If row/col not provided, this is equivalent to df[0,0], with a check that the shape is (1,1).
             With row/col, this is equivalent to df[row,col].
@@ -2682,6 +2754,9 @@ def item(self: Self, row: int | None = None, column: int | str | None = None) ->
     def clone(self) -> Self:
         r"""Create a copy of this DataFrame.
 
+        Returns:
+            An identical copy of the original dataframe.
+
         Examples:
             >>> import narwhals as nw
             >>> import pandas as pd
@@ -2721,6 +2796,9 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self:
             n: Gather every *n*-th row.
             offset: Starting index.
 
+        Returns:
+            The dataframe containing only the selected rows.
+
         Examples:
             >>> import narwhals as nw
             >>> import pandas as pd
@@ -2790,6 +2868,9 @@ def pivot(
             separator: Used as separator/delimiter in generated column names in case of
                 multiple `values` columns.
 
+        Returns:
+            A new dataframe.
+
         Examples:
             >>> import narwhals as nw
             >>> import pandas as pd
@@ -2841,6 +2922,9 @@ def pivot(
     def to_arrow(self: Self) -> pa.Table:
         r"""Convert to arrow table.
 
+        Returns:
+            A new PyArrow table.
+
         Examples:
             >>> import narwhals as nw
             >>> import pandas as pd
@@ -2890,6 +2974,9 @@ def sample(
             seed: Seed for the random number generator. If set to None (default), a random
                 seed is generated for each sample operation.
 
+        Returns:
+            A new dataframe.
+
         Notes:
             The results may not be consistent across libraries.
 
@@ -2956,6 +3043,9 @@ def unpivot(
             variable_name: Name to give to the `variable` column. Defaults to "variable".
             value_name: Name to give to the `value` column. Defaults to "value".
 
+        Returns:
+            The unpivoted dataframe.
+
         Notes:
             If you're coming from pandas, this is similar to `pandas.DataFrame.melt`,
             but with `index` replacing `id_vars` and `on` replacing `value_vars`.