Skip to content

Commit bc7f9f1

Browse files
Copilot and pwwang committed
Add pipe() function to datar.all to simulate pandas.DataFrame.pipe()
Co-authored-by: pwwang <[email protected]>
1 parent 28f6025 commit bc7f9f1

File tree

2 files changed

+150
-0
lines changed

2 files changed

+150
-0
lines changed

datar/apis/dplyr.py

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2478,3 +2478,50 @@ def any_of(_data, x, vars=None) -> Any:
24782478
in `_data` columns
24792479
"""
24802480
raise _NotImplementedByCurrentBackendError("any_of", _data)
2481+
2482+
2483+
@_register_verb(object)
def pipe(_data: T, func: _Callable, *args, **kwargs) -> Any:
    """Apply a function to the data

    This function is similar to pandas.DataFrame.pipe() and allows you to
    apply custom functions in a piping workflow.

    Args:
        _data: The data object (typically a DataFrame)
        func: Function to apply to the data. ``args`` and ``kwargs`` are
            passed into ``func``, with the data as the first positional
            argument. Alternatively, a ``(callable, data_keyword)`` tuple,
            where ``data_keyword`` is a string naming the keyword argument
            of ``callable`` that should receive the data (same convention
            as pandas.DataFrame.pipe()).
        *args: Positional arguments passed into ``func``
        **kwargs: Keyword arguments passed into ``func``

    Returns:
        The return value of ``func``

    Raises:
        ValueError: If ``func`` is a ``(callable, data_keyword)`` tuple and
            ``data_keyword`` is also given in ``kwargs``.

    Examples:
        >>> import pandas as pd
        >>> import datar.all as dr
        >>> from datar import f
        >>> df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
        >>> df >> dr.pipe(lambda x: x * 2)
           a   b
        0  2   8
        1  4  10
        2  6  12

        >>> # With additional arguments
        >>> def add_value(df, value):
        ...     return df + value
        >>> df >> dr.pipe(add_value, 10)
            a   b
        0  11  14
        1  12  15
        2  13  16

        >>> # Data passed as a keyword argument instead of the first one
        >>> def add_to(value, data):
        ...     return data + value
        >>> df >> dr.pipe((add_to, 'data'), 10)
            a   b
        0  11  14
        1  12  15
        2  13  16

        >>> # Combined with other datar functions
        >>> df >> dr.select(f.a) >> dr.pipe(lambda x: x * 2)
           a
        0  2
        1  4
        2  6
    """
    if isinstance(func, tuple):
        # pandas-style ``(callable, data_keyword)``: the data is injected as
        # a keyword argument rather than as the first positional argument.
        target_func, data_keyword = func
        if data_keyword in kwargs:
            # Refuse to silently overwrite a keyword the caller supplied.
            raise ValueError(
                f"{data_keyword} is both the pipe target and a keyword "
                "argument"
            )
        kwargs[data_keyword] = _data
        return target_func(*args, **kwargs)

    return func(_data, *args, **kwargs)

tests/test_pipe.py

Lines changed: 103 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,103 @@
1+
import pytest
2+
import pandas as pd
3+
from datar.all import pipe
4+
5+
6+
def test_pipe_basic_lambda():
    """pipe() hands the piped frame to a lambda and returns its result."""
    frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

    doubled = frame >> pipe(lambda x: x * 2)

    pd.testing.assert_frame_equal(
        doubled,
        pd.DataFrame({'a': [2, 4, 6], 'b': [8, 10, 12]}),
    )
12+
13+
14+
def test_pipe_with_args():
    """Extra positional arguments given to pipe() reach the function."""

    def add_value(df, value):
        return df + value

    frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

    shifted = frame >> pipe(add_value, 10)

    pd.testing.assert_frame_equal(
        shifted,
        pd.DataFrame({'a': [11, 12, 13], 'b': [14, 15, 16]}),
    )
24+
25+
26+
def test_pipe_with_kwargs():
    """Keyword arguments given to pipe() reach the function."""

    def multiply_col(df, col, factor=1):
        # Work on a copy so the input frame is left untouched.
        scaled = df.copy()
        scaled[col] = scaled[col] * factor
        return scaled

    frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

    outcome = frame >> pipe(multiply_col, 'a', factor=10)

    pd.testing.assert_frame_equal(
        outcome,
        pd.DataFrame({'a': [10, 20, 30], 'b': [4, 5, 6]}),
    )
38+
39+
40+
def test_pipe_with_column_selection():
    """Two chained pipe() calls: keep column 'a', then double it."""
    frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

    # First step narrows to a single-column frame, second step scales it.
    only_a = frame >> pipe(lambda df: df[['a']])
    doubled = only_a >> pipe(lambda x: x * 2)

    pd.testing.assert_frame_equal(doubled, pd.DataFrame({'a': [2, 4, 6]}))
48+
49+
50+
def test_pipe_with_column_rename():
    """Chained pipe() calls: keep column 'a', then upper-case its name."""
    frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

    # Select a single column, then rename it through a second pipe step.
    only_a = frame >> pipe(lambda df: df[['a']])
    renamed = only_a >> pipe(lambda df: df.rename(columns=str.upper))

    pd.testing.assert_frame_equal(renamed, pd.DataFrame({'A': [1, 2, 3]}))
58+
59+
60+
def test_pipe_with_custom_function():
    """pipe() works with a named function that adds a derived column."""

    def custom_transform(df, new_col_name, value):
        # Copy first so the caller's frame is not mutated.
        augmented = df.copy()
        augmented[new_col_name] = augmented['a'] + value
        return augmented

    frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

    outcome = frame >> pipe(custom_transform, 'c', 100)

    pd.testing.assert_frame_equal(
        outcome,
        pd.DataFrame(
            {'a': [1, 2, 3], 'b': [4, 5, 6], 'c': [101, 102, 103]}
        ),
    )
72+
73+
74+
def test_pipe_returns_non_dataframe():
    """pipe() returns whatever the function returns, here a scalar sum."""
    frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

    total = frame >> pipe(lambda x: x['a'].sum())

    assert total == 6
80+
81+
82+
def test_pipe_chain_multiple():
    """Several pipe() steps in a row are applied left to right."""
    frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

    # Double first, then add one: (x * 2) + 1.
    doubled = frame >> pipe(lambda x: x * 2)
    outcome = doubled >> pipe(lambda x: x + 1)

    pd.testing.assert_frame_equal(
        outcome,
        pd.DataFrame({'a': [3, 5, 7], 'b': [9, 11, 13]}),
    )
93+
94+
95+
def test_pipe_with_set_axis_like_issue():
    """pipe() with set_axis lower-cases column names, as in the issue."""
    frame = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})

    # Mirrors the issue example: convert column names to lowercase.
    lowered = frame >> pipe(
        lambda df: df.set_axis(df.columns.str.lower(), axis=1)
    )

    pd.testing.assert_frame_equal(
        lowered,
        pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}),
    )
103+

0 commit comments

Comments
 (0)