Skip to content

Commit 0b567d1

Browse files
authored
ensure merging functions' compatibility with pandas DataFrames (#91)
1 parent 754b7e5 commit 0b567d1

File tree

3 files changed

+103
-9
lines changed

3 files changed

+103
-9
lines changed

tests/test_frame.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,19 @@ def test_correct_appending(self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, how_h
110110
assert_dict_equal(result.headers, merge_headers(dframe_x.headers, dframe_y.headers, how=how_headers))
111111
assert_frame_equal(result, pd.DataFrame(dframe_x).append(pd.DataFrame(dframe_y)))
112112

113+
@pytest.mark.parametrize("how_headers", [None, "left", "right"])
114+
def test_appending_accepts_pandas_dataframe(self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, how_headers):
115+
dframe_x = tfs.read(_tfs_file_x_pathlib)
116+
dframe_y = pd.DataFrame(tfs.read(_tfs_file_y_pathlib)) # for test, loses headers here
117+
result = dframe_x.append(dframe_y, how_headers=how_headers)
118+
119+
assert isinstance(result, TfsDataFrame)
120+
assert isinstance(result.headers, OrderedDict)
121+
122+
# using empty OrderedDict here as it's what dframe_y is getting when converted in the call
123+
assert_dict_equal(result.headers, merge_headers(dframe_x.headers, OrderedDict(), how=how_headers))
124+
assert_frame_equal(result, pd.DataFrame(dframe_x).append(dframe_y)) # dframe_y already pandas
125+
113126

114127
class TestTfsDataFrameJoining:
115128
@pytest.mark.parametrize("how_headers", [None, "left", "right"])
@@ -127,6 +140,25 @@ def test_correct_joining(self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, how_hea
127140
result, pd.DataFrame(dframe_x).join(pd.DataFrame(dframe_y), lsuffix=lsuffix, rsuffix=rsuffix)
128141
)
129142

143+
@pytest.mark.parametrize("how_headers", [None, "left", "right"])
144+
@pytest.mark.parametrize("lsuffix", ["left", "_x"])
145+
@pytest.mark.parametrize("rsuffix", ["right", "_y"])
146+
def test_joining_accepts_pandas_dataframe(
147+
self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, how_headers, lsuffix, rsuffix
148+
):
149+
dframe_x = tfs.read(_tfs_file_x_pathlib)
150+
dframe_y = pd.DataFrame(tfs.read(_tfs_file_y_pathlib)) # for test, loses headers here
151+
result = dframe_x.join(dframe_y, how_headers=how_headers, lsuffix=lsuffix, rsuffix=rsuffix)
152+
153+
assert isinstance(result, TfsDataFrame)
154+
assert isinstance(result.headers, OrderedDict)
155+
156+
# using empty OrderedDict here as it's what dframe_y is getting when converted in the call
157+
assert_dict_equal(result.headers, merge_headers(dframe_x.headers, OrderedDict(), how=how_headers))
158+
assert_frame_equal(
159+
result, pd.DataFrame(dframe_x).join(pd.DataFrame(dframe_y), lsuffix=lsuffix, rsuffix=rsuffix)
160+
)
161+
130162

131163
class TestTfsDataFrameMerging:
132164
@pytest.mark.parametrize("how_headers", [None, "left", "right"])
@@ -142,6 +174,23 @@ def test_correct_merging(self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, how_hea
142174
assert_dict_equal(result.headers, merge_headers(dframe_x.headers, dframe_y.headers, how=how_headers))
143175
assert_frame_equal(result, pd.DataFrame(dframe_x).merge(pd.DataFrame(dframe_y), how=how, on=on))
144176

177+
@pytest.mark.parametrize("how_headers", [None, "left", "right"])
178+
@pytest.mark.parametrize("how", ["left", "right", "outer", "inner"])
179+
@pytest.mark.parametrize("on", ["NAME", "S", "NUMBER", "CO", "CORMS", "BPM_RES"])
180+
def test_merging_accepts_pandas_dataframe(
181+
self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, how_headers, how, on
182+
):
183+
dframe_x = tfs.read(_tfs_file_x_pathlib)
184+
dframe_y = pd.DataFrame(tfs.read(_tfs_file_y_pathlib)) # for test, loses headers here
185+
result = dframe_x.merge(dframe_y, how_headers=how_headers, how=how, on=on)
186+
187+
assert isinstance(result, TfsDataFrame)
188+
assert isinstance(result.headers, OrderedDict)
189+
190+
# using empty OrderedDict here as it's what dframe_y is getting when converted in the call
191+
assert_dict_equal(result.headers, merge_headers(dframe_x.headers, OrderedDict(), how=how_headers))
192+
assert_frame_equal(result, pd.DataFrame(dframe_x).merge(pd.DataFrame(dframe_y), how=how, on=on))
193+
145194

146195
class TestTfsDataFramesConcatenating:
147196
@pytest.mark.parametrize("how_headers", [None, "left", "right"])
@@ -160,6 +209,28 @@ def test_correct_concatenating(self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, h
160209
assert_dict_equal(result.headers, reduce(merger, all_headers))
161210
assert_frame_equal(result, pd.concat(objs, axis=axis, join=join))
162211

212+
@pytest.mark.parametrize("how_headers", [None, "left", "right"])
213+
@pytest.mark.parametrize("axis", [0, 1])
214+
@pytest.mark.parametrize("join", ["inner", "outer"])
215+
def test_concatenating_accepts_pandas_dataframes(
216+
self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, how_headers, axis, join
217+
):
218+
dframe_x = tfs.read(_tfs_file_x_pathlib)
219+
dframe_y = pd.DataFrame(tfs.read(_tfs_file_y_pathlib)) # for test, loses headers here
220+
objs = [dframe_x] * 4 + [dframe_y] * 4 # now has a mix of TfsDataFrames and pandas.DataFrames
221+
result = concat(objs, how_headers=how_headers, axis=axis, join=join)
222+
223+
merger = partial(merge_headers, how=how_headers)
224+
# all_headers = (tfsdframe.headers for tfsdframe in objs)
225+
assert isinstance(result, TfsDataFrame)
226+
assert isinstance(result.headers, OrderedDict)
227+
228+
all_headers = [ # empty OrderedDicts here as it's what objects are getting when converted in the call
229+
dframe.headers if isinstance(dframe, TfsDataFrame) else OrderedDict() for dframe in objs
230+
]
231+
assert_dict_equal(result.headers, reduce(merger, all_headers))
232+
assert_frame_equal(result, pd.concat(objs, axis=axis, join=join))
233+
163234

164235
# ------ Fixtures ------ #
165236

tfs/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
__title__ = "tfs-pandas"
1010
__description__ = "Read and write tfs files."
1111
__url__ = "https://github.com/pylhc/tfs"
12-
__version__ = "3.0.0"
12+
__version__ = "3.0.1"
1313
__author__ = "pylhc"
1414
__author_email__ = "[email protected]"
1515
__license__ = "MIT"

tfs/frame.py

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -82,15 +82,19 @@ def __repr__(self) -> str:
8282
return f"{headers_string}{super().__repr__()}"
8383

8484
def append(
85-
self, other: "TfsDataFrame", how_headers: str = None, new_headers: dict = None, **kwargs
85+
self,
86+
other: Union["TfsDataFrame", pd.DataFrame],
87+
how_headers: str = None,
88+
new_headers: dict = None,
89+
**kwargs,
8690
) -> "TfsDataFrame":
8791
"""
8892
Append rows of the other ``TfsDataFrame`` to the end of caller, returning a new object. Data
8993
manipulation is done by the ``pandas.DataFrame`` method of the same name. Resulting headers are
9094
either merged according to the provided **how_headers** method or as given via **new_headers**.
9195
9296
Args:
93-
other (TfsDataFrame): The ``TfsDataFrame`` to append to the caller.
97+
other (Union[TfsDataFrame, pd.DataFrame]): The ``TfsDataFrame`` or ``pandas.DataFrame`` to append to the caller.
9498
how_headers (str): Type of merge to be performed for the headers. Either **left** or **right**.
9599
Refer to :func:`tfs.frame.merge_headers` for behavior. If ``None`` is provided and
96100
**new_headers** is not provided, the final headers will be empty. Case insensitive,
@@ -108,6 +112,10 @@ def append(
108112
A new ``TfsDataFrame`` with the appended data and merged headers.
109113
"""
110114
LOGGER.debug("Appending data through 'pandas'")
115+
if not hasattr(other, "headers"):
116+
LOGGER.debug("Converting 'other' to TfsDataFrame for appending")
117+
other = TfsDataFrame(other) # so we accept pandas.DataFrame input here
118+
111119
dframe = super().append(other, **kwargs)
112120

113121
LOGGER.debug("Determining headers")
@@ -119,15 +127,19 @@ def append(
119127
return TfsDataFrame(data=dframe, headers=new_headers)
120128

121129
def join(
122-
self, other: "TfsDataFrame", how_headers: str = None, new_headers: dict = None, **kwargs
130+
self,
131+
other: Union["TfsDataFrame", pd.DataFrame],
132+
how_headers: str = None,
133+
new_headers: dict = None,
134+
**kwargs,
123135
) -> "TfsDataFrame":
124136
"""
125137
Join columns of another ``TfsDataFrame``. Data manipulation is done by the ``pandas.DataFrame``
126138
method of the same name. Resulting headers are either merged according to the provided
127139
**how_headers** method or as given via **new_headers**.
128140
129141
Args:
130-
other (TfsDataFrame): The ``TfsDataFrame`` to join into the caller.
142+
other (Union[TfsDataFrame, pd.DataFrame]): The ``TfsDataFrame`` or ``pandas.DataFrame`` to join into the caller.
131143
how_headers (str): Type of merge to be performed for the headers. Either **left** or **right**.
132144
Refer to :func:`tfs.frame.merge_headers` for behavior. If ``None`` is provided and
133145
**new_headers** is not provided, the final headers will be empty. Case insensitive,
@@ -145,6 +157,9 @@ def join(
145157
A new ``TfsDataFrame`` with the joined columns and merged headers.
146158
"""
147159
LOGGER.debug("Joining data through 'pandas'")
160+
if not hasattr(other, "headers"):
161+
LOGGER.debug("Converting 'other' to TfsDataFrame for joining")
162+
other = TfsDataFrame(other) # so we accept pandas.DataFrame input here
148163
dframe = super().join(other, **kwargs)
149164

150165
LOGGER.debug("Determining headers")
@@ -156,15 +171,19 @@ def join(
156171
return TfsDataFrame(data=dframe, headers=new_headers)
157172

158173
def merge(
159-
self, right: "TfsDataFrame", how_headers: str = None, new_headers: dict = None, **kwargs
174+
self,
175+
right: Union["TfsDataFrame", pd.DataFrame],
176+
how_headers: str = None,
177+
new_headers: dict = None,
178+
**kwargs,
160179
) -> "TfsDataFrame":
161180
"""
162181
Merge ``TfsDataFrame`` objects with a database-style join. Data manipulation is done by the
163182
``pandas.DataFrame`` method of the same name. Resulting headers are either merged according to the
164183
provided **how_headers** method or as given via **new_headers**.
165184
166185
Args:
167-
right (TfsDataFrame): The ``TfsDataFrame`` to merge with the caller.
186+
right (Union[TfsDataFrame, pd.DataFrame]): The ``TfsDataFrame`` or ``pandas.DataFrame`` to merge with the caller.
168187
how_headers (str): Type of merge to be performed for the headers. Either **left** or **right**.
169188
Refer to :func:`tfs.frame.merge_headers` for behavior. If ``None`` is provided and
170189
**new_headers** is not provided, the final headers will be empty. Case insensitive,
@@ -182,6 +201,9 @@ def merge(
182201
A new ``TfsDataFrame`` with the merged data and merged headers.
183202
"""
184203
LOGGER.debug("Merging data through 'pandas'")
204+
if not hasattr(right, "headers"):
205+
LOGGER.debug("Converting 'right' to TfsDataFrame for merging")
206+
right = TfsDataFrame(right) # so we accept pandas.DataFrame input here
185207
dframe = super().merge(right, **kwargs)
186208

187209
LOGGER.debug("Determining headers")
@@ -228,7 +250,7 @@ def merge_headers(headers_left: dict, headers_right: dict, how: str) -> OrderedD
228250

229251

230252
def concat(
231-
objs: Sequence[TfsDataFrame],
253+
objs: Sequence[Union[TfsDataFrame, pd.DataFrame]],
232254
how_headers: str = None,
233255
new_headers: dict = None,
234256
**kwargs,
@@ -245,7 +267,7 @@ def concat(
245267
**how_headers** and **new_headers** as ``None`` (their defaults) to end up with empty headers.
246268
247269
Args:
248-
objs (Sequence[TfsDataFrame]): the ``TfsDataFrame`` objects to be concatenated.
270+
objs (Sequence[Union[TfsDataFrame, pd.DataFrame]]): the ``TfsDataFrame`` and/or ``pandas.DataFrame`` objects to be concatenated.
249271
how_headers (str): Type of merge to be performed for the headers. Either **left** or **right**.
250272
Refer to :func:`tfs.frame.merge_headers` for behavior. If ``None`` is provided and
251273
**new_headers** is not provided, the final headers will be empty. Case insensitive, defaults to
@@ -263,6 +285,7 @@ def concat(
263285
A new ``TfsDataFrame`` with the merged data and merged headers.
264286
"""
265287
LOGGER.debug("Concatenating data through 'pandas'")
288+
objs = [dframe if hasattr(dframe, "headers") else TfsDataFrame(dframe) for dframe in objs]
266289
dframe = pd.concat(objs, **kwargs)
267290

268291
LOGGER.debug("Determining headers")

0 commit comments

Comments
 (0)