ensure merging functions' compatibility with pandas DataFrames (#91)

fsoubelet · web-flow · commit 0b567d11f89e · 2021-09-30T14:33:02.000Z
diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -110,6 +110,19 @@ def test_correct_appending(self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, how_h
         assert_dict_equal(result.headers, merge_headers(dframe_x.headers, dframe_y.headers, how=how_headers))
         assert_frame_equal(result, pd.DataFrame(dframe_x).append(pd.DataFrame(dframe_y)))
 
+    @pytest.mark.parametrize("how_headers", [None, "left", "right"])
+    def test_appending_accepts_pandas_dataframe(self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, how_headers):
+        dframe_x = tfs.read(_tfs_file_x_pathlib)
+        dframe_y = pd.DataFrame(tfs.read(_tfs_file_y_pathlib))  # for test, loses headers here
+        result = dframe_x.append(dframe_y, how_headers=how_headers)
+
+        assert isinstance(result, TfsDataFrame)
+        assert isinstance(result.headers, OrderedDict)
+
+        # dframe_y has no headers: it gets an empty OrderedDict when converted to TfsDataFrame in the call
+        assert_dict_equal(result.headers, merge_headers(dframe_x.headers, OrderedDict(), how=how_headers))
+        assert_frame_equal(result, pd.DataFrame(dframe_x).append(dframe_y))  # dframe_y already pandas
+
 
 class TestTfsDataFrameJoining:
     @pytest.mark.parametrize("how_headers", [None, "left", "right"])
@@ -127,6 +140,25 @@ def test_correct_joining(self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, how_hea
             result, pd.DataFrame(dframe_x).join(pd.DataFrame(dframe_y), lsuffix=lsuffix, rsuffix=rsuffix)
         )
 
+    @pytest.mark.parametrize("how_headers", [None, "left", "right"])
+    @pytest.mark.parametrize("lsuffix", ["left", "_x"])
+    @pytest.mark.parametrize("rsuffix", ["right", "_y"])
+    def test_joining_accepts_pandas_dataframe(
+        self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, how_headers, lsuffix, rsuffix
+    ):
+        dframe_x = tfs.read(_tfs_file_x_pathlib)
+        dframe_y = pd.DataFrame(tfs.read(_tfs_file_y_pathlib))  # for test, loses headers here
+        result = dframe_x.join(dframe_y, how_headers=how_headers, lsuffix=lsuffix, rsuffix=rsuffix)
+
+        assert isinstance(result, TfsDataFrame)
+        assert isinstance(result.headers, OrderedDict)
+
+        # dframe_y has no headers: it gets an empty OrderedDict when converted to TfsDataFrame in the call
+        assert_dict_equal(result.headers, merge_headers(dframe_x.headers, OrderedDict(), how=how_headers))
+        assert_frame_equal(
+            result, pd.DataFrame(dframe_x).join(pd.DataFrame(dframe_y), lsuffix=lsuffix, rsuffix=rsuffix)
+        )
+
 
 class TestTfsDataFrameMerging:
     @pytest.mark.parametrize("how_headers", [None, "left", "right"])
@@ -142,6 +174,23 @@ def test_correct_merging(self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, how_hea
         assert_dict_equal(result.headers, merge_headers(dframe_x.headers, dframe_y.headers, how=how_headers))
         assert_frame_equal(result, pd.DataFrame(dframe_x).merge(pd.DataFrame(dframe_y), how=how, on=on))
 
+    @pytest.mark.parametrize("how_headers", [None, "left", "right"])
+    @pytest.mark.parametrize("how", ["left", "right", "outer", "inner"])
+    @pytest.mark.parametrize("on", ["NAME", "S", "NUMBER", "CO", "CORMS", "BPM_RES"])
+    def test_merging_accepts_pandas_dataframe(
+        self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, how_headers, how, on
+    ):
+        dframe_x = tfs.read(_tfs_file_x_pathlib)
+        dframe_y = pd.DataFrame(tfs.read(_tfs_file_y_pathlib))  # for test, loses headers here
+        result = dframe_x.merge(dframe_y, how_headers=how_headers, how=how, on=on)
+
+        assert isinstance(result, TfsDataFrame)
+        assert isinstance(result.headers, OrderedDict)
+
+        # dframe_y has no headers: it gets an empty OrderedDict when converted to TfsDataFrame in the call
+        assert_dict_equal(result.headers, merge_headers(dframe_x.headers, OrderedDict(), how=how_headers))
+        assert_frame_equal(result, pd.DataFrame(dframe_x).merge(pd.DataFrame(dframe_y), how=how, on=on))
+
 
 class TestTfsDataFramesConcatenating:
     @pytest.mark.parametrize("how_headers", [None, "left", "right"])
@@ -160,6 +209,28 @@ def test_correct_concatenating(self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, h
         assert_dict_equal(result.headers, reduce(merger, all_headers))
         assert_frame_equal(result, pd.concat(objs, axis=axis, join=join))
 
+    @pytest.mark.parametrize("how_headers", [None, "left", "right"])
+    @pytest.mark.parametrize("axis", [0, 1])
+    @pytest.mark.parametrize("join", ["inner", "outer"])
+    def test_concatenating_accepts_pandas_dataframes(
+        self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, how_headers, axis, join
+    ):
+        dframe_x = tfs.read(_tfs_file_x_pathlib)
+        dframe_y = pd.DataFrame(tfs.read(_tfs_file_y_pathlib))  # for test, loses headers here
+        objs = [dframe_x] * 4 + [dframe_y] * 4  # now has a mix of TfsDataFrames and pandas.DataFrames
+        result = concat(objs, how_headers=how_headers, axis=axis, join=join)
+
+        merger = partial(merge_headers, how=how_headers)
+        # plain pandas.DataFrames carry no headers; expected headers are built explicitly below
+        assert isinstance(result, TfsDataFrame)
+        assert isinstance(result.headers, OrderedDict)
+
+        all_headers = [  # empty OrderedDicts here as it's what objects are getting when converted in the call
+            dframe.headers if isinstance(dframe, TfsDataFrame) else OrderedDict() for dframe in objs
+        ]
+        assert_dict_equal(result.headers, reduce(merger, all_headers))
+        assert_frame_equal(result, pd.concat(objs, axis=axis, join=join))
+
 
 # ------ Fixtures ------ #
 
diff --git a/tfs/__init__.py b/tfs/__init__.py
@@ -9,7 +9,7 @@
 __title__ = "tfs-pandas"
 __description__ = "Read and write tfs files."
 __url__ = "https://github.com/pylhc/tfs"
-__version__ = "3.0.0"
+__version__ = "3.0.1"
 __author__ = "pylhc"
 __author_email__ = "pylhc@github.com"
 __license__ = "MIT"
diff --git a/tfs/frame.py b/tfs/frame.py
@@ -82,15 +82,19 @@ def __repr__(self) -> str:
         return f"{headers_string}{super().__repr__()}"
 
     def append(
-        self, other: "TfsDataFrame", how_headers: str = None, new_headers: dict = None, **kwargs
+        self,
+        other: Union["TfsDataFrame", pd.DataFrame],
+        how_headers: str = None,
+        new_headers: dict = None,
+        **kwargs,
     ) -> "TfsDataFrame":
         """
         Append rows of the other ``TfsDataFrame`` to the end of caller, returning a new object. Data
         manipulation is done by the ``pandas.Dataframe`` method of the same name. Resulting headers are
         either merged according to the provided **how_headers** method or as given via **new_headers**.
 
         Args:
-            other (TfsDataFrame): The ``TfsDataFrame`` to append to the caller.
+            other (Union[TfsDataFrame, pd.DataFrame]): The dataframe to append to the caller.
             how_headers (str): Type of merge to be performed for the headers. Either **left** or **right**.
                 Refer to :func:`tfs.frame.merge_headers` for behavior. If ``None`` is provided and
                 **new_headers** is not provided, the final headers will be empty. Case insensitive,
@@ -108,6 +112,10 @@ def append(
             A new ``TfsDataFrame`` with the appended data and merged headers.
         """
         LOGGER.debug("Appending data through 'pandas'")
+        if not hasattr(other, "headers"):
+            LOGGER.debug("Converting 'other' to TfsDataFrame for appending")
+            other = TfsDataFrame(other)  # so we accept pandas.DataFrame input here
+
         dframe = super().append(other, **kwargs)
 
         LOGGER.debug("Determining headers")
@@ -119,15 +127,19 @@ def append(
         return TfsDataFrame(data=dframe, headers=new_headers)
 
     def join(
-        self, other: "TfsDataFrame", how_headers: str = None, new_headers: dict = None, **kwargs
+        self,
+        other: Union["TfsDataFrame", pd.DataFrame],
+        how_headers: str = None,
+        new_headers: dict = None,
+        **kwargs,
     ) -> "TfsDataFrame":
         """
         Join columns of another ``TfsDataFrame``. Data manipulation is done by the ``pandas.Dataframe``
         method of the same name. Resulting headers are either merged according to the provided
         **how_headers** method or as given via **new_headers**.
 
         Args:
-            other (TfsDataFrame): The ``TfsDataFrame`` to join into the caller.
+            other (Union[TfsDataFrame, pd.DataFrame]): The dataframe to join into the caller.
             how_headers (str): Type of merge to be performed for the headers. Either **left** or **right**.
                 Refer to :func:`tfs.frame.merge_headers` for behavior. If ``None`` is provided and
                 **new_headers** is not provided, the final headers will be empty. Case insensitive,
@@ -145,6 +157,9 @@ def join(
             A new ``TfsDataFrame`` with the joined columns and merged headers.
         """
         LOGGER.debug("Joining data through 'pandas'")
+        if not hasattr(other, "headers"):
+            LOGGER.debug("Converting 'other' to TfsDataFrame for joining")
+            other = TfsDataFrame(other)  # so we accept pandas.DataFrame input here
         dframe = super().join(other, **kwargs)
 
         LOGGER.debug("Determining headers")
@@ -156,15 +171,19 @@ def join(
         return TfsDataFrame(data=dframe, headers=new_headers)
 
     def merge(
-        self, right: "TfsDataFrame", how_headers: str = None, new_headers: dict = None, **kwargs
+        self,
+        right: Union["TfsDataFrame", pd.DataFrame],
+        how_headers: str = None,
+        new_headers: dict = None,
+        **kwargs,
     ) -> "TfsDataFrame":
         """
         Merge ``TfsDataFrame`` objects with a database-style join. Data manipulation is done by the
         ``pandas.Dataframe`` method of the same name. Resulting headers are either merged according to the
         provided **how_headers** method or as given via **new_headers**.
 
         Args:
-            right (TfsDataFrame): The ``TfsDataFrame`` to merge with the caller.
+            right (Union[TfsDataFrame, pd.DataFrame]): The dataframe to merge with the caller.
             how_headers (str): Type of merge to be performed for the headers. Either **left** or **right**.
                 Refer to :func:`tfs.frame.merge_headers` for behavior. If ``None`` is provided and
                 **new_headers** is not provided, the final headers will be empty. Case insensitive,
@@ -182,6 +201,9 @@ def merge(
             A new ``TfsDataFrame`` with the merged data and merged headers.
         """
         LOGGER.debug("Merging data through 'pandas'")
+        if not hasattr(right, "headers"):
+            LOGGER.debug("Converting 'right' to TfsDataFrame for merging")
+            right = TfsDataFrame(right)  # so we accept pandas.DataFrame input here
         dframe = super().merge(right, **kwargs)
 
         LOGGER.debug("Determining headers")
@@ -228,7 +250,7 @@ def merge_headers(headers_left: dict, headers_right: dict, how: str) -> OrderedD
 
 
 def concat(
-    objs: Sequence[TfsDataFrame],
+    objs: Sequence[Union[TfsDataFrame, pd.DataFrame]],
     how_headers: str = None,
     new_headers: dict = None,
     **kwargs,
@@ -245,7 +267,7 @@ def concat(
         **how_headers** and **new_headers** as ``None`` (their defaults) to end up with empty headers.
 
     Args:
-        objs (Sequence[TfsDataFrame]): the ``TfsDataFrame`` objects to be concatenated.
+        objs (Sequence[Union[TfsDataFrame, pd.DataFrame]]): the dataframe objects to be concatenated.
         how_headers (str): Type of merge to be performed for the headers. Either **left** or **right**.
             Refer to :func:`tfs.frame.merge_headers` for behavior. If ``None`` is provided and
             **new_headers** is not provided, the final headers will be empty. Case insensitive, defaults to
@@ -263,6 +285,7 @@ def concat(
         A new ``TfsDataFrame`` with the merged data and merged headers.
     """
     LOGGER.debug("Concatenating data through 'pandas'")
+    objs = [dframe if hasattr(dframe, "headers") else TfsDataFrame(dframe) for dframe in objs]
     dframe = pd.concat(objs, **kwargs)
 
     LOGGER.debug("Determining headers")