[Fix] Series-like writing (#130)

fsoubelet · web-flow · commit 43cb566d8d98 · 2024-03-05T13:53:00.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,39 +1,45 @@
 # TFS-Pandas Changelog
 
+## Version 3.7.3
+
+- Fixed:
+  - Fixed a regression where the writing of a `pd.Series`-like object to disk was raising an error. It is now possible again.  
+
 ## Version 3.7.2
 
 - Fixed:
-    - fixing the issues with `pandas` >= `v2.1.0` (see `tfs-pandas` `v3.7.1`) by overwriting the `_constructor_from_mgr` function.  
+  - fixing the issues with `pandas` >= `v2.1.0` (see `tfs-pandas` `v3.7.1`) by overwriting the `_constructor_from_mgr` function.  
 
 ## Version 3.7.1
 
 - Changed:
-    - The dependency on `pandas` was restricted to avoid the latest version, `2.1.0` and above as a temporary workaround to an attribute access bug that arose with it. 
+  - The dependency on `pandas` was restricted to avoid the latest version, `2.1.0` and above as a temporary workaround to an attribute access bug that arose with it.
 
 ## Version 3.7.0
 
 Minor API changes to the `TFSCollections`:
-  - the old `write_to` and `get_filename` are renamed to `_write_to` and `_get_filename` as they
+
+- the old `write_to` and `get_filename` are renamed to `_write_to` and `_get_filename` as they
     could only be accessed internally (due to the input parameters not available to the user).
     This also means, that - in case they are overwritten by a user's implementation - they need to be renamed there!!
 
-  - The column which is set as index can now also be defined manually, by overwriting the attribute `INDEX`, which defaults to `"NAME"`.
+- The column which is set as index can now also be defined manually, by overwriting the attribute `INDEX`, which defaults to `"NAME"`.
 
-  - New Functions of `TFSCollection` Instances:
-    - `get_filename(name)`: Returns the associated filename to the property with name `name`.
-    - `get_path(name)`: Return the actual file path of the property `name`
-    - `flush()`: Write the current state of the TFSDataFrames into their respective files.
-    - `write_tfs(filename, data_frame)`: Write the `data_frame` to `self.directory` with the given `filename`.
+- New Functions of `TFSCollection` Instances:
+  - `get_filename(name)`: Returns the associated filename to the property with name `name`.
+  - `get_path(name)`: Return the actual file path of the property `name`
+  - `flush()`: Write the current state of the TFSDataFrames into their respective files.
+  - `write_tfs(filename, data_frame)`: Write the `data_frame` to `self.directory` with the given `filename`.
 
-  - New Special Properties of `TFSCollection` Instances:
-    - `defined_properties`: Tuple of strings of the defined properties on this instance.
-    - `filenames` is a convenience wrapper for `get_filename()`: 
-      - When called (`filenames(exist: bool)`) returns a dictionary of the defined properties and their associated filenames.
+- New Special Properties of `TFSCollection` Instances:
+  - `defined_properties`: Tuple of strings of the defined properties on this instance.
+  - `filenames` is a convenience wrapper for `get_filename()`:
+    - When called (`filenames(exist: bool)`) returns a dictionary of the defined properties and their associated filenames.
         The `exist` boolean filters between existing files or filenames for all properties.
-      - Can also be used either `filenames.name` or `filenames[name]` to call `get_filename(name)` on the instance.
+    - Can also be used as either `filenames.name` or `filenames[name]` to call `get_filename(name)` on the instance.
 
-  - Moved the define-properties functions directly into the `Tfs`-attribute marker class.
-  - Return of `None` for the `MaybeCall` class in case of attribute not found (instead of empty function, which didn't make sense).
+- Moved the define-properties functions directly into the `Tfs`-attribute marker class.
+- Return of `None` for the `MaybeCall` class in case of attribute not found (instead of empty function, which didn't make sense).
 
 ## Version 3.6.0
 
@@ -56,177 +62,184 @@ Minor API changes to the `TFSCollections`:
 ## Version 3.5.1
 
 - Fixed:
-    - Allow reading of empty lines in headers again.
+  - Allow reading of empty lines in headers again.
 
 ## Version 3.5.0
 
 - Fixed:
-  - Any empty strings ("") in a file's columns will now properly be read as such and not converted to `NaN`. 
+  - Any empty strings ("") in a file's columns will now properly be read as such and not converted to `NaN`.
 
 - Added:
   - It is now possible to only read the headers of a file by using a new function, `read_headers`. The function API is not exported at the top level of the package but is available to import from `tfs.reader`.
 
 ## Version 3.4.0
 
 - Added:
-  - The `read_tfs` and `write_tfs` functions can now handle reading / writing compressed files, see documentation for details. 
+  - The `read_tfs` and `write_tfs` functions can now handle reading / writing compressed files, see documentation for details.
 
 ## Version 3.3.1
 
 - Changed:
-    - Column types are now assigned at read time instead of later on, which should improve performance for large data frames.
+  - Column types are now assigned at read time instead of later on, which should improve performance for large data frames.
 
 ## Version 3.3.0
 
 - Added:
-    - The option is now given to the user to skip data frame validation after reading from file / before writing to file. Validation is left "on" by default, but can be turned off with a boolean argument.
+  - The option is now given to the user to skip data frame validation after reading from file / before writing to file. Validation is left "on" by default, but can be turned off with a boolean argument.
 
 - Changes:
-    - The `tfs.frame.validate` function has seen its internal logic reworked to be more efficient and users performing validation on large data frames should notice a significant performance improvement.
-    - The documentation has been expanded and improved, with notably the addition of example code snippets.
+  - The `tfs.frame.validate` function has seen its internal logic reworked to be more efficient and users performing validation on large data frames should notice a significant performance improvement.
+  - The documentation has been expanded and improved, with notably the addition of example code snippets.
 
 ## Version 3.2.1
 
 - Changed:
-    - Allow spaces in header names.
+  - Allow spaces in header names.
 
 ## Version 3.2.0
 
-- Added: 
-    - HDF5 read/write.
+- Added:
+  - HDF5 read/write.
   
 - Changed:
   - The minimum required Python version is now `3.7`.
 
 ## Version 3.1.0
 
 - Fixed:
-    - Removed dependency on depricated `numpy.str` 
+  - Removed dependency on deprecated `numpy.str`
 
 - Changed:
-    - No logging of error messages internally for reading files and checking dataframes. 
+  - No logging of error messages internally for reading files and checking dataframes.
       Instead logging is either moved to `debug`-level or all info is now in the error message itself
       to be handled externally by the user.
 
 ## Version 3.0.2
 
 - Fixed:
-    - String representation of empty headers is fixed (accidentally printed 'None' before).
+  - String representation of empty headers is fixed (accidentally printed 'None' before).
 
 ## Version 3.0.1
 
 - Fixed:
-    - Merging functionality from `TfsDataFrame.append`, `TfsDataFrame.join`, `TfsDataFrame.merge` and `tfs.concat` do not crash anymore when encountering a `pandas.DataFrame` (or more for `tfs.concat`) in their input. Signatures have been updated and tests were added for this behavior.
+  - Merging functionality from `TfsDataFrame.append`, `TfsDataFrame.join`, `TfsDataFrame.merge` and `tfs.concat` do not crash anymore when encountering a `pandas.DataFrame` (or more for `tfs.concat`) in their input. Signatures have been updated and tests were added for this behavior.
 
 ## Version 3.0.0
 
 A long-standing issue where merging functionality used on `TfsDataFrame` (through `.merge` or `pandas.concat` for instance) would cause them to be cast back to `pandas.DataFrame` and lose their headers has been patched.
 
 - Breaking changes:
-    - The internal API has been reworked for clarity and consistency. Note that anyone previously using the high-level exports `tfs.read`, `tfs.write` and `tfs.TfsDataFrame` **will not be affected**.
+  - The internal API has been reworked for clarity and consistency. Note that anyone previously using the high-level exports `tfs.read`, `tfs.write` and `tfs.TfsDataFrame` **will not be affected**.
 
 - Added:
-    - The `TfsDataFrame` class now has new `.append`, `.join` and `.merge` methods wrapping the inherited methods of the same name and fixing the aforementioned issue.
-    - A `tfs.frame.concat` function, exported as `tfs.concat`, has been added to wrap `pandas.concat` and fix the aforementioned issue.
-    - A `tfs.frame.merge_headers` function has been added.
-    - Top level exports are now: `tfs.TfsDataFrame`, `tfs.read`, `tfs.write` and `tfs.concat`.
+  - The `TfsDataFrame` class now has new `.append`, `.join` and `.merge` methods wrapping the inherited methods of the same name and fixing the aforementioned issue.
+  - A `tfs.frame.concat` function, exported as `tfs.concat`, has been added to wrap `pandas.concat` and fix the aforementioned issue.
+  - A `tfs.frame.merge_headers` function has been added.
+  - Top level exports are now: `tfs.TfsDataFrame`, `tfs.read`, `tfs.write` and `tfs.concat`.
 
 - Changes:
-    - The `tfs.frame.validate` function is now a public-facing documented API and may be used stably.
-    - The `write_tfs` function now appends an `EOL` (`\n`) at the end of the file when writing out for visual clarity and readability. This is a purely cosmetic and **does not** change functionality / compatibility of the files.
-    - Documentation and README have been updated and cleared up.
+  - The `tfs.frame.validate` function is now a public-facing documented API and may be used stably.
+  - The `write_tfs` function now appends an `EOL` (`\n`) at the end of the file when writing out for visual clarity and readability. This is purely cosmetic and **does not** change functionality / compatibility of the files.
+  - Documentation and README have been updated and cleared up.
 
 Please do refer to the documentation for the use of the new merging functionality to be aware of caveats, especially when merging headers.
 
-
 ## Version 2.1.0
 
 - Changes:
-    - The parsing in `read_tfs` has been reworked to make use of `pandas`'s C engine, resulting in drastic performance improvements when loading files. No functionality was lost or changed.
+  - The parsing in `read_tfs` has been reworked to make use of `pandas`'s C engine, resulting in drastic performance improvements when loading files. No functionality was lost or changed.
 
 ## Version 2.0.3
 
 - Fixed:
-    - Took care of a numpy deprecation warning when using `np.str`, which should not appear anymore for users.
+  - Took care of a numpy deprecation warning when using `np.str`, which should not appear anymore for users.
 
 - Changes:
-    - Prior to version `2.0.3`, reading and writing would raise a `TfsFormatError` in case of non-unique indices or columns. From now on, this behavior is an option in `read_tfs` and `write_tfs`called `non_unique_bahvior` which by default is set to log a warning. If explicitely asked by the user, the failed check will raise a `TfsFormatError`.
+  - Prior to version `2.0.3`, reading and writing would raise a `TfsFormatError` in case of non-unique indices or columns. From now on, this behavior is an option in `read_tfs` and `write_tfs` called `non_unique_behavior` which by default is set to log a warning. If explicitly asked by the user, the failed check will raise a `TfsFormatError`.
 
 ## Version 2.0.2
+
 - Fixed:
-    - Proper error on non-string columns
-    - Writing numeric-only mixed type dataframes bug
+  - Proper error on non-string columns
+  - Writing numeric-only mixed type dataframes bug
 
 ## Version 2.0.1
+
 - Fixed:
-    - No longer warns on MAD-X styled string column types (`%[num]s`).
-    - Documentation is up-to-date, and plays nicely with `Sphinx`'s parsing.
-    - Fix a wrong type hint.
+  - No longer warns on MAD-X styled string column types (`%[num]s`).
+  - Documentation is up-to-date, and plays nicely with `Sphinx`'s parsing.
+  - Fix a wrong type hint.
 
 ## Version 2.0.0
+
 - Breaking Changes:
-    - `FixedColumn`, `FixedColumnCollection` and `FixedTfs` have been removed from the package
-    - Objects are not converted to strings upon read anymore, and will raise an error
-    - Minimum pandas version is 1.0
+  - `FixedColumn`, `FixedColumnCollection` and `FixedTfs` have been removed from the package
+  - Objects are not converted to strings upon read anymore, and will raise an error
+  - Minimum pandas version is 1.0
 
 - Fixed:
-    - No longer writes an empty line to file in case of empty headers
-    - "Planed" dataframes capitalize plane key attributes to be consistent with other `pylhc` packages, however they can be accessed with and without capitalizing your query.
+  - No longer writes an empty line to file in case of empty headers
+  - "Planed" dataframes capitalize plane key attributes to be consistent with other `pylhc` packages, however they can be accessed with and without capitalizing your query.
 
 - Changes:
-    - Minimum required `numpy` version is now 1.19
-    - TfsDataFrames now automatically cast themselves to pandas datatypes using `.convert_dtypes()`
-    - Lighter dependency matrix
-    - Full testing of supported Python versions across linux, macOS and windows systems through Github Actions
+  - Minimum required `numpy` version is now 1.19
+  - TfsDataFrames now automatically cast themselves to pandas datatypes using `.convert_dtypes()`
+  - Lighter dependency matrix
+  - Full testing of supported Python versions across linux, macOS and windows systems through Github Actions
 
 ## Version 1.0.5
+
 - Fixed:
-    - Bug with testing for headers, also in pandas DataFrames
-    - Same testing method for all data-frame comparisons
-    - Some minor fixes
+  - Bug with testing for headers, also in pandas DataFrames
+  - Same testing method for all data-frame comparisons
+  - Some minor fixes
 
 - Added:
-    - Testing of writing of pandas DataFrames
-
+  - Testing of writing of pandas DataFrames
 
 ## Version 1.0.4
- - Added:
-   - support for pathlib Paths
-   - strings with spaces support (all strings in data are quoted)
-   - more validation checks (no spaces in header/columns)
-   - nicer string representation
-   - left-align of index-column
 
- - Removed:
-   - `.indx` from class (use `index="NAME"` instead)
+- Added:
+  - support for pathlib Paths
+  - strings with spaces support (all strings in data are quoted)
+  - more validation checks (no spaces in header/columns)
+  - nicer string representation
+  - left-align of index-column
+
+- Removed:
+  - `.indx` from class (use `index="NAME"` instead)
 
- - Fixed:
-   - Writing of empty dataframes
-   - Doc imports
-   - Minor bugfixes 
+- Fixed:
+  - Writing of empty dataframes
+  - Doc imports
+  - Minor bugfixes
 
 ## Version 1.0.3
- - Fixed:
-   - From relative to absolute imports (IMPORTANT FIX!!)
+
+- Fixed:
+  - From relative to absolute imports (IMPORTANT FIX!!)
 
 ## Version 1.0.2
- - Fixed:
-   - Additional index column after writing is removed again
-   - Renamded sigificant_numbers to significant_digits
-   - significant_digits throws proper error if zero-error is given
 
- - Added:
-   - Fixed Dataframe Class
-   - Type Annotations
+- Fixed:
+  - Additional index column after writing is removed again
+  - Renamed sigificant_numbers to significant_digits
+  - significant_digits throws proper error if zero-error is given
+
+- Added:
+  - Fixed Dataframe Class
+  - Type Annotations
 
 ## Version 1.0.1
- - Fixed: 
-    - Metaclass-Bug in Collections
 
- - Added: 
-    - Additional Unit Tests
-    - Versioning
-    - Changelog
+- Fixed:
+  - Metaclass-Bug in Collections
+
+- Added:
+  - Additional Unit Tests
+  - Versioning
+  - Changelog
 
 ## Version 1.0.0
- - Initial Release
+
+- Initial Release
diff --git a/tests/test_writer.py b/tests/test_writer.py
@@ -2,7 +2,6 @@
 import pathlib
 import random
 import string
-import sys
 
 import numpy
 import pandas
@@ -36,6 +35,22 @@ def test_tfs_write_empty_columns_dataframe(self, tmp_path):
         assert_frame_equal(df, new)
         assert_dict_equal(df.headers, new.headers, compare_keys=True)
 
+    def test_tfs_write_series_like_dataframe(self, tmp_path):
+        """Write-read a pandas.Series-like to disk and make sure all goes right."""
+        df = pandas.Series([1,2,3,4,5])
+
+        write_location = tmp_path / "test.tfs"
+        test_headers = {"test": 1, "test_string": "test_write_series_like"}
+        write_tfs(write_location, df, headers_dict=test_headers, save_index=True)
+        assert write_location.is_file()
+
+        # Read data will be TfsDataFrame, so in pd.DataFrame-like form
+        # For the comparison we only compare the column (as Series-like) and accept that the
+        # user sees a little difference in the data format (Series vs DataFrame with 1 column)
+        new = read_tfs(write_location)
+        assert_series_equal(df, new["0"], check_names=False)  
+        assert_dict_equal(test_headers, new.headers, compare_keys=True)
+
     def test_madx_reads_written_tfsdataframes(self, _bigger_tfs_dataframe, tmp_path):
         dframe = _bigger_tfs_dataframe
         dframe.headers["TYPE"] = "TWISS"  # MAD-X complains on TFS files with no "TYPE" header
diff --git a/tfs/__init__.py b/tfs/__init__.py
@@ -10,7 +10,7 @@
 __title__ = "tfs-pandas"
 __description__ = "Read and write tfs files."
 __url__ = "https://github.com/pylhc/tfs"
-__version__ = "3.7.2"
+__version__ = "3.7.3"
 __author__ = "pylhc"
 __author_email__ = "pylhc@github.com"
 __license__ = "MIT"
diff --git a/tfs/writer.py b/tfs/writer.py
@@ -97,7 +97,12 @@ def write_tfs(
     """
     left_align_first_column = False
     tfs_file_path = pathlib.Path(tfs_file_path)
-    
+
+    # Force a conversion from pd.Series-like to TfsDataFrame to avoid empty columns issues
+    if not isinstance(data_frame, (TfsDataFrame, pd.DataFrame)):
+        data_frame = TfsDataFrame(data_frame)
+        data_frame.columns = data_frame.columns.astype(str)  # need column names to be strings
+
     if validate:
         validate_frame(data_frame, f"to be written in {tfs_file_path.absolute()}", non_unique_behavior)