From 1c0f62b0719435ba486b21c8fd4033d700e54a4e Mon Sep 17 00:00:00 2001
From: Felix Soubelet
Date: Thu, 10 Jul 2025 14:41:05 +0200
Subject: [PATCH 1/6] ruff rules

---
 pyproject.toml | 59 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 55 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 0a3a3c3f..b8c3744f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -86,9 +86,7 @@
 repository = "https://github.com/pylhc/tfs"
 documentation = "https://pylhc.github.io/tfs/ "
 changelog = "https://github.com/pylhc/tfs/blob/master/CHANGELOG.md"
-
-[tool.ruff]
-target-version = "py310" # Assume Python 3.10+
+# ----- Tests Configuration ----- #
 
 [tool.pytest.ini_options]
 addopts = "--cov-report=xml --cov-report term-missing --cov-config=pyproject.toml --cov=tfs"
@@ -98,4 +96,57 @@ testpaths = ["tests"]
 exclude_also = [
   "if TYPE_CHECKING:", # do not count type checking imports (ignored at runtime) for coverage
   "except ImportError:", # do not count missing optional dependencies set to None, we monkeypatch and test that
-]
+]
+
+# ----- Dev Tools Configuration ----- #
+
+[tool.ruff]
+exclude = [
+    ".eggs",
+    ".git",
+    ".mypy_cache",
+    ".venv",
+    "_build",
+    "build",
+    "dist",
+]
+
+# Assume Python 3.10+
+target-version = "py310"
+
+line-length = 100
+indent-width = 4
+
+[tool.ruff.lint]
+# Allow unused variables when underscore-prefixed.
+dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+ignore = [
+    "E501",    # line too long
+    "FBT001",  # boolean-type-hint-positional-argument
+    "FBT002",  # boolean-default-value-positional-argument
+    "PT019",   # pytest-fixture-param-without-value (but suggested solution fails)
+]
+extend-select = [
+    "F",    # Pyflakes rules
+    "W",    # PyCodeStyle warnings
+    "E",    # PyCodeStyle errors
+    "I",    # Sort imports properly
+    "A",    # Detect shadowed builtins
+    "N",    # enforce naming conventions, e.g. ClassName vs function_name
+    "UP",   # Warn if certain things can be changed due to newer Python versions
+    "C4",   # Catch incorrect use of comprehensions, dict, list, etc
+    "FA",   # Enforce from __future__ import annotations
+    "FBT",  # detect boolean traps
+    "ISC",  # Good use of string concatenation
+    "BLE",  # disallow catch-all exceptions
+    "ICN",  # Use common import conventions
+    "RET",  # Good return practices
+    "SIM",  # Common simplification rules
+    "TID",  # Some good import practices
+    "TC",   # Enforce importing certain types in a TYPE_CHECKING block
+    "PTH",  # Use pathlib instead of os.path
+    "NPY",  # Some numpy-specific things
+]
+# Allow fix for all enabled rules (when `--fix` is provided).
+fixable = ["ALL"] +unfixable = [] From 9806fc42ba64723ce768101bd526ed15c0af48ef Mon Sep 17 00:00:00 2001 From: Felix Soubelet Date: Thu, 10 Jul 2025 14:46:10 +0200 Subject: [PATCH 2/6] formatting and simple rules fix --- doc/conf.py | 2 +- pyproject.toml | 2 +- tests/test_collection.py | 11 +++++------ tests/test_compression.py | 3 ++- tests/test_frame.py | 11 ++++++++++- tests/test_reader.py | 8 ++++++-- tests/test_testing.py | 1 - tests/test_validation.py | 17 ++++++++++++----- tests/test_writer.py | 37 ++++++++++++++++++++++++++++--------- tfs/frame.py | 5 ++++- tfs/reader.py | 2 +- tfs/testing.py | 4 +--- tfs/tools.py | 4 +++- tfs/writer.py | 18 +++++++++++++----- 14 files changed, 87 insertions(+), 38 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index e89fd2fe..06654990 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -99,7 +99,7 @@ def about_package(init_posixpath: pathlib.Path) -> dict: # Override link in 'Edit on Github' rst_prolog = f""" -:github_url: {ABOUT_TFS['__url__']} +:github_url: {ABOUT_TFS["__url__"]} """ # The version info for the project you're documenting, acts as replacement for diff --git a/pyproject.toml b/pyproject.toml index b8c3744f..578302df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,7 +114,7 @@ exclude = [ # Assume Python 3.10+ target-version = "py310" -line-length = 100 +line-length = 110 indent-width = 4 [tool.ruff.lint] diff --git a/tests/test_collection.py b/tests/test_collection.py index 938419b9..14988f21 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -21,7 +21,6 @@ def _get_filename(self, template, plane=""): class TestRead: - def test_read_pathlib_input( self, _input_dir_pathlib: pathlib.Path, _tfs_x: TfsDataFrame, _tfs_y: TfsDataFrame ): @@ -48,13 +47,13 @@ def test_read_str_input(self, _input_dir_str: str, _tfs_x: TfsDataFrame, _tfs_y: class TestWrite: - def test_write(self, _tfs_x: TfsDataFrame, _tfs_y: TfsDataFrame, tmp_path): c = CollectionTest(tmp_path) file_x_path = tmp_path / "nofile_x.tfs" assert not file_x_path.is_file() - c.nofile_x = _tfs_y # only assigns dataframe without writing (use _tfs_y so that we can set _tfs_x below) + # only assigns dataframe without writing (use _tfs_y so that we can set _tfs_x below) + c.nofile_x = _tfs_y assert not file_x_path.is_file() assert_tfs_frame_equal(_tfs_y, c.nofile_x) @@ -130,7 +129,9 @@ def test_buffer_flush(self, _input_dir_str: str, _tfs_x: TfsDataFrame, _tfs_y: T assert tfs_x_after_flush.loc["BPMSX.4L2.B1", "NUMBER"] == -199 assert tfs_y_after_flush.loc["BPMSX.4L2.B1", "NUMBER"] == -19 - def test_buffer_flush_nowrite(self, _input_dir_str: str, _tfs_x: TfsDataFrame, _tfs_y: TfsDataFrame, tmp_path): + def test_buffer_flush_nowrite( + self, _input_dir_str: str, _tfs_x: TfsDataFrame, _tfs_y: TfsDataFrame, tmp_path + ): c = CollectionTest(tmp_path, allow_write=True) c.file_x = _tfs_x.copy() @@ -155,7 +156,6 @@ def test_buffer_flush_nowrite(self, _input_dir_str: str, _tfs_x: TfsDataFrame, _ class TestFilenames: - def test_tfscollection_getfilename_not_implemented(self): with pytest.raises(NotImplementedError): TfsCollection._get_filename("doesnt matter") # noqa: SLF001 @@ -206,7 +206,6 @@ def test_get_path(self, _input_dir_pathlib: pathlib.Path): class TestOther: - def test_access_methods(self, _input_dir_pathlib: pathlib.Path): c = CollectionTest(_input_dir_pathlib, allow_write=False) diff --git a/tests/test_compression.py b/tests/test_compression.py index d10f5370..15ad571b 100644 --- a/tests/test_compression.py +++ b/tests/test_compression.py @@ -12,7 +12,8 
@@ from .conftest import INPUTS_DIR -SUPPORTED_EXTENSIONS: tuple[str] = ["gz", "bz2", "zip", "xz", "zst", "tar", "tar.gz"] # through pandas +# Compression extensions supported through pandas +SUPPORTED_EXTENSIONS: tuple[str] = ("gz", "bz2", "zip", "xz", "zst", "tar", "tar.gz") # ----- Compression tests with 'classic' TFS files (no MAD-NG features) ----- # diff --git a/tests/test_frame.py b/tests/test_frame.py index d75c407f..167861d2 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -117,7 +117,16 @@ def test_header_print(self): assert str(val) in print_out def test_long_headers_print(self): - headers = {"p1": 1, "p2": "hello", "p3": 3, "p4": 4, "p5": 5, "p6": 6, "p7": "string", "p8": "long"} + headers = { + "p1": 1, + "p2": "hello", + "p3": 3, + "p4": 4, + "p5": 5, + "p6": 6, + "p7": "string", + "p8": "long", + } df = TfsDataFrame(headers=headers) print_out = str(df) assert "Headers" in print_out diff --git a/tests/test_reader.py b/tests/test_reader.py index ba121542..edae262d 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -71,7 +71,9 @@ def test_tfs_read_write_read_pathlib_input(self, _tfs_filex: pathlib.Path, tmp_p def test_read_write_wise_header(self, _tfs_file_wise, tmp_path): original_text = _tfs_file_wise.read_text() - original_header_lines = [line for line in original_text.splitlines() if line.strip().startswith(HEADER)] + original_header_lines = [ + line for line in original_text.splitlines() if line.strip().startswith(HEADER) + ] df = read_tfs(_tfs_file_wise) assert len(df.headers) == len(original_header_lines) @@ -111,7 +113,9 @@ def test_read_file_with_empty_lines_in_header(self, _tfs_file_empty_lines, _tfs_ df_for_compare = read_tfs(_tfs_filex) assert_tfs_frame_equal(df, df_for_compare) - def test_read_file_single_header_empty_line_in_header(self, _tfs_file_single_header_empty_line, _tfs_filex): + def test_read_file_single_header_empty_line_in_header( + self, _tfs_file_single_header_empty_line, _tfs_filex + ): """Very special, but this was a case that failed in the past.""" df = read_tfs(_tfs_file_single_header_empty_line) assert len(df.headers) == 1 diff --git a/tests/test_testing.py b/tests/test_testing.py index 2a749e30..a20ce126 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -5,7 +5,6 @@ class TestAssertTfsDataFrameEqual: - def test_no_headers_equal(self): df1 = TfsDataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) assert_tfs_frame_equal(df1, df1) # we expect True diff --git a/tests/test_validation.py b/tests/test_validation.py index 9b3acb9d..6aa8fbb1 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -74,7 +74,9 @@ def test_validate_raises_on_wrong_unique_behavior(self, validation_mode): validate(df, "", non_unique_behavior="invalid", compatibility=validation_mode) @pytest.mark.parametrize("validation_mode", ["madx", "mad-x", "madng", "MAD-NG"]) - def test_validation_raises_space_in_colname(self, _space_in_colnames_tfs_path: pathlib.Path, validation_mode): + def test_validation_raises_space_in_colname( + self, _space_in_colnames_tfs_path: pathlib.Path, validation_mode + ): # Read file has a space in a column name which should raise with pytest.raises(SpaceinColumnNameError, match="TFS-Columns can not contain spaces."): _ = read_tfs(_space_in_colnames_tfs_path, index="NAME", validate=validation_mode) @@ -124,14 +126,17 @@ class TestMADXFailures: def test_madx_validation_raises_if_no_headers(self, _pd_dataframe, validation_mode): """MAD-X expects at least a 'TYPE' header. 
If there are no headers, we raise.""" df = _pd_dataframe - with pytest.raises(MADXCompatibilityError, match="Headers should be present in MAD-X compatibility mode"): + with pytest.raises( + MADXCompatibilityError, match="Headers should be present in MAD-X compatibility mode" + ): validate(df, compatibility=validation_mode) @pytest.mark.parametrize("validation_mode", ["madx", "mad-x", "mAd-X"]) def test_madx_validation_raises_on_boolean_headers(self, _tfs_booleans_file, validation_mode): df = read_tfs(_tfs_booleans_file) with pytest.raises( - MADXCompatibilityError, match="TFS-Headers can not contain boolean values in MAD-X compatibility mode" + MADXCompatibilityError, + match="TFS-Headers can not contain boolean values in MAD-X compatibility mode", ): validate(df, compatibility=validation_mode) @@ -139,7 +144,8 @@ def test_madx_validation_raises_on_boolean_headers(self, _tfs_booleans_file, val def test_madx_validation_raises_on_complex_headers(self, _tfs_complex_file, validation_mode): df = read_tfs(_tfs_complex_file) with pytest.raises( - MADXCompatibilityError, match="TFS-Headers can not contain complex values in MAD-X compatibility mode" + MADXCompatibilityError, + match="TFS-Headers can not contain complex values in MAD-X compatibility mode", ): validate(df, compatibility=validation_mode) @@ -148,7 +154,8 @@ def test_madx_validation_raises_on_none_headers(self, _tfs_dataframe, validation df = _tfs_dataframe df.headers["NONEVALUE"] = None with pytest.raises( - MADXCompatibilityError, match="TFS-Headers can not contain 'None' values in MAD-X compatibility mode" + MADXCompatibilityError, + match="TFS-Headers can not contain 'None' values in MAD-X compatibility mode", ): validate(df, compatibility=validation_mode) diff --git a/tests/test_writer.py b/tests/test_writer.py index 1f75a46a..92e4a3d8 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -131,7 +131,9 @@ def test_tfs_write_read_empty_headers(self, _dataframe_empty_headers: TfsDataFra assert write_location.is_file() new = read_tfs(write_location) - assert_tfs_frame_equal(_dataframe_empty_headers, new, check_exact=False) # float precision can be an issue + assert_tfs_frame_equal( + _dataframe_empty_headers, new, check_exact=False + ) # float precision can be an issue @pytest.mark.parametrize("validation_mode", [None, "madng"]) def test_tfs_write_read_no_headers_compatible_modes(self, _pd_dataframe, validation_mode, tmp_path): @@ -187,7 +189,9 @@ def test_tfs_write_read_no_headers_dataframe(self, tmp_path, _pd_dataframe): assert_frame_equal(df, new, check_frame_type=False) @pytest.mark.parametrize("validation_mode", ["madx", "madng"]) - def test_tfs_write_read_validate_with_pandas_and_headers_dict(self, tmp_path, _pd_dataframe, validation_mode): + def test_tfs_write_read_validate_with_pandas_and_headers_dict( + self, tmp_path, _pd_dataframe, validation_mode + ): # We make sure that if provided with a pandas.DataFrame and a headers_dict # the validation and writing go as expected. 
df = _pd_dataframe @@ -200,7 +204,9 @@ def test_tfs_write_read_validate_with_pandas_and_headers_dict(self, tmp_path, _p assert_dict_equal(headers, new.headers, compare_keys=True) @pytest.mark.parametrize("validation_mode", ["madx", "madng"]) - def test_tfs_write_read_dataframe_empty_headers_provided_headers(self, tmp_path, _tfs_dataframe, validation_mode): + def test_tfs_write_read_dataframe_empty_headers_provided_headers( + self, tmp_path, _tfs_dataframe, validation_mode + ): # We make sure that providing a TfsDataFrame with empty headers, but providing # actual headers to write_tfs actually validates and writes with the provided headers df = _tfs_dataframe @@ -235,7 +241,9 @@ def test_tfs_write_read_with_booleans(self, _tfs_dataframe_booleans, tmp_path): assert write_location.is_file() new = read_tfs(write_location) - assert_tfs_frame_equal(_tfs_dataframe_booleans, new, check_exact=False) # float precision can be an issue + assert_tfs_frame_equal( + _tfs_dataframe_booleans, new, check_exact=False + ) # float precision can be an issue def test_tfs_write_read_with_complex(self, _tfs_dataframe_complex, tmp_path): write_location = tmp_path / "test.tfs" @@ -243,7 +251,9 @@ def test_tfs_write_read_with_complex(self, _tfs_dataframe_complex, tmp_path): assert write_location.is_file() new = read_tfs(write_location) - assert_tfs_frame_equal(_tfs_dataframe_complex, new, check_exact=False) # float precision can be an issue + assert_tfs_frame_equal( + _tfs_dataframe_complex, new, check_exact=False + ) # float precision can be an issue def test_tfs_write_with_nil_in_headers(self, _tfs_dataframe, tmp_path): df = _tfs_dataframe @@ -263,7 +273,9 @@ def test_tfs_write_read_madng_like(self, _tfs_dataframe_madng, tmp_path): assert write_location.is_file() new = read_tfs(write_location) - assert_tfs_frame_equal(_tfs_dataframe_madng, new, check_exact=False) # float precision can be an issue + assert_tfs_frame_equal( + _tfs_dataframe_madng, new, check_exact=False + ) # float precision can be an issue @pytest.mark.skipif(sys.platform == "win32", reason="MAD-NG not available on Windows") def test_tfs_write_madng_compatible_is_read_by_madng(self, _tfs_dataframe_madng, tmp_path): @@ -328,10 +340,15 @@ def test_fail_on_spaces_columns_when_validating(self, caplog): def test_messed_up_dataframe_fails_writes_when_validating(self, _messed_up_dataframe: TfsDataFrame): messed_tfs = _messed_up_dataframe # This df raises in validate because of list elements - with pytest.raises(IterableInDataFrameError, match="Lists or tuple elements are not accepted in a TfsDataFrame"): + with pytest.raises( + IterableInDataFrameError, + match="Lists or tuple elements are not accepted in a TfsDataFrame", + ): write_tfs("", messed_tfs, validate="madx") # strictest - def test_dict_column_dataframe_fails_writes_when_validating(self, _dict_column_in_dataframe: TfsDataFrame, tmp_path): + def test_dict_column_dataframe_fails_writes_when_validating( + self, _dict_column_in_dataframe: TfsDataFrame, tmp_path + ): dict_col_tfs = _dict_column_in_dataframe with pytest.raises(TypeError): # tries to format dict.__dict__, can't get a % formatter write_tfs("", dict_col_tfs, validate="madx") # strictest @@ -341,7 +358,9 @@ def test_dict_column_dataframe_fails_writes_when_validating(self, _dict_column_i write_tfs(write_location, dict_col_tfs) assert write_location.is_file() - def test_list_column_dataframe_fails_writes_when_validating(self, _list_column_in_dataframe: TfsDataFrame, tmp_path, caplog): + def 
test_list_column_dataframe_fails_writes_when_validating( + self, _list_column_in_dataframe: TfsDataFrame, tmp_path, caplog + ): list_col_tfs = _list_column_in_dataframe write_location = tmp_path / "test.tfs" # This df raises in validate because of list colnames diff --git a/tfs/frame.py b/tfs/frame.py index e107cd7b..d34c55da 100644 --- a/tfs/frame.py +++ b/tfs/frame.py @@ -309,7 +309,10 @@ def _element_is_list(element): f"{inf_or_nan_bool_df.index[inf_or_nan_bool_df.any(axis='columns')].tolist()}" ) - if getattr(data_frame, "headers", None) is not None and pd.Series(data_frame.headers.values()).isna().any(): + if ( + getattr(data_frame, "headers", None) is not None + and pd.Series(data_frame.headers.values()).isna().any() + ): LOGGER.warning(f"DataFrame {info_str} contains non-physical values in headers.") # ----- Other sanity checks ----- # diff --git a/tfs/reader.py b/tfs/reader.py index 4045beea..1acf4bb6 100644 --- a/tfs/reader.py +++ b/tfs/reader.py @@ -54,6 +54,7 @@ # ----- Main Functionality ----- # + def read_tfs( tfs_file_path: pathlib.Path | str, index: str | None = None, @@ -344,7 +345,6 @@ def _read_metadata(tfs_file_path: pathlib.Path | str) -> _TfsMetaData: ) - def _parse_header_line(str_list: list[str]) -> tuple[str, bool | str | int | float, np.complex128]: """ Parses the data in the provided header line. Expects a valid header diff --git a/tfs/testing.py b/tfs/testing.py index bdea2ba9..2eab7b8a 100644 --- a/tfs/testing.py +++ b/tfs/testing.py @@ -19,9 +19,7 @@ # ----- Helpers ----- # -def assert_tfs_frame_equal( - df1: TfsDataFrame, df2: TfsDataFrame, compare_keys: bool = True, **kwargs -): +def assert_tfs_frame_equal(df1: TfsDataFrame, df2: TfsDataFrame, compare_keys: bool = True, **kwargs): """ Compare two `TfsDataFrame` objects, with `df1` being the reference that `df2` is compared to. This is mostly intended for unit tests. diff --git a/tfs/tools.py b/tfs/tools.py index 5f5977bb..976fc323 100644 --- a/tfs/tools.py +++ b/tfs/tools.py @@ -23,7 +23,9 @@ def significant_digits( - value: float, error: float, return_floats: bool = False # noqa: FBT001, FBT002 + value: float, + error: float, + return_floats: bool = False, # noqa: FBT001, FBT002 ) -> tuple[str, str] | tuple[float, float]: """ Computes `value` and its error properly rounded with respect to the size of `error`. diff --git a/tfs/writer.py b/tfs/writer.py index dabea58a..6ad0cc07 100644 --- a/tfs/writer.py +++ b/tfs/writer.py @@ -119,7 +119,7 @@ def write_tfs( # We case to a TfsDataFrame anyway for validation and writing # We ensure column names as strings in case it was a pd.Series-like (and colname is 0) data_frame = TfsDataFrame(data_frame, headers=headers_dict) - data_frame.columns = data_frame.columns.astype(str) + data_frame.columns = data_frame.columns.astype(str) # Only perform validation if asked (validation is OFF by default) # We also check for False as it was the way to skip it in tfs-pandas 3.x @@ -161,7 +161,9 @@ def write_tfs( # ----- Helpers ----- # -def _insert_index_column(data_frame: TfsDataFrame | pd.DataFrame, save_index: str | bool | None = None) -> None: +def _insert_index_column( + data_frame: TfsDataFrame | pd.DataFrame, save_index: str | bool | None = None +) -> None: """ Inserts the index of the dataframe into it as a column, naming it according to 'save_index' if it was provided. 
Otherwise it tries to use the existing index's @@ -218,9 +220,13 @@ def _get_header_line(name: str, value, width: int) -> str: errmsg = f"{name} is not a string" raise TypeError(errmsg) type_identifier = _value_to_tfs_type_identifier(value) - dtype_ = NoneType if value is None else np.array(value).dtype # otherwise numpy gives 'Object' for 'None's + dtype_ = ( + NoneType if value is None else np.array(value).dtype + ) # otherwise numpy gives 'Object' for 'None's # Strip the following as it might have trailing spaces and we leave that to the alignment formatting below - value_str = ValueToStringFormatter().format_field(value, _dtype_to_formatter_string(dtype_, width)).strip() + value_str = ( + ValueToStringFormatter().format_field(value, _dtype_to_formatter_string(dtype_, width)).strip() + ) return f"@ {name:<{width}} {type_identifier} {value_str.strip():>{width}}" @@ -268,7 +274,9 @@ def _get_data_string( def _get_row_format_string( - dtypes: list[type], colwidth: int, left_align_first_column: bool # noqa: FBT001 + dtypes: list[type], + colwidth: int, + left_align_first_column: bool, # noqa: FBT001 ) -> str: """ Returns the formatter string for a given row of the data part of the dataframe, From 6460bd8da6ac63db93d6016af2c03c55885e04e9 Mon Sep 17 00:00:00 2001 From: Felix Soubelet Date: Thu, 10 Jul 2025 14:47:22 +0200 Subject: [PATCH 3/6] auto fixes --- tests/conftest.py | 10 +++++----- tests/test_writer.py | 9 ++++----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 18cb4856..0c9195ed 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -49,7 +49,7 @@ def _tfs_madng_file() -> pathlib.Path: def _pd_dataframe() -> pd.DataFrame: return pd.DataFrame( index=range(3), - columns="a b c d e".split(), + columns=["a", "b", "c", "d", "e"], data=np.random.rand(3, 5), ) @@ -58,7 +58,7 @@ def _pd_dataframe() -> pd.DataFrame: def _tfs_dataframe() -> TfsDataFrame: return TfsDataFrame( index=range(15), - columns="a b c d e".split(), + columns=["a", "b", "c", "d", "e"], data=np.random.rand(15, 5), headers={"Title": "Tfs Title", "Value": 3.3663}, ) @@ -69,7 +69,7 @@ def _tfs_dataframe_booleans() -> TfsDataFrame: """TfsDataFrame with boolean values in the headers and data (1 column).""" df = TfsDataFrame( index=range(15), - columns="a b c d e".split(), + columns=["a", "b", "c", "d", "e"], data=np.random.rand(15, 5), headers={"Title": "Bool Test", "Bool1": True, "Bool2": False, "Bool3": 1}, ) @@ -82,7 +82,7 @@ def _tfs_dataframe_complex() -> TfsDataFrame: """TfsDataFrame with complex values in the headers and data (1 column).""" df = TfsDataFrame( index=range(15), - columns="a b c d e".split(), + columns=["a", "b", "c", "d", "e"], data=np.random.rand(15, 5), headers={"Title": "Complex Test", "Complex1": 1 + 2j, "Complex2": -4 - 17.9j}, ) @@ -98,7 +98,7 @@ def _tfs_dataframe_madng() -> TfsDataFrame: """ df = TfsDataFrame( index=range(15), - columns="a b c d e".split(), + columns=["a", "b", "c", "d", "e"], data=np.random.rand(15, 5), headers={ "Title": "MADNG Test", diff --git a/tests/test_writer.py b/tests/test_writer.py index 92e4a3d8..449ce187 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -16,7 +16,6 @@ DuplicateColumnsError, DuplicateIndicesError, IterableInDataFrameError, - NonStringColumnNameError, SpaceinColumnNameError, ) from tfs.testing import assert_tfs_frame_equal @@ -414,7 +413,7 @@ def _bigger_tfs_dataframe() -> TfsDataFrame: def _dataframe_empty_headers() -> TfsDataFrame: return TfsDataFrame( index=range(3), - 
columns="a b c d e".split(), + columns=["a", "b", "c", "d", "e"], data=np.random.rand(3, 5), headers={}, ) @@ -429,7 +428,7 @@ def _messed_up_dataframe() -> TfsDataFrame: list_floats_row = [[1.0, 14.777], [2.0, 1243.9], [3.0], [123414.0, 9909.12795]] return TfsDataFrame( index=range(4), - columns="a b c d".split(), + columns=["a", "b", "c", "d"], data=[int_row, float_row, string_row, list_floats_row], headers={"Title": "Tfs Title", "Value": 3.3663}, ) @@ -445,7 +444,7 @@ def _dict_column_in_dataframe() -> TfsDataFrame: data = [[e[i] for e in (int_elements, float_elements, string_elements, dict_elements)] for i in range(4)] return TfsDataFrame( index=range(4), - columns="a b c d".split(), + columns=["a", "b", "c", "d"], data=data, headers={"Title": "Tfs Title", "Value": 3.3663}, ) @@ -461,7 +460,7 @@ def _list_column_in_dataframe() -> TfsDataFrame: data = [[e[i] for e in (int_elements, float_elements, string_elements, list_elements)] for i in range(4)] return TfsDataFrame( index=range(4), - columns="a b c d".split(), + columns=["a", "b", "c", "d"], data=data, headers={"Title": "Tfs Title", "Value": 3.3663}, ) From 6ed88afda94101abfb088433090b701e48e50524 Mon Sep 17 00:00:00 2001 From: Felix Soubelet Date: Thu, 10 Jul 2025 14:55:00 +0200 Subject: [PATCH 4/6] stop using np.random functions, use default_rng and its methods --- tests/conftest.py | 23 ++++++++++++++--------- tests/test_writer.py | 12 ++++++++---- tfs/writer.py | 7 +++++-- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 0c9195ed..ec83c0ac 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -47,19 +47,21 @@ def _tfs_madng_file() -> pathlib.Path: @pytest.fixture def _pd_dataframe() -> pd.DataFrame: + rng = np.random.default_rng() return pd.DataFrame( index=range(3), columns=["a", "b", "c", "d", "e"], - data=np.random.rand(3, 5), + data=rng.random(size=(3, 5)), ) @pytest.fixture def _tfs_dataframe() -> TfsDataFrame: + rng = np.random.default_rng() return TfsDataFrame( index=range(15), columns=["a", "b", "c", "d", "e"], - data=np.random.rand(15, 5), + data=rng.random(size=(15, 5)), headers={"Title": "Tfs Title", "Value": 3.3663}, ) @@ -67,26 +69,28 @@ def _tfs_dataframe() -> TfsDataFrame: @pytest.fixture def _tfs_dataframe_booleans() -> TfsDataFrame: """TfsDataFrame with boolean values in the headers and data (1 column).""" + rng = np.random.default_rng() df = TfsDataFrame( index=range(15), columns=["a", "b", "c", "d", "e"], - data=np.random.rand(15, 5), + data=rng.random(size=(15, 5)), headers={"Title": "Bool Test", "Bool1": True, "Bool2": False, "Bool3": 1}, ) - df["bools"] = np.random.rand(15) > 0.5 # random from 0 to 1 and then boolean check + df["bools"] = rng.random(15) > 0.5 # random from 0 to 1 and then boolean check return df @pytest.fixture def _tfs_dataframe_complex() -> TfsDataFrame: """TfsDataFrame with complex values in the headers and data (1 column).""" + rng = np.random.default_rng() df = TfsDataFrame( index=range(15), columns=["a", "b", "c", "d", "e"], - data=np.random.rand(15, 5), + data=rng.random(size=(15, 5)), headers={"Title": "Complex Test", "Complex1": 1 + 2j, "Complex2": -4 - 17.9j}, ) - df["complex"] = np.random.rand(15) + np.random.rand(15) * 1j + df["complex"] = rng.random(15) + rng.random(15) * 1j return df @@ -96,10 +100,11 @@ def _tfs_dataframe_madng() -> TfsDataFrame: TfsDataFrame with both booleans and complex values in the headers and data (1 column each). 
""" + rng = np.random.default_rng() df = TfsDataFrame( index=range(15), columns=["a", "b", "c", "d", "e"], - data=np.random.rand(15, 5), + data=rng.random(size=(15, 5)), headers={ "Title": "MADNG Test", "Bool1": True, @@ -109,6 +114,6 @@ def _tfs_dataframe_madng() -> TfsDataFrame: "Complex2": -94.6 - 67.9j, }, ) - df["bools"] = np.random.rand(15) > 0.5 # random from 0 to 1 and then boolean check - df["complex"] = np.random.rand(15) + np.random.rand(15) * 1j + df["bools"] = rng.random(15) > 0.5 # random from 0 to 1 and then boolean check + df["complex"] = rng.random(15) + rng.random(15) * 1j return df diff --git a/tests/test_writer.py b/tests/test_writer.py index 449ce187..86d47286 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -23,10 +23,11 @@ class TestWrites: def test_tfs_write_empty_columns_dataframe(self, tmp_path): + rng = np.random.default_rng() df = TfsDataFrame( index=range(3), columns=[], - data=np.random.rand(3, 0), + data=rng.random(size=(3, 0)), headers={"Title": "Tfs Title", "Value": 3.3663}, ) @@ -72,10 +73,11 @@ def test_madx_reads_written_tfsdataframes(self, _bigger_tfs_dataframe, tmp_path) ) def test_tfs_write_empty_index_dataframe(self, tmp_path): + rng = np.random.default_rng() df = TfsDataFrame( index=[], columns=["a", "b", "c"], - data=np.random.rand(0, 3), + data=rng.random(size=(0, 3)), headers={"Title": "Tfs Title", "Value": 3.3663}, ) @@ -401,20 +403,22 @@ def test_header_line_raises_on_non_strings(self): @pytest.fixture def _bigger_tfs_dataframe() -> TfsDataFrame: + rng = np.random.default_rng() return TfsDataFrame( index=range(50), columns=list(string.ascii_lowercase), - data=np.random.rand(50, len(list(string.ascii_lowercase))), + data=rng.random(size=(50, len(list(string.ascii_lowercase)))), headers={"Title": "Tfs Title", "Value": 3.3663}, ) @pytest.fixture def _dataframe_empty_headers() -> TfsDataFrame: + rng = np.random.default_rng() return TfsDataFrame( index=range(3), columns=["a", "b", "c", "d", "e"], - data=np.random.rand(3, 5), + data=rng.random(size=(3, 5)), headers={}, ) diff --git a/tfs/writer.py b/tfs/writer.py index 6ad0cc07..ac357ed3 100644 --- a/tfs/writer.py +++ b/tfs/writer.py @@ -11,9 +11,8 @@ import pathlib import string from types import NoneType - +from typing import TYPE_CHECKING import numpy as np -import pandas as pd from pandas.api import types as pdtypes from pandas.io.common import get_handle @@ -21,6 +20,10 @@ from tfs.frame import TfsDataFrame from tfs.frame import validate as validate_frame +if TYPE_CHECKING: + import pandas as pd + + LOGGER = logging.getLogger(__name__) From d34b07a6612d2576dc80bfcca95acb01b60c11e9 Mon Sep 17 00:00:00 2001 From: Felix Soubelet Date: Thu, 10 Jul 2025 14:55:35 +0200 Subject: [PATCH 5/6] return condition evaluated directly --- tfs/reader.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tfs/reader.py b/tfs/reader.py index 1acf4bb6..128142e8 100644 --- a/tfs/reader.py +++ b/tfs/reader.py @@ -419,9 +419,7 @@ def _string_to_bool(val_str: str) -> bool: if val_str.lower().capitalize() not in VALID_BOOLEANS_HEADERS: raise InvalidBooleanHeaderError(val_str) - if val_str.lower().capitalize() in VALID_TRUE_BOOLEANS: - return True - return False + return val_str.lower().capitalize() in VALID_TRUE_BOOLEANS def _id_to_type(type_identifier: str) -> type: From 8a423d29682b9bf430b59c29cc83ba32b4bb8b8f Mon Sep 17 00:00:00 2001 From: Felix Soubelet Date: Thu, 10 Jul 2025 15:00:19 +0200 Subject: [PATCH 6/6] use pathlib operations where possible --- tfs/tools.py | 11 +++-------- 
tfs/writer.py | 1 + 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tfs/tools.py b/tfs/tools.py index 976fc323..dd82f7d0 100644 --- a/tfs/tools.py +++ b/tfs/tools.py @@ -8,7 +8,7 @@ from __future__ import annotations import logging -from typing import TYPE_CHECKING +from pathlib import Path import numpy as np @@ -16,9 +16,6 @@ from tfs.reader import read_tfs from tfs.writer import write_tfs -if TYPE_CHECKING: - from pathlib import Path - LOGGER = logging.getLogger(__name__) @@ -88,8 +85,7 @@ def remove_header_comments_from_files(list_of_files: list[str | Path]) -> None: """ for filepath in list_of_files: LOGGER.info(f"Checking file: {filepath}") - with open(filepath) as f: - f_lines = f.readlines() + f_lines = Path(filepath).read_text().splitlines(keepends=True) delete_indicies = [] for index, line in enumerate(f_lines): @@ -104,5 +100,4 @@ def remove_header_comments_from_files(list_of_files: list[str | Path]) -> None: deleted_line = f_lines.pop(index) LOGGER.info(f" Deleted line: {deleted_line.strip():s}") - with open(filepath, "w") as f: - f.writelines(f_lines) + Path(filepath).write_text("".join(f_lines)) diff --git a/tfs/writer.py b/tfs/writer.py index ac357ed3..8cab56f3 100644 --- a/tfs/writer.py +++ b/tfs/writer.py @@ -12,6 +12,7 @@ import string from types import NoneType from typing import TYPE_CHECKING + import numpy as np from pandas.api import types as pdtypes from pandas.io.common import get_handle
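
A note on the pathlib change in the last patch of this series: the new `Path(filepath).read_text().splitlines(keepends=True)` / `Path(filepath).write_text("".join(f_lines))` pair is intended to round-trip file content exactly like the `readlines()` / `writelines()` calls it replaces. The snippet below is a minimal standalone sketch of that equivalence; the file name and sample content are hypothetical and not taken from the patch series.

    from pathlib import Path

    # Hypothetical scratch file, used only for illustration.
    path = Path("scratch_header_example.tfs")
    path.write_text('# a header comment\n@ TITLE %s "demo"\n* NAME S\n')

    # Old style (removed by the patch): open() + readlines().
    with open(path) as f:
        old_lines = f.readlines()

    # New style (added by the patch): pathlib + splitlines(keepends=True).
    new_lines = path.read_text().splitlines(keepends=True)

    # Both approaches yield the same list of lines, line endings included.
    assert old_lines == new_lines

    # Writing back via pathlib round-trips the content unchanged.
    path.write_text("".join(new_lines))
    assert path.read_text() == '# a header comment\n@ TITLE %s "demo"\n* NAME S\n'

One subtlety: `str.splitlines` recognises a few more line boundaries (e.g. `\x0b`, `\x85`) than `readlines`, so the two calls are equivalent for ordinary newline-terminated TFS files but not for arbitrary content.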