@@ -47,7 +47,9 @@ def test_koalas_spark_graph_adapter(spark_session):
         initial_columns,
         example_module,
         adapter=h_spark.SparkKoalasGraphAdapter(
-            spark_session, result_builder=base.PandasDataFrameResult(), spine_column="spend"
+            spark_session,
+            result_builder=base.PandasDataFrameResult(),
+            spine_column="spend",
         ),
     )
     output_columns = [
@@ -79,7 +81,9 @@ def test_smoke_screen_module(spark_session):
         config,
         smoke_screen_module,
         adapter=h_spark.SparkKoalasGraphAdapter(
-            spark_session, result_builder=base.PandasDataFrameResult(), spine_column="weeks"
+            spark_session,
+            result_builder=base.PandasDataFrameResult(),
+            spine_column="weeks",
         ),
     )
     output_columns = [
@@ -110,7 +114,12 @@ def test_smoke_screen_module(spark_session):
         (lambda df: ({"a": df}, (df, {}))),
         (lambda df: ({"a": df, "b": 1}, (df, {"b": 1}))),
     ],
-    ids=["no_kwargs", "one_plain_kwarg", "one_df_kwarg", "one_df_kwarg_and_one_plain_kwarg"],
+    ids=[
+        "no_kwargs",
+        "one_plain_kwarg",
+        "one_df_kwarg",
+        "one_df_kwarg_and_one_plain_kwarg",
+    ],
 )
 def test__inspect_kwargs(input_and_expected_fn, spark_session):
     """A unit test for inspect_kwargs."""
@@ -230,7 +239,11 @@ def base_func(a: int, b: int) -> int:
     base_spark_df = spark_session.createDataFrame(pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}))
     node_ = node.Node.from_fn(base_func)
     new_df = h_spark._lambda_udf(base_spark_df, node_, {})
-    assert new_df.collect() == [Row(a=1, b=4, test=5), Row(a=2, b=5, test=7), Row(a=3, b=6, test=9)]
+    assert new_df.collect() == [
+        Row(a=1, b=4, test=5),
+        Row(a=2, b=5, test=7),
+        Row(a=3, b=6, test=9),
+    ]


 def test__lambda_udf_pandas_func(spark_session):
@@ -243,7 +256,11 @@ def base_func(a: pd.Series, b: pd.Series) -> htypes.column[pd.Series, int]:
     node_ = node.Node.from_fn(base_func)

     new_df = h_spark._lambda_udf(base_spark_df, node_, {})
-    assert new_df.collect() == [Row(a=1, b=4, test=5), Row(a=2, b=5, test=7), Row(a=3, b=6, test=9)]
+    assert new_df.collect() == [
+        Row(a=1, b=4, test=5),
+        Row(a=2, b=5, test=7),
+        Row(a=3, b=6, test=9),
+    ]


 def test__lambda_udf_pandas_func_error(spark_session):
@@ -348,11 +365,13 @@ def test_get_spark_type_numpy_types(return_type, expected_spark_type):

 # 4. Unsupported types
 @pytest.mark.parametrize(
-    "unsupported_return_type", [dict, set, tuple]  # Add other unsupported types as needed
+    "unsupported_return_type",
+    [dict, set, tuple],  # Add other unsupported types as needed
 )
 def test_get_spark_type_unsupported(unsupported_return_type):
     with pytest.raises(
-        ValueError, match=f"Currently unsupported return type {unsupported_return_type}."
+        ValueError,
+        match=f"Currently unsupported return type {unsupported_return_type}.",
     ):
         h_spark.get_spark_type(unsupported_return_type)

@@ -470,19 +489,19 @@ def test_base_spark_executor_end_to_end_multiple_with_columns(spark_session):


 def _only_pyspark_dataframe_parameter(foo: DataFrame) -> DataFrame:
-    ...
+    pass


 def _no_pyspark_dataframe_parameter(foo: int) -> int:
-    ...
+    pass


 def _one_pyspark_dataframe_parameter(foo: DataFrame, bar: int) -> DataFrame:
-    ...
+    pass


 def _two_pyspark_dataframe_parameters(foo: DataFrame, bar: int, baz: DataFrame) -> DataFrame:
-    ...
+    pass


 @pytest.mark.parametrize(
@@ -603,7 +622,11 @@ def df_as_pandas(df: DataFrame) -> pd.DataFrame:

     nodes = dec.generate_nodes(df_as_pandas, {})
     nodes_by_names = {n.name: n for n in nodes}
-    assert set(nodes_by_names.keys()) == {"df_as_pandas.c", "df_as_pandas", "df_as_pandas._select"}
+    assert set(nodes_by_names.keys()) == {
+        "df_as_pandas.c",
+        "df_as_pandas",
+        "df_as_pandas._select",
+    }


 def test_with_columns_generate_nodes_specify_namespace():
@@ -640,7 +663,10 @@ def test__format_standard_udf():

 def test_sparkify_node():
     def foo(
-        a_from_upstream: pd.Series, b_from_upstream: pd.Series, c_from_df: pd.Series, d_fixed: int
+        a_from_upstream: pd.Series,
+        b_from_upstream: pd.Series,
+        c_from_df: pd.Series,
+        d_fixed: int,
     ) -> htypes.column[pd.Series, int]:
         return a_from_upstream + b_from_upstream + c_from_df + d_fixed

@@ -679,7 +705,10 @@ def test_pyspark_mixed_pandas_udfs_end_to_end():
     # inputs={"spark_session": spark_session},
     # )
     results = dr.execute(
-        ["processed_df_as_pandas_dataframe_with_injected_dataframe", "processed_df_as_pandas"],
+        [
+            "processed_df_as_pandas_dataframe_with_injected_dataframe",
+            "processed_df_as_pandas",
+        ],
         inputs={"spark_session": spark_session},
     )
     processed_df_as_pandas = results["processed_df_as_pandas"]
@@ -774,7 +803,11 @@ def test_create_selector_node(spark_session):
     selector_node = h_spark.with_columns.create_selector_node("foo", ["a", "b"], "select")
     assert selector_node.name == "select"
     pandas_df = pd.DataFrame(
-        {"a": [10, 10, 20, 40, 40, 50], "b": [1, 10, 50, 100, 200, 400], "c": [1, 2, 3, 4, 5, 6]}
+        {
+            "a": [10, 10, 20, 40, 40, 50],
+            "b": [1, 10, 50, 100, 200, 400],
+            "c": [1, 2, 3, 4, 5, 6],
+        }
     )
     df = spark_session.createDataFrame(pandas_df)
     transformed = selector_node(foo=df).toPandas()