|
22 | 22 | SOFTWARE.
|
23 | 23 | ----------------------------------------------------------------------------------------------
|
24 | 24 | """
|
| 25 | + |
25 | 26 | import logging
|
26 | 27 | import os
|
27 | 28 | import pickle # for saving the embeddings cache
|
|
42 | 43 | import plotly.express as px # for plots
|
43 | 44 | import plotly.graph_objs as go # for plot object type
|
44 | 45 | import requests
|
45 |
| - from sklearn.model_selection import train_test_split # for splitting train & test data |
| 46 | + from sklearn.model_selection import ( |
| 47 | + train_test_split, |
| 48 | + ) # for splitting train & test data |
46 | 49 | import torch # for matrix optimization
|
47 | 50 | from tenacity import retry, stop_after_attempt, wait_random_exponential
|
48 | 51 |
|
@@ -243,8 +246,14 @@ def test_df_negatives(base_test_df: pd.DataFrame) -> pd.DataFrame:
|
243 | 246 |
|
244 | 247 |
|
245 | 248 | @parameterize(
|
246 |
| - train_df={"base_df": source("base_train_df"), "df_negatives": source("train_df_negatives")}, |
247 |
| - test_df={"base_df": source("base_test_df"), "df_negatives": source("test_df_negatives")}, |
| 249 | + train_df={ |
| 250 | + "base_df": source("base_train_df"), |
| 251 | + "df_negatives": source("train_df_negatives"), |
| 252 | + }, |
| 253 | + test_df={ |
| 254 | + "base_df": source("base_test_df"), |
| 255 | + "df_negatives": source("test_df_negatives"), |
| 256 | + }, |
248 | 257 | )
|
249 | 258 | def construct_df(
|
250 | 259 | base_df: pd.DataFrame,
|
@@ -631,7 +640,9 @@ def mse_loss(predictions, targets):
|
@inject(
    optimization_result_matrices=group(*[source(k) for k in optimization_parameterization])
)
def optimization_results(
    optimization_result_matrices: List[pd.DataFrame],
) -> pd.DataFrame:
    """Concatenate the per-parameterization optimization result matrices.

    Each injected dataframe holds the results for one entry of
    ``optimization_parameterization``; they are stacked row-wise into a
    single dataframe for downstream analysis.
    """
    combined = pd.concat(optimization_result_matrices)
    return combined
|
637 | 648 |
|
@@ -685,7 +696,9 @@ def customized_embeddings_dataframe(
|
685 | 696 | return embedded_data_set
|
686 | 697 |
|
687 | 698 |
|
688 |
| -def customized_dataset_histogram(customized_embeddings_dataframe: pd.DataFrame) -> go.Figure: |
| 699 | +def customized_dataset_histogram( |
| 700 | + customized_embeddings_dataframe: pd.DataFrame, |
| 701 | +) -> go.Figure: |
689 | 702 | """Plot histogram of cosine similarities for the new customized embeddings.
|
690 | 703 |
|
691 | 704 | The graphs show how much the overlap there is between the distribution of cosine similarities for similar and
|
|
0 commit comments