
Commit 7a82b07

Fixing so py docs examples are skipped from formatting
This will exclude the code snippets from being formatted, because we need them to be skinny. This also upgrades the pre-commit hooks and fixes any issues arising from the new versions.
1 parent 040defc commit 7a82b07


59 files changed: +326 −121 lines changed
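The mechanism behind the fix is pre-commit's top-level `exclude` option: a single Python regular expression matched against each candidate file path before any hook runs, so one pattern silences every hook at once for the docs snippets. A quick way to sanity-check what the pattern added below matches (the file paths here are illustrative):

import re

# pre-commit applies the top-level `exclude` pattern with re.search()
# against each file path; a match means every hook skips that file.
pattern = re.compile(r"^docs/code-comparisons/")

print(bool(pattern.search("docs/code-comparisons/example.py")))  # True  -> skipped
print(bool(pattern.search("hamilton/base.py")))                  # False -> formatted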

.pre-commit-config.yaml

+5 −5

@@ -3,10 +3,10 @@
 # Then install the hooks within the repo:
 # $ cd /PATH/TO/REPO
 # $ pre-commit install
-
+exclude: '^docs/code-comparisons/'  # skip the code comparisons directory
 repos:
   - repo: https://github.com/ambv/black
-    rev: 23.11.0
+    rev: 24.1.1
     hooks:
       - id: black
         args: [--line-length=100, --exclude=docs/*]
@@ -22,15 +22,15 @@ repos:
       - id: check-ast
   # isort python package import sorting
   - repo: https://github.com/pycqa/isort
-    rev: '5.12.0'
+    rev: '5.13.2'
    hooks:
       - id: isort
         args: ["--profile", "black",
                "--line-length=100",
-               "--extend-skip=docs/*/*/*.py",
+               "--skip=docs/",
                "--known-local-folder",
                "tests", "-p", "hamilton"]
   - repo: https://github.com/pycqa/flake8
-    rev: 6.1.0
+    rev: 7.0.0
     hooks:
       - id: flake8
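Side note on the isort change above: the broad `--skip=docs/` replaces the narrower `--extend-skip` glob, keeping docs code out of import sorting as well. The same settings the hook passes can be exercised from isort's Python API, which is a handy way to preview what the hook will do — a minimal sketch, with an illustrative snippet string:

import isort

messy = "import pandas as pd\nimport abc\n\nfrom hamilton import driver\n"
# Mirrors the hook's arguments: black profile, 100-character lines.
print(isort.code(messy, profile="black", line_length=100))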

contrib/docs/compile_docs.py

+1

@@ -10,6 +10,7 @@
    dataflow python files and information we have.
 6. We then will trigger a build of the docs; the docs can serve the latest commit version!
 """
+
 import json
 import os
 import shutil
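This single added blank line is worth calling out, because the same mechanical change repeats across most of the files below: as we understand the Black 24.x stable style being adopted here, it enforces exactly one blank line after a module docstring, so the upgraded hook inserts one wherever it was missing. The resulting shape, sketched on a toy module:

"""A toy module docstring."""

import os  # the blank line above is what Black 24.x now enforces

print(os.name)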

contrib/hamilton/contrib/user/skrawcz/customize_embeddings/__init__.py

+18 −5

@@ -22,6 +22,7 @@
 SOFTWARE.
 ----------------------------------------------------------------------------------------------
 """
+
 import logging
 import os
 import pickle  # for saving the embeddings cache
@@ -42,7 +43,9 @@
 import plotly.express as px  # for plots
 import plotly.graph_objs as go  # for plot object type
 import requests
-from sklearn.model_selection import train_test_split  # for splitting train & test data
+from sklearn.model_selection import (
+    train_test_split,
+)  # for splitting train & test data
 import torch  # for matrix optimization
 from tenacity import retry, stop_after_attempt, wait_random_exponential
 
@@ -243,8 +246,14 @@ def test_df_negatives(base_test_df: pd.DataFrame) -> pd.DataFrame:
 
 
 @parameterize(
-    train_df={"base_df": source("base_train_df"), "df_negatives": source("train_df_negatives")},
-    test_df={"base_df": source("base_test_df"), "df_negatives": source("test_df_negatives")},
+    train_df={
+        "base_df": source("base_train_df"),
+        "df_negatives": source("train_df_negatives"),
+    },
+    test_df={
+        "base_df": source("base_test_df"),
+        "df_negatives": source("test_df_negatives"),
+    },
 )
 def construct_df(
     base_df: pd.DataFrame,
@@ -631,7 +640,9 @@ def mse_loss(predictions, targets):
 @inject(
     optimization_result_matrices=group(*[source(k) for k in optimization_parameterization.keys()])
 )
-def optimization_results(optimization_result_matrices: List[pd.DataFrame]) -> pd.DataFrame:
+def optimization_results(
+    optimization_result_matrices: List[pd.DataFrame],
+) -> pd.DataFrame:
     """Combine optimization results into one dataframe."""
     return pd.concat(optimization_result_matrices)
 
@@ -685,7 +696,9 @@ def customized_embeddings_dataframe(
     return embedded_data_set
 
 
-def customized_dataset_histogram(customized_embeddings_dataframe: pd.DataFrame) -> go.Figure:
+def customized_dataset_histogram(
+    customized_embeddings_dataframe: pd.DataFrame,
+) -> go.Figure:
     """Plot histogram of cosine similarities for the new customized embeddings.
 
     The graphs show how much the overlap there is between the distribution of cosine similarities for similar and
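For readers skimming the diff: the decorator being reflowed above is Hamilton's `@parameterize`, which stamps one function body out as several DAG nodes. A simplified, self-contained sketch of the pattern (the real `construct_df` signature is trimmed here for brevity):

import pandas as pd

from hamilton.function_modifiers import parameterize, source

@parameterize(
    train_df={
        "base_df": source("base_train_df"),
        "df_negatives": source("train_df_negatives"),
    },
    test_df={
        "base_df": source("base_test_df"),
        "df_negatives": source("test_df_negatives"),
    },
)
def construct_df(base_df: pd.DataFrame, df_negatives: pd.DataFrame) -> pd.DataFrame:
    """Materializes as two nodes, `train_df` and `test_df`, each wired to
    different upstream dataframes via `source`."""
    return pd.concat([base_df, df_negatives])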

docs/data_adapters_extension.py

+16 −12

@@ -107,18 +107,22 @@ def from_loader(loader: Type[hamilton.io.data_adapters.DataLoader]) -> "AdapterI
         key=loader.name(),
         class_name=loader.__name__,
         class_path=loader.__module__,
-        load_params=[
-            Param(name=p.name, type=get_class_repr(p.type), default=get_default(p))
-            for p in dataclasses.fields(loader)
-        ]
-        if issubclass(loader, hamilton.io.data_adapters.DataLoader)
-        else None,
-        save_params=[
-            Param(name=p.name, type=get_class_repr(p.type), default=get_default(p))
-            for p in dataclasses.fields(loader)
-        ]
-        if issubclass(loader, hamilton.io.data_adapters.DataSaver)
-        else None,
+        load_params=(
+            [
+                Param(name=p.name, type=get_class_repr(p.type), default=get_default(p))
+                for p in dataclasses.fields(loader)
+            ]
+            if issubclass(loader, hamilton.io.data_adapters.DataLoader)
+            else None
+        ),
+        save_params=(
+            [
+                Param(name=p.name, type=get_class_repr(p.type), default=get_default(p))
+                for p in dataclasses.fields(loader)
+            ]
+            if issubclass(loader, hamilton.io.data_adapters.DataSaver)
+            else None
+        ),
         applicable_types=[get_class_repr(t) for t in loader.applicable_types()],
         file_=inspect.getfile(loader),
         line_nos=get_lines_for_class(loader),
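The rewrapping above appears to be another Black 24.x stable-style change: a conditional expression that splits over multiple lines now gets its own parentheses instead of dangling bare inside the enclosing call. A small before/after sketch with made-up names:

flag = True

def describe(load_params=None):
    # Toy stand-in for the AdapterInfo construction above.
    return load_params

# Before (Black 23.x), the split conditional could dangle bare in the call:
#     load_params=[1, 2]
#     if flag
#     else None,
# After (Black 24.x), the whole conditional is wrapped in parentheses:
info = describe(
    load_params=([1, 2] if flag else None),
)
print(info)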

examples/LLM_Workflows/knowledge_retrieval/functions.py

+1

@@ -1,4 +1,5 @@
 """Module to house functions for an LLM agent to use."""
+
 import logging
 
 import arxiv_articles

examples/LLM_Workflows/knowledge_retrieval/state.py

+1

@@ -2,6 +2,7 @@
 Module that contains code to house state for an agent. The dialog
 right now is hardcoded at the bottom of this file.
 """
+
 import json
 import logging
 import sys

examples/airflow/plugins/function_modules/data_loaders.py

+1

@@ -7,6 +7,7 @@
 (2) instead of @config.when* we could instead move these functions into specific independent modules, and then in
 the driver choose which one to use for the DAG. For the purposes of this example, we decided one file is simpler.
 """
+
 from typing import List
 
 import pandas as pd

examples/airflow/plugins/function_modules/feature_logic.py

+1

@@ -13,6 +13,7 @@
 integration - see `examples/data_quality/pandera` for an example.
 
 """
+
 import numpy as np
 import pandas as pd
 

examples/data_quality/pandera/data_loaders.py

+1

@@ -9,6 +9,7 @@
 (2) instead of @config.when* we could instead move these functions into specific independent modules, and then in
 the driver choose which one to use for the DAG. For the purposes of this example, we decided one file is simpler.
 """
+
 from typing import List
 
 import pandas as pd

examples/data_quality/pandera/feature_logic.py

+1

@@ -16,6 +16,7 @@
 (4) If you require dataframe validation - see the examples here.
 
 """
+
 import numpy as np
 import pandas as pd
 import pandera as pa

examples/data_quality/pandera/feature_logic_spark.py

+1

@@ -8,6 +8,7 @@
 2. The data type checks on the output of functions are different. E.g. float vs np.float64. Execution on spark
 results in different data types.
 """
+
 import numpy as np
 import pandas as pd
 import pandera as pa

examples/data_quality/pandera/run_ray.py

+1

@@ -13,6 +13,7 @@
 To run:
 > python run_ray.py
 """
+
 import logging
 import sys
 

examples/data_quality/simple/data_loaders.py

+1

@@ -7,6 +7,7 @@
 (2) instead of @config.when* we could instead move these functions into specific independent modules, and then in
 the driver choose which one to use for the DAG. For the purposes of this example, we decided one file is simpler.
 """
+
 from typing import List
 
 import pandas as pd

examples/data_quality/simple/feature_logic.py

+1

@@ -13,6 +13,7 @@
 integration - see `examples/data_quality/pandera` for an example.
 
 """
+
 import numpy as np
 import pandas as pd
 

examples/data_quality/simple/run_ray.py

+1

@@ -13,6 +13,7 @@
 To run:
 > python run_ray.py
 """
+
 import logging
 import sys
 

examples/dbt/python_transforms/data_loader.py

+1

@@ -1,6 +1,7 @@
 """
 This module contains our data loading functions.
 """
+
 from typing import List
 
 import pandas as pd

examples/dbt/python_transforms/feature_transforms.py

+1

@@ -1,6 +1,7 @@
 """
 This is a module that contains our feature transforms.
 """
+
 import pickle
 from typing import Set
 

examples/dbt/python_transforms/model_pipeline.py

+4 −1

@@ -1,6 +1,7 @@
 """
 This is a module that contains our "model fitting and related" transforms.
 """
+
 import pickle
 from typing import Dict
 
@@ -43,7 +44,9 @@ def train_test_split(
 
 @config.when(model_to_use="create_new")
 def fit_model__create_new(
-    model_classifier: base.ClassifierMixin, train_set: pd.DataFrame, target_column_name: str
+    model_classifier: base.ClassifierMixin,
+    train_set: pd.DataFrame,
+    target_column_name: str,
 ) -> base.ClassifierMixin:
     """Fits a new model.
 
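The signature reflow above (and the similar ones elsewhere in this commit) shows Black's magic trailing comma at work: once a parameter list is split, Black appends a comma after the last parameter, and that comma in turn keeps the list exploded at one parameter per line on every future run. A self-contained sketch (the sklearn import is assumed only for the type hints; the body is a stub):

import pandas as pd
from sklearn import base

def fit_model__create_new(
    model_classifier: base.ClassifierMixin,
    train_set: pd.DataFrame,
    target_column_name: str,
) -> base.ClassifierMixin:
    """Sketch only: a real implementation would call .fit() here."""
    return model_classifier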

examples/decoupling_io/components/feature_data.py

+1

@@ -1,6 +1,7 @@
 """
 This is a module that contains our feature transforms.
 """
+
 from typing import Dict, List, Set
 
 import pandas as pd

examples/feature_engineering/feature_engineering_multiple_contexts/scenario_1/etl.py

+1

@@ -5,6 +5,7 @@
 Here we ONLY use Hamilton to create the features for your training set, with comment stubs for the rest of the ETL
 that would normally be here.
 """
+
 import features
 import named_model_feature_sets
 import offline_loader

examples/feature_engineering/feature_engineering_multiple_contexts/scenario_1/features.py

+1

@@ -9,6 +9,7 @@
 Note (2): we can tag the `aggregation` features with whatever key value pair makes sense
 for us to discern/identify that we should not compute these features in an online setting.
 """
+
 import pandas as pd
 import pandera as pa

examples/feature_engineering/feature_engineering_multiple_contexts/scenario_2/etl.py

+1

@@ -17,6 +17,7 @@
 for input to create features easily with Hamilton. Between these two options you should be able to find a solution
 that works for you. If not, come ask us in slack.
 """
+
 import features
 import named_model_feature_sets
 import offline_loader

examples/feature_engineering/feature_engineering_multiple_contexts/scenario_2/features.py

+1

@@ -10,6 +10,7 @@
 This means they need to be satisfied by either being passed in, or having another module define them.
 We do the latter for this example, but having online_loader define them.
 """
+
 import pandas as pd
 import pandera as pa

examples/feature_engineering/write_once_run_everywhere_blog_post/contexts/streaming.py

+3 −1

@@ -7,6 +7,7 @@
 
 This will print out predictions as they are computed.
 """
+
 import datetime
 import logging
 import pathlib
@@ -46,7 +47,8 @@ def hamilton_predict(payload: dict):
     for int_key in ["client_id", "budget", "age"]:
         payload[int_key] = int(float(payload[int_key]))
     series_out = dr.execute(
-        ["predictions"], inputs={"survey_event": payload, "execution_time": datetime.datetime.now()}
+        ["predictions"],
+        inputs={"survey_event": payload, "execution_time": datetime.datetime.now()},
     )["predictions"]
     return {"prediction": series_out.values[0], "client_id": payload["client_id"]}
examples/lineage/lineage_script.py

+1

@@ -2,6 +2,7 @@
 
 It mirrors the code that was presented for the Lineage + Hamilton in 10 minutes blog post.
 """
+
 import pprint
 
 import data_loading
examples/numpy/air-quality-analysis/analysis_flow.py

+5 −1

@@ -13,6 +13,7 @@
 * In real life, data is generally not normally distributed. There are tests for such non-normal data like the
   Wilcoxon test.
 """
+
 import typing
 from functools import partial
 
@@ -199,7 +200,10 @@ def after_lock(
 
 
 def before_lock(
-    aqi_array: np.ndarray, datetime_index: np.ndarray, after_lock: np.ndarray, before_lock_date: str
+    aqi_array: np.ndarray,
+    datetime_index: np.ndarray,
+    after_lock: np.ndarray,
+    before_lock_date: str,
 ) -> np.ndarray:
     """Grab period before lock down."""
     return aqi_array[np.where(datetime_index <= np.datetime64(before_lock_date))][

examples/spark/pyspark_udfs/pandas_udfs.py

+1

@@ -16,6 +16,7 @@
 5. You can have non-pandas_udf functions in the same file, and will be run as row based UDFs.
 
 """
+
 import pandas as pd
 
 from hamilton.htypes import column
hamilton/ad_hoc_utils.py

+1

@@ -1,4 +1,5 @@
 """A suite of tools for ad-hoc use"""
+
 import sys
 import types
 import uuid

hamilton/base.py

+1

@@ -2,6 +2,7 @@
 It should only import hamilton.node, numpy, pandas.
 It cannot import hamilton.graph, or hamilton.driver.
 """
+
 import abc
 import collections
 import logging

hamilton/contrib/__init__.py

+1

@@ -2,6 +2,7 @@
 
 It will get clobbered when sf-hamilton-contrib is installed, which is good.
 """
+
 import logging
 from contextlib import contextmanager