
Commit 3ce39a1

Completes B letter flake8-bugbear rules (#1099)
* Completes the Ruff B-letter flake8-bugbear rules. Refactors code to take these rules into account.

1 parent: 8251ae4

69 files changed: 151 additions, 146 deletions

Note: large commits hide some content by default; only a subset of the 69 changed files is shown below.

contrib/hamilton/contrib/user/skrawcz/customize_embeddings/__init__.py

+1 −1

@@ -262,7 +262,7 @@ def construct_df(
     negatives_per_positive: int = 1,
     random_seed: int = 123,
 ) -> pd.DataFrame:
-    f"""Return dataframe of {base_df} paris with negatives added."""
+    """Return dataframe of {base_df} paris with negatives added."""
     return pd.concat(
         [
             base_df,
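Why the leading f was dropped (Ruff B021): Python stores only a plain string literal as a function's docstring, so an f-string in that position is evaluated and thrown away. A minimal sketch, not from this repo:

def with_fstring():
    f"""Docs for this function."""  # evaluated on each call, then discarded

def with_plain_string():
    """Docs stay attached."""

print(with_fstring.__doc__)       # None
print(with_plain_string.__doc__)  # 'Docs stay attached.'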

contrib/hamilton/contrib/user/zilto/lancedb_vdb/__init__.py

+2 −2

@@ -59,9 +59,9 @@ def table_ref(

     try:
         table = client.open_table(table_name)
-    except FileNotFoundError:
+    except FileNotFoundError as e:
         if schema is None:
-            raise ValueError("`schema` must be provided to create table.")
+            raise ValueError("`schema` must be provided to create table.") from e

         table = _create_table(
             client=client,
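The pattern above is Ruff B904: a raise inside an except block should chain the original error with `from`, so the traceback records the root cause instead of the ambiguous "During handling of the above exception, another exception occurred". A minimal sketch with hypothetical names:

import json

def load_config(path: str) -> dict:
    try:
        with open(path) as f:
            return json.load(f)
    except FileNotFoundError as e:
        # "from e" attaches the original error as __cause__ in the traceback
        raise RuntimeError(f"config file missing: {path}") from e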

contrib/hamilton/contrib/user/zilto/nixtla_statsforecast/__init__.py

+2 −2

@@ -125,7 +125,7 @@ def best_model_per_series(cross_validation_evaluation: pd.DataFrame) -> pd.Serie
 def inference_predictions(
     forecaster: StatsForecast,
     inference_forecast_steps: int = 12,
-    inference_confidence_percentile: list[float] = [90.0],
+    inference_confidence_percentile: list[float] = [90.0],  # noqa: B006
 ) -> pd.DataFrame:
     """Infer values using the training harness. Fitted models aren't stored

@@ -141,7 +141,7 @@ def plotting_config(
     plot_uids: Optional[list[str]] = None,
     plot_models: Optional[list[str]] = None,
     plot_anomalies: bool = False,
-    plot_confidence_percentile: list[float] = [90.0],
+    plot_confidence_percentile: list[float] = [90.0],  # noqa: B006
     plot_engine: str = "matplotlib",
 ) -> dict:
     """Configuration for plotting functions"""

contrib/hamilton/contrib/user/zilto/webscraper/__init__.py

+2 −2

@@ -54,8 +54,8 @@ def html_page(url: str) -> str:
 def parsed_html(
     url: str,
     html_page: str,
-    tags_to_extract: List[str] = ["p", "li", "div"],
-    tags_to_remove: List[str] = ["script", "style"],
+    tags_to_extract: List[str] = ["p", "li", "div"],  # noqa: B006
+    tags_to_remove: List[str] = ["script", "style"],  # noqa: B006
 ) -> ParsingResult:
     """Parse an HTML string using BeautifulSoup

contrib/hamilton/contrib/user/zilto/xgboost_optuna/__init__.py

+3 −1

@@ -133,7 +133,7 @@ def cross_validation_folds(

 def study(
     higher_is_better: bool,
-    pruner: Optional[optuna.pruners.BasePruner] = optuna.pruners.MedianPruner(),
+    pruner: Optional[optuna.pruners.BasePruner] = None,
     sampler: Optional[optuna.samplers.BaseSampler] = None,
     study_storage: Optional[str] = None,
     study_name: Optional[str] = None,

@@ -142,6 +142,8 @@ def study(
     """Create an optuna study; use the XGBoost + Optuna integration for pruning
     ref: https://github.com/optuna/optuna-examples/blob/main/xgboost/xgboost_integration.py
     """
+    if pruner is None:
+        pruner = optuna.pruners.MedianPruner()
     return optuna.create_study(
         direction="maximize" if higher_is_better else "minimize",
         pruner=pruner,
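This one is Ruff B008 fixed properly rather than suppressed: `optuna.pruners.MedianPruner()` in the signature would be constructed once at import time and shared by every study. The None-sentinel rewrite defers construction to each call. A sketch of the hazard, with hypothetical names:

class Counter:
    def __init__(self):
        self.n = 0

def run(counter: Counter = Counter()):  # built once, at definition time
    counter.n += 1
    return counter.n

print(run())  # 1
print(run())  # 2 -- both calls mutated the same shared default instance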

contrib/setup.py

+2 −2

@@ -10,8 +10,8 @@
 try:
     with open("README.md") as readme_file:
         readme = readme_file.read()
-except Exception:
-    warnings.warn("README.md not found")
+except FileNotFoundError:
+    warnings.warn("README.md not found")  # noqa
     readme = None

 REQUIREMENTS_FILES = ["requirements.txt"]
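The bare `# noqa` here most likely silences B028, which wants an explicit `stacklevel` on `warnings.warn` so the warning points at the caller's line rather than at the warn call itself. What the rule asks for, as a sketch (hypothetical function):

import warnings

def read_legacy_setting():
    # stacklevel=2 attributes the warning to the caller, not to this line
    warnings.warn("legacy setting; use settings.toml", DeprecationWarning, stacklevel=2)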

examples/LLM_Workflows/GraphRAG/ingest_fighters.py

+1 −1

@@ -17,7 +17,7 @@ def raw_fighter_details() -> pd.DataFrame:

 def fighter(raw_fighter_details: pd.DataFrame) -> Parallelizable[pd.Series]:
     """We then want to do something for each record. That's what this code sets up"""
-    for idx, row in raw_fighter_details.iterrows():
+    for _, row in raw_fighter_details.iterrows():
         yield row
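This and several fixes below are Ruff B007: a loop variable the body never uses is renamed to `_` (or given a `_` prefix) to mark it as intentionally ignored. In sketch form:

rows = ["a", "b", "c"]
for _index, value in enumerate(rows):  # _index is bound but deliberately unused
    print(value)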

examples/LLM_Workflows/image_telephone/streamlit.py

+1 −2

@@ -403,8 +403,7 @@ def explore_display():
     image_urls_to_display = image_urls[0 : len(projection)]
     if len(image_urls_to_display) != len(projection):
         image_url_length = len(image_urls_to_display)
-        for i in range(len(projection) - len(image_urls_to_display)):
-            image_urls_to_display.append(image_urls[image_url_length - 1])
+        image_urls_to_display.append(image_urls[image_url_length - 1])
     embedding_path_plot(projection, image_urls_to_display, selected_entry, prompt_path)
     # highlight_point(projection, selected_entry)

examples/LLM_Workflows/knowledge_retrieval/state.py

+1 −1

@@ -137,7 +137,7 @@ def call_arxiv_function(messages, full_message):
             return response
         except Exception as e:
             logger.error(type(e))
-            raise Exception("Function chat request failed")
+            raise Exception("Function chat request failed") from e

    elif full_message["message"]["function_call"]["name"] == "read_article_and_summarize":
        parsed_output = json.loads(full_message["message"]["function_call"]["arguments"])

examples/LLM_Workflows/knowledge_retrieval/summarize_text.py

+1 −1

@@ -56,7 +56,7 @@ def pdf_text(pdf_path: pd.Series) -> pd.Series:
     :return: Series of strings of the PDFs' contents
     """
     _pdf_text = []
-    for i, file_path in pdf_path.items():
+    for _i, file_path in pdf_path.items():
         # creating a pdf reader object
         reader = PdfReader(file_path)
         text = ""

examples/LLM_Workflows/retrieval_augmented_generation/backend/server.py

+1 −1

@@ -59,7 +59,7 @@ class SummaryResponse(pydantic.BaseModel):


 @app.post("/store_arxiv", tags=["Ingestion"])
-async def store_arxiv(arxiv_ids: list[str] = fastapi.Form(...)) -> JSONResponse:
+async def store_arxiv(arxiv_ids: list[str] = fastapi.Form(...)) -> JSONResponse:  # noqa: B008
     """Retrieve PDF files of arxiv articles for arxiv_ids\n
     Read the PDF as text, create chunks, and embed them using OpenAI API\n
     Store chunks with embeddings in Weaviate.
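Here B008 is suppressed instead of fixed because FastAPI deliberately uses a call in the default position as a parameter declaration: the object `fastapi.Form(...)` returns is metadata the framework consumes, not shared mutable state. A minimal sketch of the idiom (hypothetical endpoint, assuming FastAPI is installed):

import fastapi

app = fastapi.FastAPI()

@app.post("/echo")
async def echo(text: str = fastapi.Form(...)) -> dict:  # noqa: B008
    # Form(...) declares "read this field from the form body"; it is
    # evaluated once by design and carries no per-request state.
    return {"text": text}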

examples/LLM_Workflows/scraping_and_chunking/spark/doc_pipeline.py

+2 −2

@@ -27,8 +27,8 @@ def article_text(url: str, article_regex: str) -> str:
     """
     try:
         html = requests.get(url)
-    except requests.exceptions.RequestException:
-        raise Exception(f"Failed to get URL: {url}")
+    except requests.exceptions.RequestException as e:
+        raise Exception(f"Failed to get URL: {url}") from e
     article = re.findall(article_regex, html.text, re.DOTALL)
     if not article:
         raise ValueError(f"No article found in {url}")

examples/LLM_Workflows/scraping_and_chunking/spark/spark_pipeline.py

+3 −1

@@ -27,7 +27,9 @@ def sitemap_text(sitemap_url: str = "https://hamilton.dagworks.io/en/latest/site
     try:
         sitemap = requests.get(sitemap_url)
     except Exception as e:
-        raise RuntimeError(f"Failed to fetch sitemap from {sitemap_url}. Original error: {str(e)}")
+        raise RuntimeError(
+            f"Failed to fetch sitemap from {sitemap_url}. Original error: {str(e)}"
+        ) from e
     return sitemap.text

examples/dagster/dagster_code/tutorial/assets.py

+1 −1

@@ -55,7 +55,7 @@ def most_frequent_words() -> MaterializeResult:
     for raw_title in topstories["title"]:
         title = raw_title.lower()
         for word in title.split():
-            cleaned_word = word.strip(".,-!?:;()[]'\"-")
+            cleaned_word = word.strip(".,-!?:;()[]'\"-")  # noqa
             if cleaned_word not in stopwords and len(cleaned_word) > 0:
                 word_counts[cleaned_word] = word_counts.get(cleaned_word, 0) + 1
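The `# noqa` here presumably guards against B005: `str.strip` with a multi-character argument removes a set of characters, not a substring, which is a common misreading. In this code the character-set behavior is exactly what's wanted (shaving punctuation off both ends), so the rule is silenced. The gotcha in a sketch:

print("production".strip("prod"))  # 'uction' -- strips the char set {p, r, o, d}, not the prefix "prod"
print("hello!?".strip(".,-!?:;"))  # 'hello'  -- the intended punctuation-set use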

examples/dagster/dagster_code/tutorial/resources/__init__.py

+1 −1

@@ -93,7 +93,7 @@ def get_signups_for_date(self, date: datetime) -> Sequence[Signup]:
         signups = []
         num_signups = self.random.randint(25, 100)

-        for i in range(num_signups):
+        for _ in range(num_signups):
             signup = self.generate_signup(date)
             signups.append(signup.to_dict())

examples/dagster/hamilton_code/dataflow.py

+1 −1

@@ -31,7 +31,7 @@ def most_frequent_words(title: pd.Series) -> dict[str, int]:
     word_counts = {}
     for raw_title in title:
         for word in raw_title.lower().split():
-            word = word.strip(".,-!?:;()[]'\"-")
+            word = word.strip(".,-!?:;()[]'\"-")  # noqa
             if len(word) == 0:
                 continue

examples/dagster/hamilton_code/mock_api.py

+1 −1

@@ -94,7 +94,7 @@ def get_signups_for_date(self, date: datetime) -> Sequence[Signup]:
         signups = []
         num_signups = self.random.randint(25, 100)

-        for i in range(num_signups):
+        for _ in range(num_signups):
             signup = self.generate_signup(date)
             signups.append(signup.to_dict())

examples/decoupling_io/adapters.py

+2 −2

@@ -7,8 +7,8 @@
     import sklearn.inspection
     import sklearn.metrics
     import sklearn.model_selection
-except ImportError:
-    raise NotImplementedError("scikit-learn is not installed.")
+except ImportError as e:
+    raise NotImplementedError("scikit-learn is not installed.") from e


 from hamilton import registry

examples/dlt/slack/__init__.py

+16 −12

@@ -168,12 +168,7 @@ def get_thread_replies(messages: List[Dict[str, Any]]) -> Iterable[TDataItem]:
         write_disposition=write_disposition,
     )
     def messages_resource(
-        created_at: dlt.sources.incremental[DateTime] = dlt.sources.incremental(
-            "ts",
-            initial_value=start_dt,
-            end_value=end_dt,
-            allow_external_schedulers=True,
-        ),
+        created_at: dlt.sources.incremental[DateTime] = None,
     ) -> Iterable[TDataItem]:
         """
         Yield all messages for a set of selected channels as a DLT resource. Keep blocks column without normalization.

@@ -184,19 +179,21 @@ def messages_resource(
         Yields:
             Iterable[TDataItem]: A list of messages.
         """
+        if created_at is None:
+            created_at = dlt.sources.incremental(
+                "ts",
+                initial_value=start_dt,
+                end_value=end_dt,
+                allow_external_schedulers=True,
+            )
         start_date_ts = ensure_dt_type(created_at.last_value, to_ts=True)
         end_date_ts = ensure_dt_type(created_at.end_value, to_ts=True)
         for channel_data in fetched_selected_channels:
             yield from get_messages(channel_data, start_date_ts, end_date_ts)

     def per_table_messages_resource(
         channel_data: Dict[str, Any],
-        created_at: dlt.sources.incremental[DateTime] = dlt.sources.incremental(
-            "ts",
-            initial_value=start_dt,
-            end_value=end_dt,
-            allow_external_schedulers=True,
-        ),
+        created_at: dlt.sources.incremental[DateTime] = None,
     ) -> Iterable[TDataItem]:
         """Yield all messages for a given channel as a DLT resource. Keep blocks column without normalization.

@@ -207,6 +204,13 @@ def per_table_messages_resource(
         Yields:
             Iterable[TDataItem]: A list of messages.
         """
+        if created_at is None:
+            created_at = dlt.sources.incremental(
+                "ts",
+                initial_value=start_dt,
+                end_value=end_dt,
+                allow_external_schedulers=True,
+            )
         start_date_ts = ensure_dt_type(created_at.last_value, to_ts=True)
         end_date_ts = ensure_dt_type(created_at.end_value, to_ts=True)
         yield from get_messages(channel_data, start_date_ts, end_date_ts)

examples/due_date_probabilities/probability_estimation.py

+2 −3

@@ -125,10 +125,9 @@ def raw_probabilities(raw_data: str) -> pd.DataFrame:

 def resampled(raw_probabilities: pd.DataFrame) -> List[int]:
     sample_data = []
-    for index, row in raw_probabilities.iterrows():
+    for _idx, row in raw_probabilities.iterrows():
         count = row.probability * 1000
-        for i in range(int(count)):
-            sample_data.append(row.days)
+        sample_data.extend([row.days] * int(count))
     return sample_data
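The rewrite above folds the B007-flagged inner loop (its `i` was never used) into a single `list.extend` with sequence repetition. Equivalence in a sketch:

sample_data = []
sample_data.extend([7] * 3)  # [7, 7, 7]
# equivalent to:
# for i in range(3):         # B007: i is never used
#     sample_data.append(7)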

examples/people_data_labs/analysis.py

+1 −1

@@ -115,7 +115,7 @@ def stock_growth_rate_since_last_funding_round(
     df = pd.merge(left=stock_data, right=period_start, on="ticker", how="inner")

     stock_growth = dict()
-    for idx, row in df.iterrows():
+    for _, row in df.iterrows():
         history = pd.json_normalize(row["historical_price"]).astype({"date": "datetime64[ns]"})

         # skip ticker if history is empty

examples/prefect/run.py

+2 −2

@@ -72,15 +72,15 @@ def train_and_evaluate_model_task(
 )
 def absenteeism_prediction_flow(
     raw_data_location: str = "./data/Absenteeism_at_work.csv",
-    feature_set: list[str] = [
+    feature_set: list[str] = [  # noqa: B006
         "age_zero_mean_unit_variance",
         "has_children",
         "has_pet",
         "is_summer",
         "service_time",
     ],
     label: str = "absenteeism_time_in_hours",
-    validation_user_ids: list[str] = [
+    validation_user_ids: list[str] = [  # noqa: B006
         "1",
         "2",
         "4",

examples/spark/world_of_warcraft/zone_features__spark_v1.py

+3 −4

@@ -12,10 +12,9 @@ def world_of_warcraft(spark_session: ps.SparkSession) -> ps.DataFrame:

 def zone_flags(world_of_warcraft: ps.DataFrame) -> ps.DataFrame:
     zone_flags = world_of_warcraft
-    for zone in ["durotar", "darkshore"]:
-        zone_flags = zone_flags.withColumn(
-            "darkshore_flag", sf.when(sf.col("zone") == " Darkshore", 1).otherwise(0)
-        ).withColumn("durotar_flag", sf.when(sf.col("zone") == " Durotar", 1).otherwise(0))
+    zone_flags = zone_flags.withColumn(
+        "darkshore_flag", sf.when(sf.col("zone") == " Darkshore", 1).otherwise(0)
+    ).withColumn("durotar_flag", sf.when(sf.col("zone") == " Durotar", 1).otherwise(0))
     return zone_flags

hamilton/cli/__main__.py

+3 −3

@@ -127,7 +127,7 @@ def _try_command(cmd: Callable, **cmd_kwargs) -> Any:
             command=cmd_name, success=False, message={"error": str(type(e)), "details": str(e)}
         )
         logger.error(dataclasses.asdict(response))
-        raise typer.Exit(code=1)
+        raise typer.Exit(code=1) from e

     return result

@@ -297,12 +297,12 @@ def ui(
     """Runs the Hamilton UI on sqllite in port 8241"""
     try:
         from hamilton_ui import commands
-    except ImportError:
+    except ImportError as e:
         logger.error(
             "hamilton[ui] not installed -- you have to install this to run the UI. "
             'Run `pip install "sf-hamilton[ui]"` to install and get started with the UI!'
         )
-        raise typer.Exit(code=1)
+        raise typer.Exit(code=1) from e

     ctx.invoke(
         commands.run,

hamilton/cli/logic.py

+5 −5

@@ -27,8 +27,8 @@ def get_git_base_directory() -> str:
         else:
             print("Error:", result.stderr.strip())
             raise OSError(f"{result.stderr.strip()}")
-    except FileNotFoundError:
-        raise FileNotFoundError("Git command not found. Please make sure Git is installed.")
+    except FileNotFoundError as e:
+        raise FileNotFoundError("Git command not found. Please make sure Git is installed.") from e


 def get_git_reference(git_relative_path: Union[str, Path], git_reference: str) -> str:

@@ -51,8 +51,8 @@ def get_git_reference(git_relative_path: Union[str, Path], git_reference: str) -
             return
         else:
             return
-    except FileNotFoundError:
-        raise FileNotFoundError("Git command not found. Please make sure Git is installed.")
+    except FileNotFoundError as e:
+        raise FileNotFoundError("Git command not found. Please make sure Git is installed.") from e


 def version_hamilton_functions(module: ModuleType) -> Dict[str, str]:

@@ -184,7 +184,7 @@ def diff_versions(current_map: Dict[str, str], reference_map: Dict[str, str]) ->
         if v1 != v2:
             edit.append(node_name)

-    for node_name, v2 in reference_map.items():
+    for node_name, _ in reference_map.items():
         v1 = current_map.get(node_name)
         if v1 is None:
             reference_only.append(node_name)

hamilton/dataflows/__init__.py

+2 −2

@@ -498,10 +498,10 @@ def are_py_dependencies_satisfied(dataflow, user=None, version="latest"):
         else:
             package_name = line
             required_version = None
-        required_version  # here for now...
+        required_version  # noqa here for now...
         try:
             installed_version = pkg_version(package_name)
-            installed_version  # here for now..
+            installed_version  # noqa here for now..
         except PackageNotFoundError:
             logger.info(f"Package '{package_name}' is not installed.")
             return False
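The bare-name statements above are deliberate no-ops kept as placeholders; without the `# noqa`, Ruff's B018 would flag them as useless expressions, which are usually a forgotten assignment or call. A sketch of what the rule catches:

version = "1.2.3"
version                     # B018: computes a value and discards it
parts = version.split(".")  # assigning (or calling for effect) is what B018 expects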

hamilton/execution/executors.py

+1 −1

@@ -99,7 +99,7 @@ def base_execute_task(task: TaskImplementation) -> Dict[str, Any]:
     for node_ in task.nodes:
         if not getattr(node_, "callable_modified", False):
             node_._callable = _modify_callable(node_.node_role, node_.callable)
-            setattr(node_, "callable_modified", True)
+            node_.callable_modified = True
     if task.adapter.does_hook("pre_task_execute", is_async=False):
         task.adapter.call_all_lifecycle_hooks_sync(
             "pre_task_execute",
