Skip to content

Commit 21f33e4

Browse files
michaelchuclaude
andcommitted
Fix code review issues: path validation, dead state, code quality
Security: - Validate file_path in load_csv_data against uploaded_files whitelist using os.path.realpath to prevent path traversal - Thread uploaded_files from agent → execute_tool → handler Code quality: - Move `import os` to top of _data_inspector.py - Replace `assert el.path` with guard clause in app.py - Derive CSV kwarg keys from datafeeds.default_kwargs (DRY) - Use tmp_path fixture instead of hardcoded /tmp path in test Tests: - Add test_load_csv_rejects_non_uploaded_path - Add test_load_csv_allows_uploaded_path Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent e7f57f5 commit 21f33e4

5 files changed

Lines changed: 49 additions & 25 deletions

File tree

optopsy/ui/agent.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,7 @@ async def chat(
701701
self.datasets,
702702
self.results,
703703
dataset_fingerprint=self._dataset_fingerprint,
704+
uploaded_files=self.uploaded_files,
704705
),
705706
)
706707
# Update dataset and recompute fingerprint. Always

optopsy/ui/app.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -768,7 +768,8 @@ async def on_message(message: cl.Message):
768768
_upload_contexts: list[str] = []
769769
for el in csv_elements:
770770
try:
771-
assert el.path is not None
771+
if not el.path:
772+
continue
772773
raw = pd.read_csv(el.path, nrows=5)
773774
label = el.name
774775
agent.uploaded_files[label] = el.path

optopsy/ui/tools/_data_inspector.py

Lines changed: 17 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
"""Data inspection tool handlers: load_csv_data, preview_data, describe_data, suggest_strategy_params."""
22

33
import json as _json
4+
import os
45

56
import pandas as pd
67

78
import optopsy as op
9+
from optopsy.datafeeds import default_kwargs
810
from optopsy.strategies._helpers import (
911
_DEFAULT_ATM_DELTA,
1012
_DEFAULT_DEEP_ITM_DELTA,
@@ -17,34 +19,30 @@
1719
from ._helpers import _df_summary, _df_to_markdown
1820
from ._schemas import CALENDAR_STRATEGIES
1921

22+
# Column kwargs accepted by csv_data(), derived from default_kwargs to stay DRY.
23+
_CSV_COL_KEYS = tuple(k for k in default_kwargs if k not in ("start_date", "end_date"))
24+
_CSV_KWARG_KEYS = tuple(default_kwargs.keys())
25+
2026

2127
@_register("load_csv_data")
2228
def _handle_load_csv_data(arguments, dataset, signals, datasets, results, _result):
2329
file_path = arguments.get("file_path")
2430
if not file_path:
2531
return _result("file_path is required.")
2632

33+
# Validate file_path against uploaded files to prevent path traversal.
34+
allowed_paths = arguments.get("_uploaded_file_paths")
35+
if allowed_paths is not None:
36+
real_path = os.path.realpath(file_path)
37+
allowed_real = {os.path.realpath(p) for p in allowed_paths}
38+
if real_path not in allowed_real:
39+
return _result(
40+
f"Access denied: '{file_path}' is not a recognized uploaded file."
41+
)
42+
2743
# Build kwargs for csv_data() from the validated arguments.
2844
csv_kwargs = {}
29-
for key in (
30-
"start_date",
31-
"end_date",
32-
"underlying_symbol",
33-
"underlying_price",
34-
"option_type",
35-
"expiration",
36-
"quote_date",
37-
"strike",
38-
"bid",
39-
"ask",
40-
"delta",
41-
"gamma",
42-
"theta",
43-
"vega",
44-
"implied_volatility",
45-
"volume",
46-
"open_interest",
47-
):
45+
for key in _CSV_KWARG_KEYS:
4846
val = arguments.get(key)
4947
if val is not None:
5048
csv_kwargs[key] = val
@@ -54,8 +52,6 @@ def _handle_load_csv_data(arguments, dataset, signals, datasets, results, _resul
5452
except Exception as e:
5553
return _result(f"Failed to load CSV: {e}")
5654

57-
import os
58-
5955
label = os.path.basename(file_path)
6056
updated_datasets = {**datasets, label: df}
6157

optopsy/ui/tools/_executor.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ def execute_tool(
210210
datasets: dict[str, pd.DataFrame] | None = None,
211211
results: dict[str, dict] | None = None,
212212
dataset_fingerprint: str | None = None,
213+
uploaded_files: dict[str, str] | None = None,
213214
) -> ToolResult:
214215
"""
215216
Execute a tool call and return a ToolResult.
@@ -261,9 +262,11 @@ def execute_tool(
261262
results=results,
262263
)
263264

264-
# Inject dataset fingerprint AFTER validation so Pydantic doesn't strip it.
265+
# Inject internal metadata AFTER validation so Pydantic doesn't strip it.
265266
if dataset_fingerprint and tool_name in _CACHEABLE_TOOLS:
266267
arguments = {**arguments, "_dataset_fingerprint": dataset_fingerprint}
268+
if uploaded_files and tool_name == "load_csv_data":
269+
arguments = {**arguments, "_uploaded_file_paths": set(uploaded_files.values())}
267270

268271
# Helper to build a ToolResult that always carries state forward.
269272
def _result(

tests/test_tools_integration.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,15 +177,38 @@ def test_load_csv_wrong_mapping_returns_error(self):
177177
)
178178
assert "Failed to load CSV" in r.llm_summary
179179

180-
def test_load_csv_missing_file(self):
180+
def test_load_csv_missing_file(self, tmp_path):
181181
"""Non-existent file returns an error."""
182182
r = execute_tool(
183183
"load_csv_data",
184-
{"file_path": "/tmp/nonexistent_optopsy_test.csv"},
184+
{"file_path": str(tmp_path / "nonexistent.csv")},
185185
None,
186186
)
187187
assert "Failed to load CSV" in r.llm_summary
188188

189+
def test_load_csv_rejects_non_uploaded_path(self):
190+
"""file_path not in uploaded_files is rejected when whitelist is present."""
191+
csv_path = f"{TEST_DATA_DIR}/data_no_underlying_price.csv"
192+
r = execute_tool(
193+
"load_csv_data",
194+
{"file_path": csv_path},
195+
None,
196+
uploaded_files={"other.csv": "/tmp/other.csv"},
197+
)
198+
assert "Access denied" in r.llm_summary
199+
200+
def test_load_csv_allows_uploaded_path(self):
201+
"""file_path in uploaded_files is accepted."""
202+
csv_path = f"{TEST_DATA_DIR}/data_no_underlying_price.csv"
203+
r = execute_tool(
204+
"load_csv_data",
205+
{"file_path": csv_path},
206+
None,
207+
uploaded_files={"data.csv": csv_path},
208+
)
209+
assert r.dataset is not None
210+
assert "Access denied" not in r.llm_summary
211+
189212
def test_load_then_preview(self):
190213
"""Full flow: load_csv_data → preview_data on the loaded dataset."""
191214
csv_path = f"{TEST_DATA_DIR}/data_no_underlying_price.csv"

0 commit comments

Comments
 (0)