Skip to content

Commit 199f77a

Browse files
committed
feat: add security scanner and hypothesis registry
1 parent d20b590 commit 199f77a

12 files changed

Lines changed: 1343 additions & 26 deletions

agent/src/hypotheses/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
"""Durable research hypothesis registry."""
2+
3+
from src.hypotheses.registry import (
4+
HYPOTHESIS_STATUSES,
5+
Hypothesis,
6+
HypothesisRegistry,
7+
default_hypotheses_path,
8+
)
9+
10+
__all__ = [
11+
"HYPOTHESIS_STATUSES",
12+
"Hypothesis",
13+
"HypothesisRegistry",
14+
"default_hypotheses_path",
15+
]

agent/src/hypotheses/registry.py

Lines changed: 371 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,371 @@
1+
"""Pure-code durable research hypothesis registry.
2+
3+
The registry is intentionally small: local JSON storage, deterministic reads,
4+
and no dependency on LLMs or live trading services.
5+
"""
6+
7+
from __future__ import annotations
8+
9+
import hashlib
10+
import json
11+
import os
12+
import re
13+
from dataclasses import asdict, dataclass, field
14+
from datetime import datetime, timezone
15+
from pathlib import Path
16+
from typing import Any
17+
18+
# Lifecycle states a hypothesis may be in. The tuple order is the order in
# which the allowed values are listed in validation error messages.
HYPOTHESIS_STATUSES = (
    "exploring",
    "testing",
    "validated",
    "rejected",
    "monitoring",
)
# Membership-test companion to the tuple above; frozen so that no caller can
# accidentally widen the set of accepted statuses at runtime.
_STATUS_SET = frozenset(HYPOTHESIS_STATUSES)
# Environment variable that overrides the default storage location.
_ENV_PATH = "VIBE_TRADING_HYPOTHESES_PATH"
# Search tokens: runs of two or more ASCII alphanumerics, or one single
# character in the U+4E00..U+9FFF range.
_TOKEN_RE = re.compile(r"[a-zA-Z0-9]{2,}|[\u4e00-\u9fff]")
28+
29+
30+
def default_hypotheses_path() -> Path:
    """Resolve where the hypotheses JSON file lives.

    Returns:
        The value of ``VIBE_TRADING_HYPOTHESES_PATH`` (surrounding whitespace
        removed, ``~`` expanded) when that variable is non-blank; otherwise
        ``~/.vibe-trading/hypotheses.json`` under the current user's home.
    """
    configured = os.environ.get(_ENV_PATH, "").strip()
    if not configured:
        return Path.home() / ".vibe-trading" / "hypotheses.json"
    return Path(configured).expanduser()
41+
42+
43+
def _utc_now() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (whole seconds)."""
    current = datetime.now(timezone.utc)
    # The format string carries no fractional-seconds field, so microseconds
    # are dropped, and the literal "Z" stands in for the "+00:00" offset.
    return current.strftime("%Y-%m-%dT%H:%M:%SZ")
45+
46+
47+
def _coerce_str_list(value: Any) -> list[str]:
    """Normalize a loosely-typed value into a list of non-empty strings.

    Args:
        value: ``None``, a single string, a list/tuple/set of items, or any
            other scalar.

    Returns:
        ``[]`` for ``None`` or an empty string; a one-element list for a
        non-empty string; for a list, tuple, or set, each item converted with
        ``str`` with ``None`` items and empty results dropped; otherwise a
        one-element list holding ``str(value)``.
    """
    if value is None:
        return []
    if isinstance(value, str):
        return [value] if value else []
    if isinstance(value, (set, frozenset)):
        # Sets have no stable iteration order; sort so that what gets
        # persisted does not change from one run to the next.
        value = sorted(value, key=str)
    if isinstance(value, (list, tuple)):
        # Skip None explicitly: str(None) would otherwise store the literal
        # text "None" (e.g. for a JSON null inside the list).
        texts = (str(item) for item in value if item is not None)
        return [text for text in texts if text]
    return [str(value)]
55+
56+
57+
def _tokenize(text: str) -> set[str]:
    """Return the distinct search tokens found in ``text``, lower-cased."""
    lowered = text.lower()
    return {match.group(0) for match in _TOKEN_RE.finditer(lowered)}
59+
60+
61+
def _new_hypothesis_id(title: str, created_at: str, existing_ids: set[str]) -> str:
    """Derive a registry id that is not already in ``existing_ids``.

    Args:
        title: Hypothesis title; stripped and lower-cased before hashing.
        created_at: Creation timestamp mixed into the hash seed.
        existing_ids: Ids already taken.

    Returns:
        ``hyp_`` plus the first 12 hex digits of the SHA-256 of
        ``"<title>|<created_at>"``; if that id is taken, the same id with
        the lowest free ``_2``, ``_3``, ... suffix.
    """
    digest = hashlib.sha256(
        f"{title.strip().lower()}|{created_at}".encode("utf-8")
    ).hexdigest()
    stem = "hyp_" + digest[:12]
    candidate = stem
    suffix = 1
    while candidate in existing_ids:
        # The first collision yields "_2": the unsuffixed id counts as number one.
        suffix += 1
        candidate = f"{stem}_{suffix}"
    return candidate
70+
71+
72+
def _validate_status(status: str) -> str:
    """Return the canonical (stripped, lower-case) form of ``status``.

    Args:
        status: Candidate lifecycle status; converted with ``str`` first.

    Returns:
        The normalized status.

    Raises:
        ValueError: If the normalized value is not one of
            ``HYPOTHESIS_STATUSES``.
    """
    candidate = str(status).strip().lower()
    if candidate in _STATUS_SET:
        return candidate
    allowed = ", ".join(HYPOTHESIS_STATUSES)
    # The message echoes the caller's original value, not the normalized one.
    raise ValueError(f"unknown hypothesis status '{status}'. Allowed: {allowed}")
78+
79+
80+
@dataclass
class Hypothesis:
    """A research hypothesis tracked across analysis and backtests.

    Attributes:
        hypothesis_id: Stable registry identifier.
        title: Short human-readable title.
        thesis: Research thesis or rationale.
        status: Lifecycle status.
        universe: Target universe, market, or asset set.
        signal_definition: Signal logic in plain text.
        data_sources: Data sources expected or used.
        skills: Relevant Vibe-Trading skills.
        run_cards: Linked backtest/run-card artifacts.
        invalidation_notes: Notes describing rejection or invalidation logic.
        created_at: UTC creation timestamp.
        updated_at: UTC last update timestamp.
    """

    hypothesis_id: str
    title: str
    thesis: str
    status: str = "exploring"
    universe: str = ""
    signal_definition: str = ""
    data_sources: list[str] = field(default_factory=list)
    skills: list[str] = field(default_factory=list)
    run_cards: list[dict[str, Any]] = field(default_factory=list)
    invalidation_notes: str = ""
    created_at: str = ""
    updated_at: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Serialize the hypothesis to plain JSON-compatible data."""
        return asdict(self)

    @staticmethod
    def _text(data: dict[str, Any], key: str, default: str = "") -> str:
        """Read ``data[key]`` as text, treating a missing or ``None`` value as ``default``.

        A plain ``str(data.get(key, ""))`` would turn a JSON ``null`` into the
        literal string ``"None"``; this helper avoids that.
        """
        value = data.get(key)
        return default if value is None else str(value)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Hypothesis":
        """Build a hypothesis from persisted JSON data.

        Args:
            data: Raw dictionary loaded from storage.

        Returns:
            Parsed hypothesis with defaults for missing MVP fields. Text
            fields that are absent or ``null`` become ``""``, an absent or
            ``null`` status becomes ``"exploring"``, and missing timestamps
            fall back to the current UTC time.

        Raises:
            ValueError: If the stored status is not a known lifecycle status.
        """
        now = _utc_now()
        # "backtests" is accepted as an alternative key for the linked artifacts.
        run_cards = data.get("run_cards") or data.get("backtests") or []
        if isinstance(run_cards, dict):
            # A lone mapping is a single link; list(dict) would keep only its keys.
            run_cards = [run_cards]
        created_at = str(data.get("created_at") or now)
        return cls(
            hypothesis_id=cls._text(data, "hypothesis_id"),
            title=cls._text(data, "title"),
            thesis=cls._text(data, "thesis"),
            status=_validate_status(cls._text(data, "status", "exploring")),
            universe=cls._text(data, "universe"),
            signal_definition=cls._text(data, "signal_definition"),
            data_sources=_coerce_str_list(data.get("data_sources")),
            skills=_coerce_str_list(data.get("skills")),
            run_cards=list(run_cards),
            invalidation_notes=cls._text(data, "invalidation_notes"),
            created_at=created_at,
            # A record that was never updated reports its creation time.
            updated_at=str(data.get("updated_at") or created_at),
        )
141+
142+
143+
class HypothesisRegistry:
    """File-backed registry for research hypotheses.

    Each operation re-reads the JSON file; mutating operations then rewrite
    the whole file. Nothing is cached on the instance apart from ``path``.
    """

    def __init__(self, path: Path | str | None = None) -> None:
        """Initialize the registry.

        Args:
            path: Optional storage path (``Path`` or string). Defaults to env
                override or ``~/.vibe-trading/hypotheses.json``.
        """
        # Accept plain strings too; an empty value falls back to the default.
        self.path = Path(path) if path else default_hypotheses_path()
        self.path.parent.mkdir(parents=True, exist_ok=True)

    def create(
        self,
        *,
        title: str,
        thesis: str,
        status: str = "exploring",
        universe: str = "",
        signal_definition: str = "",
        data_sources: list[str] | None = None,
        skills: list[str] | None = None,
        invalidation_notes: str = "",
    ) -> Hypothesis:
        """Create and persist a new hypothesis.

        Args:
            title: Short hypothesis title.
            thesis: Research thesis or rationale.
            status: Initial lifecycle status.
            universe: Target market or asset universe.
            signal_definition: Signal logic.
            data_sources: Source names.
            skills: Related Vibe-Trading skills.
            invalidation_notes: Initial invalidation notes.

        Returns:
            Created hypothesis.

        Raises:
            ValueError: If title/thesis/status are invalid.
        """
        title = title.strip()
        thesis = thesis.strip()
        if not title:
            raise ValueError("title is required")
        if not thesis:
            raise ValueError("thesis is required")

        records = self.list()
        now = _utc_now()
        hyp = Hypothesis(
            hypothesis_id=_new_hypothesis_id(title, now, {h.hypothesis_id for h in records}),
            title=title,
            thesis=thesis,
            status=_validate_status(status),
            universe=universe.strip(),
            signal_definition=signal_definition.strip(),
            data_sources=_coerce_str_list(data_sources),
            skills=_coerce_str_list(skills),
            invalidation_notes=invalidation_notes.strip(),
            created_at=now,
            updated_at=now,
        )
        records.append(hyp)
        self._save(records)
        return hyp

    def update(
        self,
        hypothesis_id: str,
        *,
        title: str | None = None,
        thesis: str | None = None,
        status: str | None = None,
        universe: str | None = None,
        signal_definition: str | None = None,
        data_sources: list[str] | None = None,
        skills: list[str] | None = None,
        invalidation_notes: str | None = None,
    ) -> Hypothesis:
        """Update an existing hypothesis.

        Only arguments that are not ``None`` are applied. Nothing is written
        to storage if validation fails.

        Args:
            hypothesis_id: Registry identifier.
            title: Optional replacement title.
            thesis: Optional replacement thesis.
            status: Optional lifecycle status.
            universe: Optional replacement universe.
            signal_definition: Optional replacement signal definition.
            data_sources: Optional replacement source list.
            skills: Optional replacement skill list.
            invalidation_notes: Optional replacement invalidation notes.

        Returns:
            Updated hypothesis.

        Raises:
            KeyError: If the hypothesis does not exist.
            ValueError: If status is unknown, or if a replacement title or
                thesis is blank.
        """
        records = self.list()
        hyp = self._find_required(records, hypothesis_id)
        if title is not None:
            title = title.strip()
            # Same rule as create(): a stored hypothesis always has a title.
            if not title:
                raise ValueError("title is required")
            hyp.title = title
        if thesis is not None:
            thesis = thesis.strip()
            # Same rule as create(): a stored hypothesis always has a thesis.
            if not thesis:
                raise ValueError("thesis is required")
            hyp.thesis = thesis
        if status is not None:
            hyp.status = _validate_status(status)
        if universe is not None:
            hyp.universe = universe.strip()
        if signal_definition is not None:
            hyp.signal_definition = signal_definition.strip()
        if data_sources is not None:
            hyp.data_sources = _coerce_str_list(data_sources)
        if skills is not None:
            hyp.skills = _coerce_str_list(skills)
        if invalidation_notes is not None:
            hyp.invalidation_notes = invalidation_notes.strip()
        hyp.updated_at = _utc_now()
        self._save(records)
        return hyp

    def link_backtest(
        self,
        hypothesis_id: str,
        *,
        run_card_path: str = "",
        backtest_run_dir: str = "",
        metrics: dict[str, Any] | None = None,
        notes: str = "",
    ) -> Hypothesis:
        """Link a run card or backtest artifact to a hypothesis.

        Args:
            hypothesis_id: Registry identifier.
            run_card_path: Optional path to a run_card.json.
            backtest_run_dir: Optional backtest run directory.
            metrics: Optional metrics summary.
            notes: Optional human note about the link.

        Returns:
            Updated hypothesis.

        Raises:
            KeyError: If the hypothesis does not exist.
            ValueError: If no run card or run directory is provided.
        """
        if not run_card_path and not backtest_run_dir:
            raise ValueError("run_card_path or backtest_run_dir is required")
        records = self.list()
        hyp = self._find_required(records, hypothesis_id)
        # One timestamp for both fields, so the link time and the record's
        # update time can never disagree across a second boundary.
        now = _utc_now()
        hyp.run_cards.append({
            "run_card_path": run_card_path,
            "backtest_run_dir": backtest_run_dir,
            "metrics": metrics or {},
            "notes": notes,
            "linked_at": now,
        })
        hyp.updated_at = now
        self._save(records)
        return hyp

    def search(
        self,
        *,
        query: str = "",
        status: str | None = None,
        limit: int = 10,
    ) -> list[Hypothesis]:
        """Search hypotheses by text and/or status.

        Args:
            query: Text query over the stored values (title, thesis, universe,
                signal, sources, skills, notes, and links). Field names are
                not searched. An empty query matches every hypothesis.
            status: Optional status filter.
            limit: Maximum results; clamped to the range 1..100.

        Returns:
            Matching hypotheses ordered by score then most recently updated.
            The score is the number of distinct query tokens found.

        Raises:
            ValueError: If status is unknown.
        """
        status_filter = _validate_status(status) if status else None
        query_tokens = _tokenize(query)
        scored: list[tuple[int, Hypothesis]] = []
        for hyp in self.list():
            if status_filter and hyp.status != status_filter:
                continue
            if not query_tokens:
                score = 1
            else:
                hay_tokens = _tokenize(self._searchable_text(hyp))
                score = len(query_tokens & hay_tokens)
            if score > 0:
                scored.append((score, hyp))
        scored.sort(key=lambda item: (item[0], item[1].updated_at), reverse=True)
        return [hyp for _, hyp in scored[: max(1, min(int(limit), 100))]]

    @staticmethod
    def _searchable_text(hyp: Hypothesis) -> str:
        """Join the stored values of ``hyp`` into one newline-separated string.

        Only values are included: the registry's own field names (``title``,
        ``thesis``, ``run_card_path``, ...) are left out so that a query such
        as "thesis" does not match every record. Keys inside a ``metrics``
        mapping are kept because they are user-supplied metric names.
        """

        def walk(value: Any, keep_keys: bool):
            if isinstance(value, dict):
                for key, item in value.items():
                    if keep_keys:
                        yield str(key)
                    yield from walk(item, keep_keys or key == "metrics")
            elif isinstance(value, (list, tuple)):
                for item in value:
                    yield from walk(item, keep_keys)
            elif value is not None:
                yield str(value)

        return "\n".join(walk(hyp.to_dict(), False))

    def list(self) -> list[Hypothesis]:
        """Load all hypotheses from storage.

        Returns:
            Parsed hypotheses in file order; an empty list when the storage
            file does not exist. Entries that are not JSON objects are
            skipped.

        Raises:
            ValueError: If the file is not valid JSON, its top level is not a
                list, or a record carries an unknown status.
        """
        if not self.path.exists():
            return []
        try:
            raw = json.loads(self.path.read_text(encoding="utf-8"))
        except json.JSONDecodeError as exc:
            raise ValueError(f"invalid hypotheses storage JSON: {self.path}") from exc
        if not isinstance(raw, list):
            raise ValueError("hypotheses storage must contain a JSON list")
        return [Hypothesis.from_dict(item) for item in raw if isinstance(item, dict)]

    def _save(self, records: list[Hypothesis]) -> None:
        """Write ``records`` to storage, ordered by creation time.

        The payload goes to a sibling ``<name>.tmp`` file first and is then
        moved over the real path, so the target is only ever swapped for a
        fully written file.
        """
        payload = [hyp.to_dict() for hyp in sorted(records, key=lambda h: h.created_at)]
        tmp_path = self.path.with_suffix(self.path.suffix + ".tmp")
        tmp_path.write_text(
            json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True),
            encoding="utf-8",
        )
        tmp_path.replace(self.path)

    @staticmethod
    def _find_required(records: list[Hypothesis], hypothesis_id: str) -> Hypothesis:
        """Return the record with ``hypothesis_id`` or raise ``KeyError``."""
        for hyp in records:
            if hyp.hypothesis_id == hypothesis_id:
                return hyp
        raise KeyError(f"hypothesis not found: {hypothesis_id}")

agent/src/security/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
"""Security helpers for tool output sanitization and warning metadata."""
2+

0 commit comments

Comments
 (0)