From 1735e22f582e07f68139499ab57e76ef40dee5ab Mon Sep 17 00:00:00 2001 From: sushaan-k Date: Mon, 30 Mar 2026 20:22:11 -0400 Subject: [PATCH 01/10] feat: add spec complexity scoring Add a `complexity_score()` method to `Spec` that estimates verification difficulty based on weighted contributions from postconditions, edge cases, description length, preconditions, and invariants. Returns a float in [0, 1]. Includes tests for boundary conditions and weight dominance. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/vericode/spec.py | 27 ++++++++++++++++++++ tests/test_spec.py | 59 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/src/vericode/spec.py b/src/vericode/spec.py index fba58cd..b6a7430 100644 --- a/src/vericode/spec.py +++ b/src/vericode/spec.py @@ -45,6 +45,33 @@ class Spec(BaseModel): invariants: list[str] = Field(default_factory=list) edge_cases: list[str] = Field(default_factory=list) + def complexity_score(self) -> float: + """Estimate how difficult this spec is to verify. + + The score is a float in [0, 1] computed from: + - Number of postconditions (weight 0.35) + - Number of edge cases (weight 0.25) + - Description length (weight 0.20) + - Number of preconditions (weight 0.10) + - Number of invariants (weight 0.10) + + A higher score means the spec is expected to be harder to prove. + """ + postcond_score = min(len(self.postconditions) / 5.0, 1.0) + edge_score = min(len(self.edge_cases) / 4.0, 1.0) + desc_score = min(len(self.description) / 500.0, 1.0) + precond_score = min(len(self.preconditions) / 4.0, 1.0) + invariant_score = min(len(self.invariants) / 3.0, 1.0) + + raw = ( + 0.35 * postcond_score + + 0.25 * edge_score + + 0.20 * desc_score + + 0.10 * precond_score + + 0.10 * invariant_score + ) + return round(min(raw, 1.0), 4) + @model_validator(mode="after") def _infer_function_name(self) -> Spec: """Best-effort extraction of a function name from the description.""" diff --git a/tests/test_spec.py b/tests/test_spec.py index e5158de..02e0967 100644 --- a/tests/test_spec.py +++ b/tests/test_spec.py @@ -71,6 +71,65 @@ def test_empty_defaults(self) -> None: assert spec.edge_cases == [] +# --------------------------------------------------------------------------- +# Complexity scoring tests +# --------------------------------------------------------------------------- + + +class TestComplexityScore: + """Tests for ``Spec.complexity_score()``.""" + + def test_minimal_spec_low_score(self) -> None: + """A bare-bones spec should have a very low complexity score.""" + spec = Spec(description="Sort a list") + score = spec.complexity_score() + assert 0.0 <= score <= 0.15 + + def test_rich_spec_higher_score(self) -> None: + """A spec with many constraints should score higher.""" + spec = Spec( + description="Merge two sorted lists into one sorted list " * 10, + postconditions=[ + "is_sorted(result)", + "len(result) == len(a) + len(b)", + "is_permutation(result, a + b)", + ], + edge_cases=["a == []", "b == []", "len(a) == 1"], + preconditions=["is_sorted(a)", "is_sorted(b)"], + invariants=["len(merged) <= len(a) + len(b)"], + ) + score = spec.complexity_score() + assert score > 0.5 + + def test_score_bounded_zero_to_one(self) -> None: + """Even an extreme spec must return a score in [0, 1].""" + spec = Spec( + description="x" * 2000, + postconditions=[f"p{i}" for i in range(20)], + edge_cases=[f"e{i}" for i in range(20)], + preconditions=[f"pre{i}" for i in range(20)], + invariants=[f"inv{i}" for i in range(20)], + ) + score = spec.complexity_score() + assert score == 1.0 + + def test_empty_description_zero_desc_component(self) -> None: + """Description component should be zero for a very short description.""" + spec = Spec(description="x") + score = spec.complexity_score() + # Only desc contributes and it's ~0.002 / 500 -> tiny + assert score < 0.01 + + def test_postconditions_are_dominant_factor(self) -> None: + """Postconditions have the highest weight (0.35).""" + base = Spec(description="Sort a list") + with_post = Spec( + description="Sort a list", + postconditions=["is_sorted(result)", "is_permutation(result, input)"], + ) + assert with_post.complexity_score() > base.complexity_score() + + # --------------------------------------------------------------------------- # parse_spec tests # --------------------------------------------------------------------------- From ce720c2fd6b29ef28c32b6c09828495d2d848328 Mon Sep 17 00:00:00 2001 From: sushaan-k Date: Mon, 30 Mar 2026 20:26:58 -0400 Subject: [PATCH 02/10] feat: add verification cache with content-addressed storage Cache successful verification results keyed by SHA-256 of (canonical_spec + backend + provider). Subsequent runs with identical inputs return the cached result immediately, skipping the LLM and proof-assistant pipeline. Add --no-cache flag to the CLI verify command. Tests use an autouse fixture to isolate the cache per test. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/vericode/cache.py | 145 ++++++++++++++++++++++++ src/vericode/cli.py | 3 + src/vericode/verifier.py | 44 +++++++- tests/conftest.py | 13 +++ tests/test_cache.py | 201 ++++++++++++++++++++++++++++++++++ tests/test_data_structures.py | 4 + tests/test_verifier.py | 8 ++ 7 files changed, 417 insertions(+), 1 deletion(-) create mode 100644 src/vericode/cache.py create mode 100644 tests/test_cache.py diff --git a/src/vericode/cache.py b/src/vericode/cache.py new file mode 100644 index 0000000..ba456f9 --- /dev/null +++ b/src/vericode/cache.py @@ -0,0 +1,145 @@ +"""Content-addressed verification cache. + +Caches successful ``VerificationOutput`` results keyed by a hash of +(spec + backend + provider) so that repeated runs with the same inputs +can skip the expensive LLM + proof-assistant pipeline. +""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass, field +from pathlib import Path + +from vericode.artifacts import canonical_spec, sha256_hex +from vericode.spec import Spec + +logger = logging.getLogger(__name__) + +_DEFAULT_CACHE_DIR = Path.home() / ".cache" / "vericode" + + +@dataclass +class CacheEntry: + """A single cached verification result. + + Attributes: + cache_key: The content-addressed hash key. + code: Verified implementation source code. + proof: Verified proof source code. + backend: Backend used for verification. + language: Target language of the implementation. + certificate_json: JSON string of the proof certificate. + """ + + cache_key: str + code: str + proof: str + backend: str + language: str + certificate_json: str + metadata: dict[str, str] = field(default_factory=dict) + + +def cache_key(spec: Spec, backend: str, provider: str) -> str: + """Compute a content-addressed cache key. + + The key is a SHA-256 hex digest derived from the canonical spec + representation, the backend name, and the provider name. + + Args: + spec: The specification being verified. + backend: Name of the verification backend. + provider: Name of the LLM provider. + + Returns: + A 64-character hex digest string. + """ + canonical = canonical_spec(spec) + combined = f"{canonical}|{backend.lower()}|{provider.lower()}" + return sha256_hex(combined) + + +class VerificationCache: + """File-backed content-addressed cache for verification results. + + Each entry is stored as a JSON file named ``.json`` under + the cache directory. + + Args: + cache_dir: Directory for cache files. Created on first write. + """ + + def __init__(self, cache_dir: Path | None = None) -> None: + self._cache_dir = cache_dir or _DEFAULT_CACHE_DIR + + @property + def cache_dir(self) -> Path: + """Return the cache directory path.""" + return self._cache_dir + + def get(self, key: str) -> CacheEntry | None: + """Look up a cached entry by key. + + Args: + key: The content-addressed cache key. + + Returns: + A ``CacheEntry`` if found, otherwise ``None``. + """ + path = self._cache_dir / f"{key}.json" + if not path.exists(): + logger.debug("Cache miss", extra={"key": key[:12]}) + return None + + try: + data = json.loads(path.read_text()) + logger.info("Cache hit", extra={"key": key[:12]}) + return CacheEntry( + cache_key=data["cache_key"], + code=data["code"], + proof=data["proof"], + backend=data["backend"], + language=data["language"], + certificate_json=data["certificate_json"], + metadata=data.get("metadata", {}), + ) + except (json.JSONDecodeError, KeyError, OSError) as exc: + logger.warning("Corrupt cache entry, ignoring", extra={"error": str(exc)}) + return None + + def put(self, entry: CacheEntry) -> None: + """Write a cache entry to disk. + + Args: + entry: The cache entry to persist. + """ + self._cache_dir.mkdir(parents=True, exist_ok=True) + path = self._cache_dir / f"{entry.cache_key}.json" + data = { + "cache_key": entry.cache_key, + "code": entry.code, + "proof": entry.proof, + "backend": entry.backend, + "language": entry.language, + "certificate_json": entry.certificate_json, + "metadata": entry.metadata, + } + path.write_text(json.dumps(data, indent=2)) + logger.info("Cached verification result", extra={"key": entry.cache_key[:12]}) + + def clear(self) -> int: + """Remove all cache entries. + + Returns: + The number of entries removed. + """ + if not self._cache_dir.exists(): + return 0 + removed = 0 + for path in self._cache_dir.glob("*.json"): + path.unlink() + removed += 1 + logger.info("Cleared cache", extra={"removed": removed}) + return removed diff --git a/src/vericode/cli.py b/src/vericode/cli.py index 25b6626..7ea01ab 100644 --- a/src/vericode/cli.py +++ b/src/vericode/cli.py @@ -114,6 +114,7 @@ def main(ctx: click.Context, verbose: bool) -> None: help="LLM provider.", ) @click.option("--output", "-o", type=click.Path(), help="Write results to a file.") +@click.option("--no-cache", is_flag=True, default=False, help="Skip verification cache.") @click.pass_context def verify( ctx: click.Context, @@ -124,6 +125,7 @@ def verify( max_iterations: int, provider: str, output: str | None, + no_cache: bool, ) -> None: """Verify a natural-language specification. @@ -171,6 +173,7 @@ def verify( backend=backend, provider=llm, max_iterations=max_iterations, + use_cache=not no_cache, ) ) diff --git a/src/vericode/verifier.py b/src/vericode/verifier.py index 715f183..60ebe22 100644 --- a/src/vericode/verifier.py +++ b/src/vericode/verifier.py @@ -14,6 +14,7 @@ from vericode.artifacts import bound_proof_source, canonical_spec, sha256_hex from vericode.backends import VerificationBackend, get_backend +from vericode.cache import CacheEntry, VerificationCache, cache_key from vericode.exceptions import RefinementExhaustedError from vericode.generator import DualGenerator from vericode.models.base import LLMProvider @@ -172,6 +173,8 @@ async def verify( temperature: float = 0.2, max_tokens: int = 4096, existing_code: str | None = None, + use_cache: bool = True, + cache: VerificationCache | None = None, ) -> VerificationOutput: """Run the full vericode pipeline. @@ -196,6 +199,10 @@ async def verify( max_tokens: Max tokens per LLM call. existing_code: If provided, generate a proof for this code instead of generating new code. + use_cache: When ``True`` (default), look up and store results in + the verification cache. + cache: An explicit ``VerificationCache`` instance. When ``None`` + the default file-backed cache is used. Returns: A ``VerificationOutput`` with verified code and a proof certificate @@ -222,6 +229,26 @@ async def verify( provider = AnthropicProvider() + # --- Check cache --- + provider_name = provider.provider_name + vcache = cache or VerificationCache() + if use_cache: + key = cache_key(spec, backend_obj.name, provider_name) + hit = vcache.get(key) + if hit is not None: + logger.info("Returning cached verification result") + cert_data = json.loads(hit.certificate_json) + certificate = ProofCertificate(**cert_data) + return VerificationOutput( + code=hit.code, + proof=hit.proof, + verified=True, + iterations=0, + certificate=certificate, + backend=hit.backend, + language=hit.language, + ) + # --- Build pipeline --- generator = DualGenerator(provider, temperature=temperature, max_tokens=max_tokens) engine = ProofEngine(generator, backend_obj, max_iterations=max_iterations) @@ -257,7 +284,7 @@ async def verify( certificate = _build_certificate(spec, result.code, result.proof, backend_obj.name) - return VerificationOutput( + output = VerificationOutput( code=result.code, proof=result.proof, verified=result.success, @@ -266,3 +293,18 @@ async def verify( backend=backend_obj.name, language=language, ) + + # --- Persist to cache on success --- + if use_cache and output.verified and output.certificate is not None: + key = cache_key(spec, backend_obj.name, provider_name) + entry = CacheEntry( + cache_key=key, + code=output.code, + proof=output.proof, + backend=output.backend, + language=output.language, + certificate_json=output.certificate.to_json(), + ) + vcache.put(entry) + + return output diff --git a/tests/conftest.py b/tests/conftest.py index aadfd66..894973c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,12 +2,25 @@ from __future__ import annotations +import tempfile +from pathlib import Path + import pytest from vericode.backends.base import VerificationBackend, VerificationResult from vericode.models.base import GenerationResponse, LLMProvider from vericode.spec import Spec + +@pytest.fixture(autouse=True) +def _isolate_cache(monkeypatch: pytest.MonkeyPatch) -> None: + """Redirect the verification cache to a temp dir for every test.""" + import vericode.cache as _cache_mod + + monkeypatch.setattr( + _cache_mod, "_DEFAULT_CACHE_DIR", Path(tempfile.mkdtemp()) / "vericode" + ) + # --------------------------------------------------------------------------- # Fake / stub implementations for testing without real LLMs or proof tools # --------------------------------------------------------------------------- diff --git a/tests/test_cache.py b/tests/test_cache.py new file mode 100644 index 0000000..03e4d8e --- /dev/null +++ b/tests/test_cache.py @@ -0,0 +1,201 @@ +"""Tests for the verification cache module.""" + +from __future__ import annotations + +from pathlib import Path + +from vericode.cache import CacheEntry, VerificationCache, cache_key +from vericode.spec import Spec + +# --------------------------------------------------------------------------- +# cache_key tests +# --------------------------------------------------------------------------- + + +class TestCacheKey: + """Tests for the ``cache_key`` function.""" + + def test_deterministic(self) -> None: + """Same inputs always produce the same key.""" + spec = Spec(description="Sort a list", function_name="sort") + k1 = cache_key(spec, "lean4", "anthropic") + k2 = cache_key(spec, "lean4", "anthropic") + assert k1 == k2 + + def test_different_backend_different_key(self) -> None: + spec = Spec(description="Sort a list", function_name="sort") + k1 = cache_key(spec, "lean4", "anthropic") + k2 = cache_key(spec, "dafny", "anthropic") + assert k1 != k2 + + def test_different_provider_different_key(self) -> None: + spec = Spec(description="Sort a list", function_name="sort") + k1 = cache_key(spec, "lean4", "anthropic") + k2 = cache_key(spec, "lean4", "openai") + assert k1 != k2 + + def test_different_spec_different_key(self) -> None: + spec_a = Spec(description="Sort a list", function_name="sort") + spec_b = Spec(description="Search a list", function_name="search") + k1 = cache_key(spec_a, "lean4", "anthropic") + k2 = cache_key(spec_b, "lean4", "anthropic") + assert k1 != k2 + + def test_key_is_64_hex_chars(self) -> None: + spec = Spec(description="Sort a list") + key = cache_key(spec, "lean4", "anthropic") + assert len(key) == 64 + assert all(c in "0123456789abcdef" for c in key) + + def test_case_insensitive_backend_and_provider(self) -> None: + spec = Spec(description="Sort a list") + k1 = cache_key(spec, "LEAN4", "Anthropic") + k2 = cache_key(spec, "lean4", "anthropic") + assert k1 == k2 + + +# --------------------------------------------------------------------------- +# VerificationCache tests +# --------------------------------------------------------------------------- + + +class TestVerificationCache: + """Tests for ``VerificationCache`` get/put/clear.""" + + def test_miss_returns_none(self, tmp_path: Path) -> None: + vc = VerificationCache(cache_dir=tmp_path / "cache") + assert vc.get("nonexistent") is None + + def test_round_trip(self, tmp_path: Path) -> None: + vc = VerificationCache(cache_dir=tmp_path / "cache") + entry = CacheEntry( + cache_key="abc123", + code="def sort(lst): return sorted(lst)", + proof="theorem sort_correct := by sorry", + backend="lean4", + language="python", + certificate_json='{"verified": true}', + ) + vc.put(entry) + result = vc.get("abc123") + + assert result is not None + assert result.code == entry.code + assert result.proof == entry.proof + assert result.backend == "lean4" + assert result.language == "python" + + def test_clear(self, tmp_path: Path) -> None: + vc = VerificationCache(cache_dir=tmp_path / "cache") + for i in range(3): + entry = CacheEntry( + cache_key=f"key{i}", + code=f"code{i}", + proof=f"proof{i}", + backend="lean4", + language="python", + certificate_json="{}", + ) + vc.put(entry) + + removed = vc.clear() + assert removed == 3 + assert vc.get("key0") is None + + def test_clear_empty_cache(self, tmp_path: Path) -> None: + vc = VerificationCache(cache_dir=tmp_path / "cache") + assert vc.clear() == 0 + + def test_corrupt_entry_returns_none(self, tmp_path: Path) -> None: + cache_dir = tmp_path / "cache" + cache_dir.mkdir(parents=True) + (cache_dir / "bad.json").write_text("not valid json{{{") + + vc = VerificationCache(cache_dir=cache_dir) + assert vc.get("bad") is None + + def test_cache_dir_created_on_put(self, tmp_path: Path) -> None: + cache_dir = tmp_path / "nested" / "cache" / "dir" + vc = VerificationCache(cache_dir=cache_dir) + entry = CacheEntry( + cache_key="test", + code="code", + proof="proof", + backend="lean4", + language="python", + certificate_json="{}", + ) + vc.put(entry) + assert cache_dir.exists() + assert (cache_dir / "test.json").exists() + + +# --------------------------------------------------------------------------- +# Integration with verify pipeline +# --------------------------------------------------------------------------- + + +class TestCacheIntegration: + """Test that verify() uses the cache end-to-end.""" + + async def test_cached_result_skips_pipeline(self, tmp_path: Path) -> None: + """A cached result should be returned without calling the LLM.""" + from tests.conftest import FakeBackend, FakeLLMProvider + from vericode.cache import VerificationCache + from vericode.verifier import verify + + spec = Spec(description="Sort a list", function_name="sort") + provider = FakeLLMProvider() + backend = FakeBackend(succeed=True) + + cache_dir = tmp_path / "cache" + vc = VerificationCache(cache_dir=cache_dir) + + # First call: populates cache + result1 = await verify( + spec, + backend=backend, + provider=provider, + cache=vc, + ) + assert result1.verified is True + assert provider.call_count == 1 + + # Second call: should use cache, no extra LLM call + result2 = await verify( + spec, + backend=backend, + provider=provider, + cache=vc, + ) + assert result2.verified is True + assert result2.iterations == 0 # cache hit indicator + assert provider.call_count == 1 # no new LLM call + + async def test_no_cache_flag_skips_cache(self, tmp_path: Path) -> None: + """When use_cache=False, the cache should not be consulted.""" + from tests.conftest import FakeBackend, FakeLLMProvider + from vericode.cache import VerificationCache + from vericode.verifier import verify + + spec = Spec(description="Sort a list", function_name="sort") + provider = FakeLLMProvider() + backend = FakeBackend(succeed=True) + + cache_dir = tmp_path / "cache" + vc = VerificationCache(cache_dir=cache_dir) + + # First call with cache + await verify(spec, backend=backend, provider=provider, cache=vc) + assert provider.call_count == 1 + + # Second call with use_cache=False + result = await verify( + spec, + backend=backend, + provider=provider, + cache=vc, + use_cache=False, + ) + assert result.verified is True + assert provider.call_count == 2 # LLM was called again diff --git a/tests/test_data_structures.py b/tests/test_data_structures.py index 87e7e5b..cc77f5b 100644 --- a/tests/test_data_structures.py +++ b/tests/test_data_structures.py @@ -51,6 +51,7 @@ async def test_merge_sorted_lists(self) -> None: language="python", backend=backend, provider=provider, + ) assert result.verified is True @@ -85,6 +86,7 @@ async def test_insert_into_bst(self) -> None: ), backend=backend, provider=provider, + ) assert result.verified is True @@ -99,6 +101,7 @@ async def test_data_structure_with_refinement(self) -> None: backend=backend, provider=provider, max_iterations=5, + ) assert result.verified is True @@ -118,6 +121,7 @@ async def test_reverse_list(self) -> None: spec, backend=backend, provider=provider, + ) assert result.verified is True diff --git a/tests/test_verifier.py b/tests/test_verifier.py index c9bea5b..24faf44 100644 --- a/tests/test_verifier.py +++ b/tests/test_verifier.py @@ -100,6 +100,7 @@ async def test_successful_verification(self, sort_spec: Spec) -> None: backend=backend, provider=provider, max_iterations=3, + ) assert result.verified is True @@ -120,6 +121,7 @@ async def test_failed_verification_returns_partial(self, sort_spec: Spec) -> Non backend=backend, provider=provider, max_iterations=2, + ) assert result.verified is False @@ -136,6 +138,7 @@ async def test_verify_with_string_spec(self) -> None: language="python", backend=backend, provider=provider, + ) assert result.verified is True @@ -152,6 +155,7 @@ async def test_verify_with_refinement(self, sort_spec: Spec) -> None: backend=backend, provider=provider, max_iterations=5, + ) assert result.verified is True @@ -165,6 +169,7 @@ async def test_certificate_hashes_are_sha256(self, sort_spec: Spec) -> None: sort_spec, backend=backend, provider=provider, + ) assert result.certificate is not None @@ -181,6 +186,7 @@ async def test_certificate_timestamp_is_iso(self, sort_spec: Spec) -> None: sort_spec, backend=backend, provider=provider, + ) assert result.certificate is not None @@ -228,6 +234,7 @@ async def test_verify_with_backend_name_string(self) -> None: backend="lean4", provider=provider, max_iterations=1, + ) # We expect failure since lean4 is not installed, but no crash @@ -246,6 +253,7 @@ async def test_verify_with_existing_code_keeps_source_fixed( provider=provider, existing_code="def original(lst): return lst", max_iterations=3, + ) assert result.verified is True From 7e913c9c9d667bb6e0db402e8f9d793c751c0f5d Mon Sep 17 00:00:00 2001 From: sushaan-k Date: Mon, 30 Mar 2026 20:31:05 -0400 Subject: [PATCH 03/10] refactor: unify backend error handling with custom exceptions All three backends (Lean4, Dafny, Verus) now raise ProofCompilationError with structured fields (backend_name, source_file, error_lines, raw_output) instead of returning VerificationResult with success=False. The ProofEngine catches these exceptions and converts them back to VerificationResult for the refinement loop. Legacy keyword arguments are preserved for backward compatibility. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/vericode/backends/dafny.py | 40 +++++++++++------- src/vericode/backends/lean4.py | 40 +++++++++++------- src/vericode/backends/verus.py | 40 +++++++++++------- src/vericode/exceptions.py | 26 ++++++++++-- src/vericode/proof_engine.py | 18 +++++++-- tests/test_backends.py | 40 +++++++++++------- tests/test_backends_subprocess.py | 67 ++++++++++++++++--------------- tests/test_exceptions.py | 28 +++++++++++++ 8 files changed, 203 insertions(+), 96 deletions(-) diff --git a/src/vericode/backends/dafny.py b/src/vericode/backends/dafny.py index e0d37cb..77f004f 100644 --- a/src/vericode/backends/dafny.py +++ b/src/vericode/backends/dafny.py @@ -13,6 +13,7 @@ from pathlib import Path from vericode.backends.base import VerificationBackend, VerificationResult +from vericode.exceptions import ProofCompilationError logger = logging.getLogger(__name__) @@ -72,21 +73,23 @@ async def verify(self, proof_source: str) -> VerificationResult: proc.communicate(), timeout=self.timeout ) except FileNotFoundError: - return VerificationResult( - success=False, - compiler_output="", - errors=["dafny binary not found on PATH"], - backend=self.name, - ) + raise ProofCompilationError( + "dafny binary not found on PATH", + backend_name=self.name, + source_file=str(tmp_path), + error_lines=["dafny binary not found on PATH"], + raw_output="", + ) from None except TimeoutError: proc.kill() await proc.wait() - return VerificationResult( - success=False, - compiler_output="", - errors=[f"dafny verification timed out after {self.timeout}s"], - backend=self.name, - ) + raise ProofCompilationError( + f"dafny verification timed out after {self.timeout}s", + backend_name=self.name, + source_file=str(tmp_path), + error_lines=[f"dafny verification timed out after {self.timeout}s"], + raw_output="", + ) from None finally: tmp_path.unlink(missing_ok=True) @@ -94,10 +97,19 @@ async def verify(self, proof_source: str) -> VerificationResult: output = (stdout_bytes.decode() + "\n" + stderr_bytes.decode()).strip() errors = _parse_dafny_errors(output) + if proc.returncode != 0 or len(errors) > 0: + raise ProofCompilationError( + f"Dafny proof compilation failed with {len(errors)} error(s)", + backend_name=self.name, + source_file=str(tmp_path), + error_lines=errors, + raw_output=output, + ) + return VerificationResult( - success=proc.returncode == 0 and len(errors) == 0, + success=True, compiler_output=output, - errors=errors, + errors=[], elapsed_seconds=elapsed, backend=self.name, ) diff --git a/src/vericode/backends/lean4.py b/src/vericode/backends/lean4.py index 6872a05..d03406f 100644 --- a/src/vericode/backends/lean4.py +++ b/src/vericode/backends/lean4.py @@ -13,6 +13,7 @@ from pathlib import Path from vericode.backends.base import VerificationBackend, VerificationResult +from vericode.exceptions import ProofCompilationError logger = logging.getLogger(__name__) @@ -71,21 +72,23 @@ async def verify(self, proof_source: str) -> VerificationResult: proc.communicate(), timeout=self.timeout ) except FileNotFoundError: - return VerificationResult( - success=False, - compiler_output="", - errors=["lean binary not found on PATH"], - backend=self.name, - ) + raise ProofCompilationError( + "lean binary not found on PATH", + backend_name=self.name, + source_file=str(tmp_path), + error_lines=["lean binary not found on PATH"], + raw_output="", + ) from None except TimeoutError: proc.kill() await proc.wait() - return VerificationResult( - success=False, - compiler_output="", - errors=[f"lean verification timed out after {self.timeout}s"], - backend=self.name, - ) + raise ProofCompilationError( + f"lean verification timed out after {self.timeout}s", + backend_name=self.name, + source_file=str(tmp_path), + error_lines=[f"lean verification timed out after {self.timeout}s"], + raw_output="", + ) from None finally: tmp_path.unlink(missing_ok=True) @@ -93,10 +96,19 @@ async def verify(self, proof_source: str) -> VerificationResult: output = (stdout_bytes.decode() + "\n" + stderr_bytes.decode()).strip() errors = _parse_lean_errors(output) + if proc.returncode != 0 or len(errors) > 0: + raise ProofCompilationError( + f"Lean 4 proof compilation failed with {len(errors)} error(s)", + backend_name=self.name, + source_file=str(tmp_path), + error_lines=errors, + raw_output=output, + ) + return VerificationResult( - success=proc.returncode == 0 and len(errors) == 0, + success=True, compiler_output=output, - errors=errors, + errors=[], elapsed_seconds=elapsed, backend=self.name, ) diff --git a/src/vericode/backends/verus.py b/src/vericode/backends/verus.py index f1cdc9f..a2c41f0 100644 --- a/src/vericode/backends/verus.py +++ b/src/vericode/backends/verus.py @@ -13,6 +13,7 @@ from pathlib import Path from vericode.backends.base import VerificationBackend, VerificationResult +from vericode.exceptions import ProofCompilationError logger = logging.getLogger(__name__) @@ -71,21 +72,23 @@ async def verify(self, proof_source: str) -> VerificationResult: proc.communicate(), timeout=self.timeout ) except FileNotFoundError: - return VerificationResult( - success=False, - compiler_output="", - errors=["verus binary not found on PATH"], - backend=self.name, - ) + raise ProofCompilationError( + "verus binary not found on PATH", + backend_name=self.name, + source_file=str(tmp_path), + error_lines=["verus binary not found on PATH"], + raw_output="", + ) from None except TimeoutError: proc.kill() await proc.wait() - return VerificationResult( - success=False, - compiler_output="", - errors=[f"verus verification timed out after {self.timeout}s"], - backend=self.name, - ) + raise ProofCompilationError( + f"verus verification timed out after {self.timeout}s", + backend_name=self.name, + source_file=str(tmp_path), + error_lines=[f"verus verification timed out after {self.timeout}s"], + raw_output="", + ) from None finally: tmp_path.unlink(missing_ok=True) @@ -93,10 +96,19 @@ async def verify(self, proof_source: str) -> VerificationResult: output = (stdout_bytes.decode() + "\n" + stderr_bytes.decode()).strip() errors = _parse_verus_errors(output) + if proc.returncode != 0 or len(errors) > 0: + raise ProofCompilationError( + f"Verus proof compilation failed with {len(errors)} error(s)", + backend_name=self.name, + source_file=str(tmp_path), + error_lines=errors, + raw_output=output, + ) + return VerificationResult( - success=proc.returncode == 0 and len(errors) == 0, + success=True, compiler_output=output, - errors=errors, + errors=[], elapsed_seconds=elapsed, backend=self.name, ) diff --git a/src/vericode/exceptions.py b/src/vericode/exceptions.py index 221d8c3..90ebf9e 100644 --- a/src/vericode/exceptions.py +++ b/src/vericode/exceptions.py @@ -36,18 +36,38 @@ def __init__( class ProofCompilationError(VericodeError): - """Raised when a proof fails to compile in the backend proof assistant.""" + """Raised when a proof fails to compile in the backend proof assistant. + + Carries structured diagnostic information so callers can + programmatically inspect the failure without parsing strings. + + Attributes: + backend_name: Canonical name of the backend that failed (e.g. ``"lean4"``). + source_file: Path to the temporary proof file that was compiled. + error_lines: Parsed list of individual error messages. + raw_output: Complete stdout + stderr from the compiler. + """ def __init__( self, message: str, *, + backend_name: str = "", + source_file: str = "", + error_lines: list[str] | None = None, + raw_output: str = "", + # Keep old keyword for backward compat in call sites backend: str | None = None, compiler_output: str | None = None, details: str | None = None, ) -> None: - self.backend = backend - self.compiler_output = compiler_output + self.backend_name = backend_name or backend or "" + self.source_file = source_file + self.error_lines = error_lines or [] + self.raw_output = raw_output or compiler_output or "" + # Preserve legacy attributes + self.backend = self.backend_name + self.compiler_output = self.raw_output super().__init__(message, details=details) diff --git a/src/vericode/proof_engine.py b/src/vericode/proof_engine.py index 5fe4a69..32732b5 100644 --- a/src/vericode/proof_engine.py +++ b/src/vericode/proof_engine.py @@ -13,7 +13,7 @@ from vericode.artifacts import bound_proof_source from vericode.backends.base import VerificationBackend, VerificationResult -from vericode.exceptions import RefinementExhaustedError +from vericode.exceptions import ProofCompilationError, RefinementExhaustedError from vericode.generator import DualGenerationResult, DualGenerator from vericode.spec import Spec @@ -152,9 +152,19 @@ async def run( # --- Verify --- verification_source = bound_proof_source(spec, code, proof, backend_name) - vresult: VerificationResult = await self._backend.verify( - verification_source - ) + try: + vresult: VerificationResult = await self._backend.verify( + verification_source + ) + except ProofCompilationError as exc: + # Convert structured exception to a VerificationResult so the + # refinement loop can feed errors back to the LLM. + vresult = VerificationResult( + success=False, + compiler_output=exc.raw_output, + errors=exc.error_lines or [str(exc)], + backend=exc.backend_name or backend_name, + ) attempt = RefinementAttempt( iteration=iteration, diff --git a/tests/test_backends.py b/tests/test_backends.py index ef9f829..07dc461 100644 --- a/tests/test_backends.py +++ b/tests/test_backends.py @@ -11,6 +11,7 @@ VerusBackend, get_backend, ) +from vericode.exceptions import ProofCompilationError # --------------------------------------------------------------------------- # get_backend registry tests @@ -112,14 +113,15 @@ def test_lean4_template_contains_section(self) -> None: class TestLean4Verify: """Tests for the Lean4Backend verify method.""" - async def test_lean_not_installed(self) -> None: - """If lean is not on PATH, verify returns an error result.""" + async def test_lean_verify_returns_result_or_raises(self) -> None: + """Verify either returns a success result or raises ProofCompilationError.""" backend = Lean4Backend() - # This will fail because lean is likely not installed in CI - result = await backend.verify("-- invalid lean source") - # Should not raise, should return a result - assert isinstance(result, VerificationResult) - assert result.backend == "lean4" + try: + result = await backend.verify("-- just a comment") + assert isinstance(result, VerificationResult) + assert result.backend == "lean4" + except ProofCompilationError as exc: + assert exc.backend_name == "lean4" async def test_check_installed_returns_bool(self) -> None: backend = Lean4Backend() @@ -130,11 +132,15 @@ async def test_check_installed_returns_bool(self) -> None: class TestDafnyVerify: """Tests for the DafnyBackend verify method.""" - async def test_dafny_not_installed(self) -> None: + async def test_dafny_verify_returns_result_or_raises(self) -> None: + """Verify either returns a success result or raises ProofCompilationError.""" backend = DafnyBackend() - result = await backend.verify("// invalid dafny source") - assert isinstance(result, VerificationResult) - assert result.backend == "dafny" + try: + result = await backend.verify("// valid dafny source") + assert isinstance(result, VerificationResult) + assert result.backend == "dafny" + except ProofCompilationError as exc: + assert exc.backend_name == "dafny" async def test_check_installed_returns_bool(self) -> None: backend = DafnyBackend() @@ -145,11 +151,15 @@ async def test_check_installed_returns_bool(self) -> None: class TestVerusVerify: """Tests for the VerusBackend verify method.""" - async def test_verus_not_installed(self) -> None: + async def test_verus_verify_returns_result_or_raises(self) -> None: + """Verify either returns a success result or raises ProofCompilationError.""" backend = VerusBackend() - result = await backend.verify("// invalid verus source") - assert isinstance(result, VerificationResult) - assert result.backend == "verus" + try: + result = await backend.verify("// valid verus source") + assert isinstance(result, VerificationResult) + assert result.backend == "verus" + except ProofCompilationError as exc: + assert exc.backend_name == "verus" async def test_check_installed_returns_bool(self) -> None: backend = VerusBackend() diff --git a/tests/test_backends_subprocess.py b/tests/test_backends_subprocess.py index c85eabf..a4b892c 100644 --- a/tests/test_backends_subprocess.py +++ b/tests/test_backends_subprocess.py @@ -4,8 +4,8 @@ mock ``asyncio.create_subprocess_exec`` to exercise the full verify() path including: - Successful compilation -- Compilation with errors -- Binary not found (FileNotFoundError) +- Compilation with errors (raises ProofCompilationError) +- Binary not found (raises ProofCompilationError) - Error parsing logic """ @@ -18,6 +18,7 @@ from vericode.backends.dafny import DafnyBackend, _parse_dafny_errors from vericode.backends.lean4 import Lean4Backend, _parse_lean_errors from vericode.backends.verus import VerusBackend, _parse_verus_errors +from vericode.exceptions import ProofCompilationError def _mock_process( @@ -63,21 +64,21 @@ async def test_failed_verification(self, mock_exec: AsyncMock) -> None: stderr=b"error: unsolved goals\nerror: type mismatch", ) backend = Lean4Backend() - result = await backend.verify("theorem bad := by sorry") + with pytest.raises(ProofCompilationError) as exc_info: + await backend.verify("theorem bad := by sorry") - assert result.success is False - assert len(result.errors) == 2 - assert "unsolved goals" in result.errors[0] - assert result.backend == "lean4" + assert exc_info.value.backend_name == "lean4" + assert len(exc_info.value.error_lines) == 2 + assert "unsolved goals" in exc_info.value.error_lines[0] @patch("vericode.backends.lean4.asyncio.create_subprocess_exec") async def test_binary_not_found(self, mock_exec: AsyncMock) -> None: mock_exec.side_effect = FileNotFoundError("lean not found") backend = Lean4Backend() - result = await backend.verify("anything") + with pytest.raises(ProofCompilationError) as exc_info: + await backend.verify("anything") - assert result.success is False - assert "not found" in result.errors[0] + assert "not found" in exc_info.value.error_lines[0] @patch("vericode.backends.lean4.asyncio.create_subprocess_exec") async def test_check_installed_success(self, mock_exec: AsyncMock) -> None: @@ -113,14 +114,14 @@ async def test_mixed_stdout_stderr(self, mock_exec: AsyncMock) -> None: async def test_returncode_zero_but_errors_in_output( self, mock_exec: AsyncMock ) -> None: - """Edge case: returncode=0 but error lines in output -> failure.""" + """Edge case: returncode=0 but error lines in output -> raises.""" mock_exec.return_value = _mock_process( returncode=0, stderr=b"error: something unexpected", ) backend = Lean4Backend() - result = await backend.verify("theorem t := by trivial") - assert result.success is False + with pytest.raises(ProofCompilationError): + await backend.verify("theorem t := by trivial") # --------------------------------------------------------------------------- @@ -153,19 +154,20 @@ async def test_failed_verification(self, mock_exec: AsyncMock) -> None: stderr=b"", ) backend = DafnyBackend() - result = await backend.verify("method Bad() ensures false {}") + with pytest.raises(ProofCompilationError) as exc_info: + await backend.verify("method Bad() ensures false {}") - assert result.success is False - assert len(result.errors) == 2 + assert exc_info.value.backend_name == "dafny" + assert len(exc_info.value.error_lines) == 2 @patch("vericode.backends.dafny.asyncio.create_subprocess_exec") async def test_binary_not_found(self, mock_exec: AsyncMock) -> None: mock_exec.side_effect = FileNotFoundError("dafny not found") backend = DafnyBackend() - result = await backend.verify("anything") + with pytest.raises(ProofCompilationError) as exc_info: + await backend.verify("anything") - assert result.success is False - assert "not found" in result.errors[0] + assert "not found" in exc_info.value.error_lines[0] @patch("vericode.backends.dafny.asyncio.create_subprocess_exec") async def test_check_installed_success(self, mock_exec: AsyncMock) -> None: @@ -201,8 +203,8 @@ async def test_returncode_zero_but_errors_in_output( stdout=b"Error: something went wrong", ) backend = DafnyBackend() - result = await backend.verify("method M() {}") - assert result.success is False + with pytest.raises(ProofCompilationError): + await backend.verify("method M() {}") # --------------------------------------------------------------------------- @@ -234,19 +236,20 @@ async def test_failed_verification(self, mock_exec: AsyncMock) -> None: stderr=b"error[E0308]: mismatched types\nerror: aborting due to error", ) backend = VerusBackend() - result = await backend.verify("verus! { fn bad() {} }") + with pytest.raises(ProofCompilationError) as exc_info: + await backend.verify("verus! { fn bad() {} }") - assert result.success is False - assert len(result.errors) == 2 + assert exc_info.value.backend_name == "verus" + assert len(exc_info.value.error_lines) == 2 @patch("vericode.backends.verus.asyncio.create_subprocess_exec") async def test_binary_not_found(self, mock_exec: AsyncMock) -> None: mock_exec.side_effect = FileNotFoundError("verus not found") backend = VerusBackend() - result = await backend.verify("anything") + with pytest.raises(ProofCompilationError) as exc_info: + await backend.verify("anything") - assert result.success is False - assert "not found" in result.errors[0] + assert "not found" in exc_info.value.error_lines[0] @patch("vericode.backends.verus.asyncio.create_subprocess_exec") async def test_check_installed_success(self, mock_exec: AsyncMock) -> None: @@ -275,8 +278,8 @@ async def test_returncode_zero_but_errors(self, mock_exec: AsyncMock) -> None: stderr=b"error: verification failed", ) backend = VerusBackend() - result = await backend.verify("verus! { fn f() {} }") - assert result.success is False + with pytest.raises(ProofCompilationError): + await backend.verify("verus! { fn f() {} }") # --------------------------------------------------------------------------- @@ -393,9 +396,9 @@ async def test_verify_binary_not_found( with patch(f"{module_path}.asyncio.create_subprocess_exec") as mock_exec: mock_exec.side_effect = FileNotFoundError backend = backend_cls() - result = await backend.verify("proof") - assert result.success is False - assert len(result.errors) > 0 + with pytest.raises(ProofCompilationError) as exc_info: + await backend.verify("proof") + assert len(exc_info.value.error_lines) > 0 @pytest.mark.parametrize( "backend_cls", diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index 9f6538d..bdc6b81 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -59,6 +59,34 @@ def test_compiler_output_attribute(self) -> None: err = ProofCompilationError("fail", compiler_output="error: unsolved goals") assert err.compiler_output == "error: unsolved goals" + def test_structured_fields(self) -> None: + err = ProofCompilationError( + "compilation failed", + backend_name="dafny", + source_file="/tmp/proof.dfy", + error_lines=["Error: postcondition", "Error: assertion"], + raw_output="full compiler output here", + ) + assert err.backend_name == "dafny" + assert err.source_file == "/tmp/proof.dfy" + assert len(err.error_lines) == 2 + assert err.raw_output == "full compiler output here" + # Legacy aliases + assert err.backend == "dafny" + assert err.compiler_output == "full compiler output here" + + def test_backend_name_fallback_to_backend(self) -> None: + """If only legacy 'backend' kwarg is passed, backend_name mirrors it.""" + err = ProofCompilationError("fail", backend="verus") + assert err.backend_name == "verus" + + def test_defaults_for_structured_fields(self) -> None: + err = ProofCompilationError("fail") + assert err.backend_name == "" + assert err.source_file == "" + assert err.error_lines == [] + assert err.raw_output == "" + class TestBackendNotFoundError: def test_message_includes_backend(self) -> None: From 6b26dc5356a0087a29aa00022d45d4423293c386 Mon Sep 17 00:00:00 2001 From: sushaan-k Date: Mon, 30 Mar 2026 20:33:01 -0400 Subject: [PATCH 04/10] feat: add batch progress reporting Add a --progress flag to the batch CLI command that shows a rich progress bar with percentage and elapsed time. Add ProgressCallback support to verify() so callers can receive stage notifications (setup, generating, verified) during pipeline execution. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/vericode/cli.py | 68 +++++++++++++----- src/vericode/verifier.py | 19 +++++ tests/test_progress.py | 152 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 223 insertions(+), 16 deletions(-) create mode 100644 tests/test_progress.py diff --git a/src/vericode/cli.py b/src/vericode/cli.py index 7ea01ab..5fd7455 100644 --- a/src/vericode/cli.py +++ b/src/vericode/cli.py @@ -288,6 +288,12 @@ def prove( help="Target implementation language. Defaults to the backend's native language.", ) @click.option("--provider", default="anthropic", show_default=True) +@click.option( + "--progress", + is_flag=True, + default=False, + help="Show a rich progress bar during batch verification.", +) @click.pass_context def batch( ctx: click.Context, @@ -296,6 +302,7 @@ def batch( backend: str, lang: str | None, provider: str, + progress: bool, ) -> None: """Batch-verify multiple spec files. @@ -315,29 +322,58 @@ def batch( console.print(f"Found {len(yaml_files)} spec file(s)") + from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn + from vericode.models import get_provider as get_llm_provider from vericode.verifier import verify as run_verify llm = get_llm_provider(provider) language = lang or _BATCH_LANGUAGE_BY_BACKEND.get(backend.lower(), "python") - for yaml_file in yaml_files: - spec = load_spec_from_yaml(str(yaml_file)) - console.print(f"\n[bold]Processing:[/bold] {yaml_file.name}") - - result: VerificationOutput = _run_async( - run_verify(spec, language=language, backend=backend, provider=llm) - ) + def _run_batch( + progress_bar: Progress | None, + task_id: object | None, + ) -> None: + for yaml_file in yaml_files: + spec = load_spec_from_yaml(str(yaml_file)) + if not progress: + console.print(f"\n[bold]Processing:[/bold] {yaml_file.name}") + + result: VerificationOutput = _run_async( + run_verify(spec, language=language, backend=backend, provider=llm) + ) - stem = yaml_file.stem - if result.verified and result.certificate: - extension = _LANGUAGE_EXTENSIONS.get(result.language or language, ".txt") - (output_dir / f"{stem}{extension}").write_text(result.code) - (output_dir / f"{stem}.proof").write_text(result.proof) - (output_dir / f"{stem}.cert.json").write_text(result.certificate.to_json()) - console.print(" [green]Verified[/green]") - else: - console.print(" [red]Failed[/red]") + stem = yaml_file.stem + if result.verified and result.certificate: + extension = _LANGUAGE_EXTENSIONS.get( + result.language or language, ".txt" + ) + (output_dir / f"{stem}{extension}").write_text(result.code) + (output_dir / f"{stem}.proof").write_text(result.proof) + (output_dir / f"{stem}.cert.json").write_text( + result.certificate.to_json() + ) + if not progress: + console.print(" [green]Verified[/green]") + else: + if not progress: + console.print(" [red]Failed[/red]") + + if progress_bar is not None and task_id is not None: + progress_bar.update(task_id, advance=1) # type: ignore[arg-type] + + if progress: + with Progress( + TextColumn("[bold blue]{task.description}"), + BarColumn(), + TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), + TimeElapsedColumn(), + console=console, + ) as progress_bar: + task = progress_bar.add_task("Verifying", total=len(yaml_files)) + _run_batch(progress_bar, task) + else: + _run_batch(None, None) # --------------------------------------------------------------------------- diff --git a/src/vericode/verifier.py b/src/vericode/verifier.py index 60ebe22..5804c46 100644 --- a/src/vericode/verifier.py +++ b/src/vericode/verifier.py @@ -9,6 +9,7 @@ import json import logging +from collections.abc import Callable from dataclasses import dataclass, field from datetime import UTC, datetime @@ -23,6 +24,9 @@ logger = logging.getLogger(__name__) +# A progress callback receives (stage_name, current_step, total_steps). +ProgressCallback = Callable[[str, int, int], None] + @dataclass class ProofCertificate: @@ -163,6 +167,17 @@ def _build_bound_artifact( return _sha256(_spec_canonical(spec)), _sha256(code), proof_source +def _notify( + callback: ProgressCallback | None, + stage: str, + current: int, + total: int, +) -> None: + """Fire the progress callback if one is registered.""" + if callback is not None: + callback(stage, current, total) + + async def verify( spec_input: str | Spec, *, @@ -175,6 +190,7 @@ async def verify( existing_code: str | None = None, use_cache: bool = True, cache: VerificationCache | None = None, + progress_callback: ProgressCallback | None = None, ) -> VerificationOutput: """Run the full vericode pipeline. @@ -250,10 +266,12 @@ async def verify( ) # --- Build pipeline --- + _notify(progress_callback, "setup", 1, 3) generator = DualGenerator(provider, temperature=temperature, max_tokens=max_tokens) engine = ProofEngine(generator, backend_obj, max_iterations=max_iterations) # --- Execute --- + _notify(progress_callback, "generating", 2, 3) logger.info( "Starting verification pipeline", extra={ @@ -282,6 +300,7 @@ async def verify( ], ) + _notify(progress_callback, "verified", 3, 3) certificate = _build_certificate(spec, result.code, result.proof, backend_obj.name) output = VerificationOutput( diff --git a/tests/test_progress.py b/tests/test_progress.py new file mode 100644 index 0000000..9c21f30 --- /dev/null +++ b/tests/test_progress.py @@ -0,0 +1,152 @@ +"""Tests for batch progress reporting and verify() progress callback.""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import AsyncMock, patch + +import yaml +from click.testing import CliRunner + +from tests.conftest import FakeBackend, FakeLLMProvider +from vericode.cli import main +from vericode.spec import Spec +from vericode.verifier import ProofCertificate, VerificationOutput, verify + + +def _mock_verified_output() -> VerificationOutput: + return VerificationOutput( + code="def sort(lst): return sorted(lst)", + proof="theorem sort_correct := trivial", + verified=True, + iterations=1, + certificate=ProofCertificate( + spec_hash="a" * 64, + code_hash="b" * 64, + proof_hash="c" * 64, + backend="lean4", + timestamp="2026-01-01T00:00:00+00:00", + ), + backend="lean4", + language="lean", + ) + + +# --------------------------------------------------------------------------- +# verify() progress_callback tests +# --------------------------------------------------------------------------- + + +class TestVerifyProgressCallback: + """Tests for the progress_callback parameter on verify().""" + + async def test_callback_is_called(self) -> None: + """The progress callback should be invoked at each pipeline stage.""" + provider = FakeLLMProvider() + backend = FakeBackend(succeed=True) + stages: list[tuple[str, int, int]] = [] + + def _on_progress(stage: str, current: int, total: int) -> None: + stages.append((stage, current, total)) + + await verify( + Spec(description="Sort a list"), + backend=backend, + provider=provider, + progress_callback=_on_progress, + ) + + assert len(stages) >= 2 + stage_names = [s[0] for s in stages] + assert "setup" in stage_names + assert "generating" in stage_names + + async def test_callback_none_is_safe(self) -> None: + """Passing progress_callback=None should not crash.""" + provider = FakeLLMProvider() + backend = FakeBackend(succeed=True) + + result = await verify( + "Sort a list", + backend=backend, + provider=provider, + progress_callback=None, + ) + assert result.verified is True + + +# --------------------------------------------------------------------------- +# batch --progress CLI tests +# --------------------------------------------------------------------------- + + +class TestBatchProgress: + """Tests for the --progress flag on the batch command.""" + + def test_batch_progress_flag_exists(self) -> None: + runner = CliRunner() + result = runner.invoke(main, ["batch", "--help"]) + assert result.exit_code == 0 + assert "--progress" in result.output + + def test_batch_with_progress(self, tmp_path: Path) -> None: + specs_dir = tmp_path / "specs" + specs_dir.mkdir() + output_dir = tmp_path / "output" + + for name in ("sort", "search"): + (specs_dir / f"{name}.yaml").write_text( + yaml.dump({"description": f"{name} a list", "function_name": name}) + ) + + mock_output = _mock_verified_output() + + with ( + patch("vericode.models.get_provider", return_value=AsyncMock()), + patch( + "vericode.verifier.verify", + new_callable=AsyncMock, + return_value=mock_output, + ), + ): + runner = CliRunner() + result = runner.invoke( + main, + [ + "batch", + "--specs", + str(specs_dir), + "-o", + str(output_dir), + "--progress", + ], + ) + assert result.exit_code == 0 + + def test_batch_without_progress(self, tmp_path: Path) -> None: + """Without --progress, batch should work the same as before.""" + specs_dir = tmp_path / "specs" + specs_dir.mkdir() + output_dir = tmp_path / "output" + + (specs_dir / "sort.yaml").write_text( + yaml.dump({"description": "sort a list", "function_name": "sort"}) + ) + + mock_output = _mock_verified_output() + + with ( + patch("vericode.models.get_provider", return_value=AsyncMock()), + patch( + "vericode.verifier.verify", + new_callable=AsyncMock, + return_value=mock_output, + ), + ): + runner = CliRunner() + result = runner.invoke( + main, + ["batch", "--specs", str(specs_dir), "-o", str(output_dir)], + ) + assert result.exit_code == 0 + assert "1 spec file(s)" in result.output From 0f3a58b9bf7b2705c71207557ca710b2b3b0dd9a Mon Sep 17 00:00:00 2001 From: sushaan-k Date: Mon, 30 Mar 2026 20:34:09 -0400 Subject: [PATCH 05/10] docs: add getting started guide with sorting walkthrough Expand the getting started guide with a complete end-to-end walkthrough of verifying a sorting function: writing the YAML spec, running CLI verification, using the Python API, checking complexity scores, inspecting proof certificates, and using the verification cache. Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/getting_started.md | 190 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 188 insertions(+), 2 deletions(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index 2585b70..9488377 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -7,7 +7,8 @@ 3. a machine-check result 4. a certificate binding the spec, code, and proof bundle together -This guide covers the shortest path from install to a verified run. +This guide covers the shortest path from install to a verified run, then +walks through a complete example verifying a sorting function. ## Install @@ -59,6 +60,167 @@ export DEEPSEEK_API_KEY=... If you do not pass a provider explicitly in Python, the top-level `verify()` path defaults to the Anthropic provider. +## Walkthrough: Verifying a Sorting Function + +This section walks through verifying a sorting function end-to-end, from +writing the spec to inspecting the proof certificate. + +### Step 1: Write the specification + +Create a file called `sort_spec.yaml`: + +```yaml +description: > + Sort a list of integers in non-decreasing order. + The output must be a permutation of the input. + The function must handle empty lists and single-element lists. +function_name: sort +input_types: + lst: list[int] +output_type: list[int] +preconditions: [] +postconditions: + - "is_sorted(result)" + - "is_permutation(result, lst)" +edge_cases: + - "lst == []" + - "len(lst) == 1" +``` + +The specification tells `vericode` exactly what the function must satisfy. +Postconditions are the properties the formal proof will encode. + +### Step 2: Run verification from the CLI + +```bash +vericode verify --spec sort_spec.yaml --lang python --backend lean4 +``` + +`vericode` will: + +1. Parse the YAML spec into a structured `Spec` object. +2. Send the spec to the LLM, which generates both a Python implementation and + a Lean 4 proof in a single pass. +3. Compile the proof with the Lean 4 toolchain. +4. If the proof fails, feed the compiler errors back to the LLM for refinement. +5. Repeat up to `--max-iterations` times (default 5). + +On success you will see output like: + +``` + vericode + Function: sort + Language: python + Backend: lean4 + Provider: anthropic + + Verification successful! + Iterations: 2 + + Implementation + def sort(lst: list[int]) -> list[int]: + ... + + Proof + theorem sort_correct : + ... + + Proof Receipt + { + "spec_hash": "a1b2c3...", + "code_hash": "d4e5f6...", + "proof_hash": "789abc...", + "backend": "lean4", + "timestamp": "2026-03-30T12:00:00+00:00", + "verified": true + } +``` + +### Step 3: Run the same verification from Python + +```python +import asyncio +from vericode import Spec, verify + + +async def main() -> None: + spec = Spec( + description=( + "Sort a list of integers in non-decreasing order. " + "The output must be a permutation of the input." + ), + function_name="sort", + input_types={"lst": "list[int]"}, + output_type="list[int]", + postconditions=[ + "is_sorted(result)", + "is_permutation(result, lst)", + ], + edge_cases=["lst == []", "len(lst) == 1"], + ) + + result = await verify( + spec, + language="python", + backend="lean4", + ) + + print(f"Verified: {result.verified}") + print(f"Iterations: {result.iterations}") + + if result.verified: + print(f"\n--- Implementation ---\n{result.code}") + print(f"\n--- Proof ---\n{result.proof}") + print(f"\n--- Certificate ---\n{result.certificate.to_json()}") + else: + for err in result.errors: + print(f"Error: {err}") + + +asyncio.run(main()) +``` + +### Step 4: Check the complexity score + +Before running verification, you can estimate how hard the spec is to verify +using `complexity_score()`: + +```python +score = spec.complexity_score() +print(f"Complexity: {score:.2f}") +# A score above 0.5 suggests the spec may need more refinement iterations. +``` + +### Step 5: Inspect the proof certificate + +The certificate binds the spec, implementation, and proof together via SHA-256 +hashes. You can re-verify it later: + +```python +from vericode.verifier import ProofCertificate + +valid = ProofCertificate.verify_certificate( + result.certificate, + spec, + result.code, + result.proof, +) +print(f"Certificate valid: {valid}") +``` + +If anyone changes the code or the spec after verification, the certificate will +no longer validate. + +### Step 6: Verify existing code with `prove` + +If you already have an implementation and just want a proof: + +```bash +vericode prove --code sort.py --spec "output is sorted and is a permutation of input" +``` + +This tells the LLM to keep the implementation fixed and only generate a proof. + ## Fastest CLI Path Verify a natural-language prompt: @@ -85,6 +247,12 @@ Batch a directory of YAML specs: vericode batch --specs specs/ --output verified/ ``` +Batch with a progress bar: + +```bash +vericode batch --specs specs/ --output verified/ --progress +``` + `batch` defaults the implementation language from the backend unless `--lang` is supplied: @@ -168,13 +336,31 @@ postconditions: - `code`: generated or preserved implementation - `proof`: backend-specific proof text - `verified`: final machine-check status -- `iterations`: refinement rounds taken by the proof engine +- `iterations`: refinement rounds taken by the proof engine (0 if cached) - `backend`: backend name used for the run - `certificate`: `ProofCertificate` binding the spec, code, and proof bundle The certificate is designed to be machine-checkable later; it stores hashes of the canonicalized spec, implementation, and bound proof source. +## Verification Cache + +Successful verification results are cached by default, keyed by a SHA-256 hash +of the spec, backend, and provider. Subsequent runs with identical inputs skip +the LLM and proof-assistant pipeline entirely. + +To bypass the cache: + +```bash +vericode verify "sort a list" --no-cache +``` + +Or in Python: + +```python +result = await verify(spec, backend="lean4", use_cache=False) +``` + ## `verify` vs `prove` - `verify` starts from a natural-language or YAML spec and asks the model to From f6f964eabbfd6e63b5cbd5298332405e9de3a290 Mon Sep 17 00:00:00 2001 From: sushaan-k Date: Mon, 30 Mar 2026 22:29:56 -0400 Subject: [PATCH 06/10] tighten verification cache identity --- src/vericode/cache.py | 30 ++++- src/vericode/verifier.py | 20 +++- tests/test_cache.py | 247 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 278 insertions(+), 19 deletions(-) diff --git a/src/vericode/cache.py b/src/vericode/cache.py index ba456f9..e96993d 100644 --- a/src/vericode/cache.py +++ b/src/vericode/cache.py @@ -1,8 +1,8 @@ """Content-addressed verification cache. -Caches successful ``VerificationOutput`` results keyed by a hash of -(spec + backend + provider) so that repeated runs with the same inputs -can skip the expensive LLM + proof-assistant pipeline. +Caches successful ``VerificationOutput`` results keyed by a hash of the +full verification identity: spec, backend, provider, target language, +and any user-supplied implementation under proof. """ from __future__ import annotations @@ -42,22 +42,40 @@ class CacheEntry: metadata: dict[str, str] = field(default_factory=dict) -def cache_key(spec: Spec, backend: str, provider: str) -> str: +def cache_key( + spec: Spec, + backend: str, + provider: str, + *, + language: str, + existing_code: str | None = None, + temperature: float, + max_tokens: int, +) -> str: """Compute a content-addressed cache key. The key is a SHA-256 hex digest derived from the canonical spec - representation, the backend name, and the provider name. + representation plus every input that can change the resulting proof + artifact. Args: spec: The specification being verified. backend: Name of the verification backend. provider: Name of the LLM provider. + language: Target implementation language. + existing_code: Optional user-supplied implementation being proved. + temperature: Generation sampling temperature. + max_tokens: Generation token budget. Returns: A 64-character hex digest string. """ canonical = canonical_spec(spec) - combined = f"{canonical}|{backend.lower()}|{provider.lower()}" + code_hash = sha256_hex(existing_code or "") + combined = ( + f"{canonical}|{backend.lower()}|{provider.lower()}|" + f"{language.lower()}|{code_hash}|{temperature:.6f}|{max_tokens}" + ) return sha256_hex(combined) diff --git a/src/vericode/verifier.py b/src/vericode/verifier.py index 5804c46..5c58a70 100644 --- a/src/vericode/verifier.py +++ b/src/vericode/verifier.py @@ -249,7 +249,15 @@ async def verify( provider_name = provider.provider_name vcache = cache or VerificationCache() if use_cache: - key = cache_key(spec, backend_obj.name, provider_name) + key = cache_key( + spec, + backend_obj.name, + provider_name, + language=language, + existing_code=existing_code, + temperature=temperature, + max_tokens=max_tokens, + ) hit = vcache.get(key) if hit is not None: logger.info("Returning cached verification result") @@ -315,7 +323,15 @@ async def verify( # --- Persist to cache on success --- if use_cache and output.verified and output.certificate is not None: - key = cache_key(spec, backend_obj.name, provider_name) + key = cache_key( + spec, + backend_obj.name, + provider_name, + language=language, + existing_code=existing_code, + temperature=temperature, + max_tokens=max_tokens, + ) entry = CacheEntry( cache_key=key, code=output.code, diff --git a/tests/test_cache.py b/tests/test_cache.py index 03e4d8e..9df2791 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -18,41 +18,200 @@ class TestCacheKey: def test_deterministic(self) -> None: """Same inputs always produce the same key.""" spec = Spec(description="Sort a list", function_name="sort") - k1 = cache_key(spec, "lean4", "anthropic") - k2 = cache_key(spec, "lean4", "anthropic") + k1 = cache_key( + spec, + "lean4", + "anthropic", + language="python", + temperature=0.2, + max_tokens=4096, + ) + k2 = cache_key( + spec, + "lean4", + "anthropic", + language="python", + temperature=0.2, + max_tokens=4096, + ) assert k1 == k2 def test_different_backend_different_key(self) -> None: spec = Spec(description="Sort a list", function_name="sort") - k1 = cache_key(spec, "lean4", "anthropic") - k2 = cache_key(spec, "dafny", "anthropic") + k1 = cache_key( + spec, + "lean4", + "anthropic", + language="python", + temperature=0.2, + max_tokens=4096, + ) + k2 = cache_key( + spec, + "dafny", + "anthropic", + language="python", + temperature=0.2, + max_tokens=4096, + ) assert k1 != k2 def test_different_provider_different_key(self) -> None: spec = Spec(description="Sort a list", function_name="sort") - k1 = cache_key(spec, "lean4", "anthropic") - k2 = cache_key(spec, "lean4", "openai") + k1 = cache_key( + spec, + "lean4", + "anthropic", + language="python", + temperature=0.2, + max_tokens=4096, + ) + k2 = cache_key( + spec, + "lean4", + "openai", + language="python", + temperature=0.2, + max_tokens=4096, + ) assert k1 != k2 def test_different_spec_different_key(self) -> None: spec_a = Spec(description="Sort a list", function_name="sort") spec_b = Spec(description="Search a list", function_name="search") - k1 = cache_key(spec_a, "lean4", "anthropic") - k2 = cache_key(spec_b, "lean4", "anthropic") + k1 = cache_key( + spec_a, + "lean4", + "anthropic", + language="python", + temperature=0.2, + max_tokens=4096, + ) + k2 = cache_key( + spec_b, + "lean4", + "anthropic", + language="python", + temperature=0.2, + max_tokens=4096, + ) assert k1 != k2 def test_key_is_64_hex_chars(self) -> None: spec = Spec(description="Sort a list") - key = cache_key(spec, "lean4", "anthropic") + key = cache_key( + spec, + "lean4", + "anthropic", + language="python", + temperature=0.2, + max_tokens=4096, + ) assert len(key) == 64 assert all(c in "0123456789abcdef" for c in key) def test_case_insensitive_backend_and_provider(self) -> None: spec = Spec(description="Sort a list") - k1 = cache_key(spec, "LEAN4", "Anthropic") - k2 = cache_key(spec, "lean4", "anthropic") + k1 = cache_key( + spec, + "LEAN4", + "Anthropic", + language="python", + temperature=0.2, + max_tokens=4096, + ) + k2 = cache_key( + spec, + "lean4", + "anthropic", + language="python", + temperature=0.2, + max_tokens=4096, + ) assert k1 == k2 + def test_different_language_different_key(self) -> None: + spec = Spec(description="Sort a list", function_name="sort") + k1 = cache_key( + spec, + "lean4", + "anthropic", + language="python", + temperature=0.2, + max_tokens=4096, + ) + k2 = cache_key( + spec, + "lean4", + "anthropic", + language="rust", + temperature=0.2, + max_tokens=4096, + ) + assert k1 != k2 + + def test_different_existing_code_different_key(self) -> None: + spec = Spec(description="Sort a list", function_name="sort") + k1 = cache_key( + spec, + "lean4", + "anthropic", + language="python", + existing_code="def sort(xs): return xs", + temperature=0.2, + max_tokens=4096, + ) + k2 = cache_key( + spec, + "lean4", + "anthropic", + language="python", + existing_code="def sort(xs): return sorted(xs)", + temperature=0.2, + max_tokens=4096, + ) + assert k1 != k2 + + def test_different_temperature_different_key(self) -> None: + spec = Spec(description="Sort a list", function_name="sort") + k1 = cache_key( + spec, + "lean4", + "anthropic", + language="python", + temperature=0.1, + max_tokens=4096, + ) + k2 = cache_key( + spec, + "lean4", + "anthropic", + language="python", + temperature=0.9, + max_tokens=4096, + ) + assert k1 != k2 + + def test_different_max_tokens_different_key(self) -> None: + spec = Spec(description="Sort a list", function_name="sort") + k1 = cache_key( + spec, + "lean4", + "anthropic", + language="python", + temperature=0.2, + max_tokens=2048, + ) + k2 = cache_key( + spec, + "lean4", + "anthropic", + language="python", + temperature=0.2, + max_tokens=4096, + ) + assert k1 != k2 + # --------------------------------------------------------------------------- # VerificationCache tests @@ -199,3 +358,69 @@ async def test_no_cache_flag_skips_cache(self, tmp_path: Path) -> None: ) assert result.verified is True assert provider.call_count == 2 # LLM was called again + + async def test_existing_code_uses_distinct_cache_entries( + self, tmp_path: Path + ) -> None: + """Different user-supplied implementations must not share cache hits.""" + from tests.conftest import FakeBackend, FakeLLMProvider + from vericode.cache import VerificationCache + from vericode.verifier import verify + + spec = Spec(description="Sort a list", function_name="sort") + provider = FakeLLMProvider() + backend = FakeBackend(succeed=True) + vc = VerificationCache(cache_dir=tmp_path / "cache") + + result_one = await verify( + spec, + backend=backend, + provider=provider, + cache=vc, + existing_code="def sort(xs): return xs", + ) + result_two = await verify( + spec, + backend=backend, + provider=provider, + cache=vc, + existing_code="def sort(xs): return sorted(xs)", + ) + + assert result_one.verified is True + assert result_two.verified is True + assert provider.call_count == 2 + + async def test_generation_settings_use_distinct_cache_entries( + self, tmp_path: Path + ) -> None: + """Different generation settings must not share cached artifacts.""" + from tests.conftest import FakeBackend, FakeLLMProvider + from vericode.cache import VerificationCache + from vericode.verifier import verify + + spec = Spec(description="Sort a list", function_name="sort") + provider = FakeLLMProvider() + backend = FakeBackend(succeed=True) + vc = VerificationCache(cache_dir=tmp_path / "cache") + + first = await verify( + spec, + backend=backend, + provider=provider, + cache=vc, + temperature=0.1, + max_tokens=2048, + ) + second = await verify( + spec, + backend=backend, + provider=provider, + cache=vc, + temperature=0.7, + max_tokens=4096, + ) + + assert first.verified is True + assert second.verified is True + assert provider.call_count == 2 From bf6273bf4ee38219b5a6593f2f8fc5ab58a04e63 Mon Sep 17 00:00:00 2001 From: sushaan-k Date: Mon, 30 Mar 2026 23:19:51 -0400 Subject: [PATCH 07/10] apply ci formatting --- src/vericode/cli.py | 4 +++- tests/conftest.py | 1 + tests/test_data_structures.py | 4 ---- tests/test_verifier.py | 8 -------- 4 files changed, 4 insertions(+), 13 deletions(-) diff --git a/src/vericode/cli.py b/src/vericode/cli.py index 5fd7455..4b8a9f4 100644 --- a/src/vericode/cli.py +++ b/src/vericode/cli.py @@ -114,7 +114,9 @@ def main(ctx: click.Context, verbose: bool) -> None: help="LLM provider.", ) @click.option("--output", "-o", type=click.Path(), help="Write results to a file.") -@click.option("--no-cache", is_flag=True, default=False, help="Skip verification cache.") +@click.option( + "--no-cache", is_flag=True, default=False, help="Skip verification cache." +) @click.pass_context def verify( ctx: click.Context, diff --git a/tests/conftest.py b/tests/conftest.py index 894973c..7b7867f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,6 +21,7 @@ def _isolate_cache(monkeypatch: pytest.MonkeyPatch) -> None: _cache_mod, "_DEFAULT_CACHE_DIR", Path(tempfile.mkdtemp()) / "vericode" ) + # --------------------------------------------------------------------------- # Fake / stub implementations for testing without real LLMs or proof tools # --------------------------------------------------------------------------- diff --git a/tests/test_data_structures.py b/tests/test_data_structures.py index cc77f5b..87e7e5b 100644 --- a/tests/test_data_structures.py +++ b/tests/test_data_structures.py @@ -51,7 +51,6 @@ async def test_merge_sorted_lists(self) -> None: language="python", backend=backend, provider=provider, - ) assert result.verified is True @@ -86,7 +85,6 @@ async def test_insert_into_bst(self) -> None: ), backend=backend, provider=provider, - ) assert result.verified is True @@ -101,7 +99,6 @@ async def test_data_structure_with_refinement(self) -> None: backend=backend, provider=provider, max_iterations=5, - ) assert result.verified is True @@ -121,7 +118,6 @@ async def test_reverse_list(self) -> None: spec, backend=backend, provider=provider, - ) assert result.verified is True diff --git a/tests/test_verifier.py b/tests/test_verifier.py index 24faf44..c9bea5b 100644 --- a/tests/test_verifier.py +++ b/tests/test_verifier.py @@ -100,7 +100,6 @@ async def test_successful_verification(self, sort_spec: Spec) -> None: backend=backend, provider=provider, max_iterations=3, - ) assert result.verified is True @@ -121,7 +120,6 @@ async def test_failed_verification_returns_partial(self, sort_spec: Spec) -> Non backend=backend, provider=provider, max_iterations=2, - ) assert result.verified is False @@ -138,7 +136,6 @@ async def test_verify_with_string_spec(self) -> None: language="python", backend=backend, provider=provider, - ) assert result.verified is True @@ -155,7 +152,6 @@ async def test_verify_with_refinement(self, sort_spec: Spec) -> None: backend=backend, provider=provider, max_iterations=5, - ) assert result.verified is True @@ -169,7 +165,6 @@ async def test_certificate_hashes_are_sha256(self, sort_spec: Spec) -> None: sort_spec, backend=backend, provider=provider, - ) assert result.certificate is not None @@ -186,7 +181,6 @@ async def test_certificate_timestamp_is_iso(self, sort_spec: Spec) -> None: sort_spec, backend=backend, provider=provider, - ) assert result.certificate is not None @@ -234,7 +228,6 @@ async def test_verify_with_backend_name_string(self) -> None: backend="lean4", provider=provider, max_iterations=1, - ) # We expect failure since lean4 is not installed, but no crash @@ -253,7 +246,6 @@ async def test_verify_with_existing_code_keeps_source_fixed( provider=provider, existing_code="def original(lst): return lst", max_iterations=3, - ) assert result.verified is True From f53c930faf06d5e3085f56e402c32d83404b53fa Mon Sep 17 00:00:00 2001 From: sushaan-k Date: Mon, 18 May 2026 11:45:26 -0400 Subject: [PATCH 08/10] Add cache inspection command --- src/vericode/cache.py | 24 ++++++++++++++++++++++++ src/vericode/cli.py | 28 ++++++++++++++++++++++++++++ tests/test_cache.py | 43 +++++++++++++++++++++++++++++++++++++++++++ tests/test_cli.py | 22 ++++++++++++++++++++++ 4 files changed, 117 insertions(+) diff --git a/src/vericode/cache.py b/src/vericode/cache.py index e96993d..9687e66 100644 --- a/src/vericode/cache.py +++ b/src/vericode/cache.py @@ -161,3 +161,27 @@ def clear(self) -> int: removed += 1 logger.info("Cleared cache", extra={"removed": removed}) return removed + + def stats(self) -> dict[str, int | str]: + """Return cache inventory metadata for CLI inspection.""" + if not self._cache_dir.exists(): + return { + "cache_dir": str(self._cache_dir), + "entries": 0, + "bytes": 0, + } + + files = list(self._cache_dir.glob("*.json")) + total_bytes = 0 + live_entries = 0 + for path in files: + try: + total_bytes += path.stat().st_size + live_entries += 1 + except OSError: + continue + return { + "cache_dir": str(self._cache_dir), + "entries": live_entries, + "bytes": total_bytes, + } diff --git a/src/vericode/cli.py b/src/vericode/cli.py index 4b8a9f4..a603042 100644 --- a/src/vericode/cli.py +++ b/src/vericode/cli.py @@ -378,6 +378,34 @@ def _run_batch( _run_batch(None, None) +@main.command(name="cache") +@click.option( + "--cache-dir", + type=click.Path(path_type=Path), + default=None, + help="Override the verification cache directory.", +) +@click.option("--json", "as_json", is_flag=True, help="Emit machine-readable JSON.") +def cache_inspect(cache_dir: Path | None, as_json: bool) -> None: + """Inspect the local verification cache.""" + from vericode.cache import VerificationCache + + stats = VerificationCache(cache_dir=cache_dir).stats() + if as_json: + click.echo(json.dumps(stats, indent=2)) + return + + console.print( + Panel( + f"[bold]Cache dir:[/bold] {stats['cache_dir']}\n" + f"[bold]Entries:[/bold] {stats['entries']}\n" + f"[bold]Bytes:[/bold] {stats['bytes']}", + title="vericode cache", + border_style="magenta", + ) + ) + + # --------------------------------------------------------------------------- # Display helpers # --------------------------------------------------------------------------- diff --git a/tests/test_cache.py b/tests/test_cache.py index 9df2791..56d67bf 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -3,6 +3,7 @@ from __future__ import annotations from pathlib import Path +from unittest.mock import patch from vericode.cache import CacheEntry, VerificationCache, cache_key from vericode.spec import Spec @@ -288,6 +289,48 @@ def test_cache_dir_created_on_put(self, tmp_path: Path) -> None: assert cache_dir.exists() assert (cache_dir / "test.json").exists() + def test_stats_for_missing_cache_dir(self, tmp_path: Path) -> None: + cache_dir = tmp_path / "missing" + stats = VerificationCache(cache_dir=cache_dir).stats() + assert stats == { + "cache_dir": str(cache_dir), + "entries": 0, + "bytes": 0, + } + + def test_stats_reports_entries_and_bytes(self, tmp_path: Path) -> None: + cache_dir = tmp_path / "cache" + cache_dir.mkdir() + (cache_dir / "a.json").write_text("{}") + (cache_dir / "b.json").write_text('{"ok": true}') + (cache_dir / "ignored.txt").write_text("not counted") + + stats = VerificationCache(cache_dir=cache_dir).stats() + + assert stats["cache_dir"] == str(cache_dir) + assert stats["entries"] == 2 + assert stats["bytes"] == 14 + + def test_stats_ignores_files_removed_during_scan(self, tmp_path: Path) -> None: + cache_dir = tmp_path / "cache" + cache_dir.mkdir() + disappearing = cache_dir / "gone.json" + surviving = cache_dir / "ok.json" + disappearing.write_text("{}") + surviving.write_text("{}") + original_stat = Path.stat + + def flaky_stat(path: Path, *args: object, **kwargs: object) -> object: + if path == disappearing: + raise FileNotFoundError(path) + return original_stat(path, *args, **kwargs) + + with patch.object(Path, "stat", flaky_stat): + stats = VerificationCache(cache_dir=cache_dir).stats() + + assert stats["entries"] == 1 + assert stats["bytes"] == 2 + # --------------------------------------------------------------------------- # Integration with verify pipeline diff --git a/tests/test_cli.py b/tests/test_cli.py index 6000e13..a3cfccd 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2,6 +2,8 @@ from __future__ import annotations +import json +from pathlib import Path from unittest.mock import AsyncMock, patch import pytest @@ -42,6 +44,10 @@ def test_batch_help(self, runner: CliRunner) -> None: result = runner.invoke(main, ["batch", "--help"]) assert result.exit_code == 0 + def test_cache_help(self, runner: CliRunner) -> None: + result = runner.invoke(main, ["cache", "--help"]) + assert result.exit_code == 0 + def test_verify_no_args(self, runner: CliRunner) -> None: """Verify with no description and no --spec should error.""" result = runner.invoke(main, ["verify"]) @@ -116,3 +122,19 @@ def test_verify_failed_output(self, runner: CliRunner) -> None: def test_verbose_flag(self, runner: CliRunner) -> None: result = runner.invoke(main, ["-v", "--help"]) assert result.exit_code == 0 + + def test_cache_json(self, runner: CliRunner, tmp_path: Path) -> None: + cache_dir = tmp_path / "cache" + cache_dir.mkdir() + (cache_dir / "entry.json").write_text("{}") + + result = runner.invoke( + main, + ["cache", "--cache-dir", str(cache_dir), "--json"], + ) + + assert result.exit_code == 0 + payload = json.loads(result.output) + assert payload["entries"] == 1 + assert payload["cache_dir"] == str(cache_dir) + assert payload["bytes"] == 2 From 4cd3f560ce0c84b2be3f1f3b1020cc843e310073 Mon Sep 17 00:00:00 2001 From: sushaan-k Date: Mon, 18 May 2026 12:38:55 -0400 Subject: [PATCH 09/10] Add cache entry listing --- src/vericode/cache.py | 32 +++++++++++++++++++++++++++++ src/vericode/cli.py | 23 ++++++++++++++++++--- tests/test_cache.py | 47 +++++++++++++++++++++++++++++++++++++++++++ tests/test_cli.py | 23 +++++++++++++++++++++ 4 files changed, 122 insertions(+), 3 deletions(-) diff --git a/src/vericode/cache.py b/src/vericode/cache.py index 9687e66..0f48e9f 100644 --- a/src/vericode/cache.py +++ b/src/vericode/cache.py @@ -185,3 +185,35 @@ def stats(self) -> dict[str, int | str]: "entries": live_entries, "bytes": total_bytes, } + + def list_entries(self, limit: int = 20) -> list[dict[str, int | str]]: + """Return lightweight metadata for cache entries.""" + if not self._cache_dir.exists(): + return [] + + files: list[tuple[float, Path]] = [] + for path in self._cache_dir.glob("*.json"): + try: + files.append((path.stat().st_mtime, path)) + except OSError: + continue + + entries: list[dict[str, int | str]] = [] + for _mtime, path in sorted(files, key=lambda item: item[0], reverse=True): + if len(entries) >= limit: + break + try: + data = json.loads(path.read_text()) + if not isinstance(data, dict): + continue + entries.append( + { + "cache_key": str(data.get("cache_key", path.stem)), + "backend": str(data.get("backend", "")), + "language": str(data.get("language", "")), + "bytes": path.stat().st_size, + } + ) + except (json.JSONDecodeError, OSError): + continue + return entries diff --git a/src/vericode/cli.py b/src/vericode/cli.py index a603042..8b27219 100644 --- a/src/vericode/cli.py +++ b/src/vericode/cli.py @@ -386,13 +386,24 @@ def _run_batch( help="Override the verification cache directory.", ) @click.option("--json", "as_json", is_flag=True, help="Emit machine-readable JSON.") -def cache_inspect(cache_dir: Path | None, as_json: bool) -> None: +@click.option("--list", "list_entries", is_flag=True, help="List recent entries.") +def cache_inspect( + cache_dir: Path | None, + as_json: bool, + list_entries: bool, +) -> None: """Inspect the local verification cache.""" from vericode.cache import VerificationCache - stats = VerificationCache(cache_dir=cache_dir).stats() + cache = VerificationCache(cache_dir=cache_dir) + stats = cache.stats() + entries_detail = cache.list_entries() if list_entries else [] + payload = { + **stats, + "entries_detail": entries_detail, + } if as_json: - click.echo(json.dumps(stats, indent=2)) + click.echo(json.dumps(payload, indent=2)) return console.print( @@ -404,6 +415,12 @@ def cache_inspect(cache_dir: Path | None, as_json: bool) -> None: border_style="magenta", ) ) + if list_entries: + for entry in entries_detail: + console.print( + f"- {entry['cache_key']} [{entry['backend']}/{entry['language']}] " + f"{entry['bytes']} bytes" + ) # --------------------------------------------------------------------------- diff --git a/tests/test_cache.py b/tests/test_cache.py index 56d67bf..1df0ed4 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json from pathlib import Path from unittest.mock import patch @@ -331,6 +332,52 @@ def flaky_stat(path: Path, *args: object, **kwargs: object) -> object: assert stats["entries"] == 1 assert stats["bytes"] == 2 + def test_list_entries_returns_metadata(self, tmp_path: Path) -> None: + vc = VerificationCache(cache_dir=tmp_path / "cache") + vc.put( + CacheEntry( + cache_key="entry", + code="code", + proof="proof", + backend="lean4", + language="python", + certificate_json="{}", + ) + ) + + entries = vc.list_entries() + + expected_bytes = (vc.cache_dir / "entry.json").stat().st_size + assert entries == [ + { + "cache_key": "entry", + "backend": "lean4", + "language": "python", + "bytes": expected_bytes, + } + ] + + def test_list_entries_skips_unusable_json_shapes(self, tmp_path: Path) -> None: + cache_dir = tmp_path / "cache" + cache_dir.mkdir() + (cache_dir / "array.json").write_text("[]") + (cache_dir / "string.json").write_text('"not-an-entry"') + (cache_dir / "number.json").write_text("123") + (cache_dir / "invalid.json").write_text("{") + (cache_dir / "entry.json").write_text( + json.dumps( + { + "cache_key": "entry", + "backend": "lean4", + "language": "python", + } + ) + ) + + entries = VerificationCache(cache_dir=cache_dir).list_entries() + + assert [entry["cache_key"] for entry in entries] == ["entry"] + # --------------------------------------------------------------------------- # Integration with verify pipeline diff --git a/tests/test_cli.py b/tests/test_cli.py index a3cfccd..78c231c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -138,3 +138,26 @@ def test_cache_json(self, runner: CliRunner, tmp_path: Path) -> None: assert payload["entries"] == 1 assert payload["cache_dir"] == str(cache_dir) assert payload["bytes"] == 2 + + def test_cache_json_lists_entries(self, runner: CliRunner, tmp_path: Path) -> None: + cache_dir = tmp_path / "cache" + cache_dir.mkdir() + (cache_dir / "wrong-shape.json").write_text("[]") + (cache_dir / "entry.json").write_text( + json.dumps( + { + "cache_key": "entry", + "backend": "lean4", + "language": "python", + } + ) + ) + + result = runner.invoke( + main, + ["cache", "--cache-dir", str(cache_dir), "--json", "--list"], + ) + + assert result.exit_code == 0 + payload = json.loads(result.output) + assert payload["entries_detail"][0]["cache_key"] == "entry" From a457480d6eaecffc2a0dc4d3c0c4a20eeb2f81ff Mon Sep 17 00:00:00 2001 From: sushaan-k Date: Mon, 18 May 2026 13:41:17 -0400 Subject: [PATCH 10/10] Preserve cache stats JSON shape --- src/vericode/cli.py | 7 +++---- tests/test_cli.py | 1 + 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/vericode/cli.py b/src/vericode/cli.py index 8b27219..4da54d1 100644 --- a/src/vericode/cli.py +++ b/src/vericode/cli.py @@ -398,10 +398,9 @@ def cache_inspect( cache = VerificationCache(cache_dir=cache_dir) stats = cache.stats() entries_detail = cache.list_entries() if list_entries else [] - payload = { - **stats, - "entries_detail": entries_detail, - } + payload: dict[str, object] = dict(stats) + if list_entries: + payload["entries_detail"] = entries_detail if as_json: click.echo(json.dumps(payload, indent=2)) return diff --git a/tests/test_cli.py b/tests/test_cli.py index 78c231c..d73813b 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -138,6 +138,7 @@ def test_cache_json(self, runner: CliRunner, tmp_path: Path) -> None: assert payload["entries"] == 1 assert payload["cache_dir"] == str(cache_dir) assert payload["bytes"] == 2 + assert "entries_detail" not in payload def test_cache_json_lists_entries(self, runner: CliRunner, tmp_path: Path) -> None: cache_dir = tmp_path / "cache"