Fix public verification and demo challenge trust

bnovik0v · bnovik0v · commit 52f95d031109 · 2026-03-07T04:10:02.000+01:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## 0.2.1 - 2026-03-07
+
+- Made `obfuscated_text_lock` verification return a `missing_private_verification_data` failure instead of raising `ValueError` when given a public-only challenge payload
+- Removed the demo server fallback that trusted client-supplied challenge objects during verification
+- Added regression tests for public challenge verification and demo challenge-ID enforcement
+
 ## 0.2.0 - 2026-03-07
 
 - Repositioned `agentproof` as an LLM-capability CAPTCHA library
diff --git a/demo/app.py b/demo/app.py
@@ -600,9 +600,6 @@ def load_challenge(payload: dict[str, Any]) -> Challenge | None:
     challenge_id = payload.get("challenge_id")
     if isinstance(challenge_id, str):
         return ISSUED_CHALLENGES.get(challenge_id)
-    challenge_data = payload.get("challenge")
-    if isinstance(challenge_data, dict):
-        return Challenge(**challenge_data)
     return None
 
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "agentproof-ai"
-version = "0.2.0"
+version = "0.2.1"
 description = "LLM-capability CAPTCHA and obfuscated verification challenges for Python applications."
 readme = "README.md"
 license = "MIT"
diff --git a/src/agentproof/challenges/obfuscated_text.py b/src/agentproof/challenges/obfuscated_text.py
@@ -98,6 +98,12 @@ def verify(self, challenge: Challenge, response: AgentResponse) -> VerificationR
             return VerificationResult.failure("challenge_type_mismatch")
         if utc_now() > parse_datetime(challenge.expires_at):
             return VerificationResult.failure("challenge_expired")
+        expected_answer = challenge.private_data.get("expected_answer")
+        if not isinstance(expected_answer, str):
+            return VerificationResult.failure("missing_private_verification_data")
+        template_id = challenge.private_data.get("template_id")
+        if not isinstance(template_id, str):
+            return VerificationResult.failure("missing_private_verification_data")
         answer = response.payload.get("answer")
         if not isinstance(answer, str) or not answer.strip():
             return VerificationResult.failure("missing_answer")
@@ -107,15 +113,14 @@ def verify(self, challenge: Challenge, response: AgentResponse) -> VerificationR
         normalized_answer = answer.strip().upper()
         if not _is_hyphen_answer(normalized_answer):
             return VerificationResult.failure("invalid_answer_format")
-        expected_answer = self._expected_answer(challenge)
         if normalized_answer != expected_answer:
             return VerificationResult.failure(
                 "answer_mismatch",
                 expected_format="UPPERCASE-HYPHENATED",
             )
         return VerificationResult.success(
             answer=normalized_answer,
-            template_id=self._template_id(challenge),
+            template_id=template_id,
             difficulty=challenge.data.get("difficulty"),
         )
 
@@ -141,21 +146,6 @@ def _template_builders() -> dict[str, TemplateBuilder]:
             "vowel_count": _build_vowel_count,
         }
 
-    @staticmethod
-    def _expected_answer(challenge: Challenge) -> str:
-        expected_answer = challenge.private_data.get("expected_answer")
-        if not isinstance(expected_answer, str):
-            raise ValueError("obfuscated_text_lock challenge is missing private expected_answer")
-        return expected_answer
-
-    @staticmethod
-    def _template_id(challenge: Challenge) -> str:
-        template_id = challenge.private_data.get("template_id")
-        if not isinstance(template_id, str):
-            raise ValueError("obfuscated_text_lock challenge is missing private template_id")
-        return template_id
-
-
 def _build_amber_sort(rng: random.Random) -> tuple[list[str], str]:
     amber_words = rng.sample(TOKEN_POOL, 3)
     noise_words = rng.sample([word for word in TOKEN_POOL if word not in amber_words], 2)
diff --git a/tests/test_api.py b/tests/test_api.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 from datetime import timedelta
+from typing import Any, cast
 
 import pytest
 
@@ -97,6 +98,20 @@ def test_obfuscated_text_rejects_wrong_answer() -> None:
     assert result.reason == "answer_mismatch"
 
 
+def test_obfuscated_text_public_roundtrip_fails_gracefully() -> None:
+    challenge = generate_challenge(ChallengeSpec(challenge_type="obfuscated_text_lock"))
+    public_payload = cast(dict[str, Any], challenge.to_dict())
+    public_only = Challenge(**public_payload)
+    response = AgentResponse(
+        challenge_id=public_only.challenge_id,
+        challenge_type=public_only.challenge_type,
+        payload={"answer": "NOT-IMPORTANT"},
+    )
+    result = verify_response(public_only, response)
+    assert not result.ok
+    assert result.reason == "missing_private_verification_data"
+
+
 def test_semantic_math_rejects_wrong_word_count() -> None:
     spec = ChallengeSpec(
         challenge_type="semantic_math_lock",
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -134,3 +134,36 @@ def test_cli_solve_obfuscated_returns_nonzero(tmp_path: Path) -> None:
         exit_code = main(["solve", str(challenge_file)])
     assert exit_code == 2
     assert "no built-in solver" in stderr.getvalue()
+
+
+def test_cli_verify_public_obfuscated_file_fails_cleanly(tmp_path: Path) -> None:
+    internal_file = tmp_path / "challenge.internal.json"
+    public_file = tmp_path / "challenge.public.json"
+    response_file = tmp_path / "response.json"
+    result_file = tmp_path / "result.json"
+
+    assert (
+        main(
+            [
+                "generate",
+                "obfuscated_text_lock",
+                "--output",
+                str(internal_file),
+                "--public-output",
+                str(public_file),
+            ]
+        )
+        == 0
+    )
+
+    public_challenge = json.loads(public_file.read_text(encoding="utf-8"))
+    response = {
+        "challenge_id": public_challenge["challenge_id"],
+        "challenge_type": public_challenge["challenge_type"],
+        "payload": {"answer": "WRONG-ANSWER"},
+    }
+    response_file.write_text(json.dumps(response), encoding="utf-8")
+
+    assert main(["verify", str(public_file), str(response_file), "--output", str(result_file)]) == 1
+    result = json.loads(result_file.read_text(encoding="utf-8"))
+    assert result["reason"] == "missing_private_verification_data"
diff --git a/tests/test_demo_app.py b/tests/test_demo_app.py
@@ -106,3 +106,36 @@ def test_demo_obfuscated_manual_verify() -> None:
         server.shutdown()
         server.server_close()
         thread.join(timeout=5)
+
+
+def test_demo_verify_rejects_unknown_challenge_id() -> None:
+    module = load_demo_app_module()
+    server, thread = module.serve_in_background(port=0)
+    host, port = cast(tuple[str, int], server.server_address)
+    base_url = f"http://{host}:{port}"
+    try:
+        for _ in range(10):
+            if thread.is_alive():
+                break
+            time.sleep(0.05)
+
+        try:
+            request_json(
+                f"{base_url}/api/verify",
+                {
+                    "challenge_id": "not-issued",
+                    "response": {
+                        "challenge_id": "not-issued",
+                        "challenge_type": "proof_of_work",
+                        "payload": {"nonce": "0", "hash": "deadbeef"},
+                    },
+                },
+            )
+        except Exception as exc:  # noqa: BLE001
+            assert "HTTP Error 404" in str(exc)
+        else:
+            raise AssertionError("expected verify request to fail")
+    finally:
+        server.shutdown()
+        server.server_close()
+        thread.join(timeout=5)
diff --git a/uv.lock b/uv.lock