Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
84a594e
feat: add human review and sampling pages for evaluation process
RonShakutai Mar 7, 2026
f214416
feat: add default values for presidioEntities and llmEntities in Enti…
RonShakutai Mar 8, 2026
0054d54
Feature/evaluation - Dataset Interface (#1893)
RonShakutai Mar 8, 2026
e9ee97c
refactor: clean up imports and improve code formatting across multipl…
RonShakutai Mar 8, 2026
a1936fd
Ronshakutai/feature/evaluation sampling (#1894)
RonShakutai Mar 8, 2026
877f9a4
ruff fixes
RonShakutai Mar 8, 2026
de286e1
Feature/evaluation - LLM AS A JUDGE INTEGRATION (#1900)
RonShakutai Mar 9, 2026
e48284f
Feedback adjustments
RonShakutai Mar 9, 2026
244b2ae
refactor: update entity handling and UI components for improved datas…
RonShakutai Mar 10, 2026
82639bd
refactor: update dataset identifiers and enhance entity status displa…
RonShakutai Mar 10, 2026
be7293e
refactor: enhance dataset handling and UI components for improved ent…
RonShakutai Mar 10, 2026
b32af0d
Presidio default running
RonShakutai Mar 10, 2026
7329a6c
refactor: update presidio-analyzer dependency to include transformers…
RonShakutai Mar 10, 2026
57e6370
fix bugs
RonShakutai Mar 10, 2026
bb4a71a
feat: add API utility functions and mock data for evaluation runs and…
RonShakutai Mar 11, 2026
daaa5a4
feat: enhance save_final_entities to include Presidio analysis result…
RonShakutai Mar 11, 2026
6d5f129
Omri's feedback & Coby PRD alignment
RonShakutai Mar 11, 2026
0089d88
refactor: remove duplicate dataset entry and adjust button placement …
RonShakutai Mar 11, 2026
252445a
refactor: remove Presidio configuration notice from Setup component
RonShakutai Mar 11, 2026
7a93e30
refactor: remove outdated comments and unused recognizer from custom_…
RonShakutai Mar 11, 2026
b5d6fa8
comparing configs
RonShakutai Mar 11, 2026
8be74aa
bug fixes, feedback support, and Irina's suggestions
RonShakutai Mar 13, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions evaluation/ai-assistant/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Override root gitignore's lib/ rule for src/app/lib/
!src/app/lib/

# Dependencies
node_modules/

# Build output
dist/

# Environment / IDE
.env
.env.*
.vscode/
.idea/

# OS
.DS_Store
Thumbs.db

# Python backend
backend/.venv/
backend/__pycache__/
backend/**/__pycache__/

# Persisted dataset copies
backend/data/

*.pyc
*.pyo

# Vite
*.local
19 changes: 19 additions & 0 deletions evaluation/ai-assistant/backend/datasets.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
[
{
"id": "example-dataset",
"filename": "example_pii_dataset.csv",
"name": "Example Dataset",
"description": "3 synthetic PII records covering healthcare, finance, and lab-result scenarios.",
"path": "data/example_pii_dataset.csv",
"stored_path": "data/Example_Dataset_example-dataset.csv",
"format": "csv",
"record_count": 3,
"has_entities": false,
"has_final_entities": true,
"ran_configs": [
"default_spacy"
],
"text_column": "text",
"entities_column": null
}
]
97 changes: 97 additions & 0 deletions evaluation/ai-assistant/backend/llm_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""LLM Judge service using Azure OpenAI via LangExtract."""

from __future__ import annotations

import logging
from typing import Optional

from models import Entity

logger = logging.getLogger(__name__)

# Lazy-loaded recognizer singleton
_recognizer = None


class LLMServiceError(Exception):
    """Base error for any failure raised by the LLM judge service."""


def configure(
    azure_endpoint: str,
    api_key: Optional[str] = None,
    deployment_name: str = "gpt-4o",
    api_version: str = "2024-02-15-preview",
) -> dict:
    """Create the module-level Azure OpenAI LangExtract recognizer.

    :param azure_endpoint: Azure OpenAI endpoint URL.
    :param api_key: API key (or None for managed identity).
    :param deployment_name: Azure deployment / model name.
    :param api_version: Azure OpenAI API version.
    :returns: Status dict.
    :raises LLMServiceError: if the optional dependencies are missing or
        recognizer construction fails.
    """
    global _recognizer

    # Import lazily so the rest of the service works without the optional deps.
    try:
        from presidio_analyzer.predefined_recognizers.third_party.azure_openai_langextract_recognizer import (  # noqa: E501
            AzureOpenAILangExtractRecognizer,
        )
    except ImportError as err:
        raise LLMServiceError(
            "langextract or presidio-analyzer is not installed. "
            "Run: pip install langextract presidio-analyzer"
        ) from err

    try:
        recognizer = AzureOpenAILangExtractRecognizer(
            model_id=deployment_name,
            azure_endpoint=azure_endpoint,
            api_key=api_key,
            api_version=api_version,
        )
    except Exception as err:
        # A failed reconfigure also drops any previously working recognizer.
        _recognizer = None
        raise LLMServiceError(f"Failed to initialise recognizer: {err}") from err

    _recognizer = recognizer
    logger.info(
        "LLM Judge configured: endpoint=%s deployment=%s",
        azure_endpoint,
        deployment_name,
    )
    return {"status": "configured", "deployment": deployment_name}


def is_configured() -> bool:
    """Report whether configure() has successfully built a recognizer."""
    ready = _recognizer is not None
    return ready


def disconnect() -> None:
    """Drop the active recognizer; configure() must run again before use."""
    global _recognizer
    # Discard the singleton so is_configured() reports False again.
    _recognizer = None
    logger.info("LLM Judge disconnected")


def analyze_text(text: str) -> list[Entity]:
    """Run the LLM recognizer on a single text and return Entity objects.

    :param text: Raw input text to analyze.
    :returns: One Entity per recognizer result, with the matched substring
        sliced from *text* and the score rounded to 4 decimal places.
    :raises LLMServiceError: if configure() has not been called successfully.
    """
    if _recognizer is None:
        raise LLMServiceError(
            "LLM service not configured. Call /api/llm/configure first."
        )

    # entities=None requests every entity type the recognizer supports.
    results = _recognizer.analyze(text=text, entities=None)

    return [
        Entity(
            text=text[r.start:r.end],
            entity_type=r.entity_type,
            start=r.start,
            end=r.end,
            score=round(r.score, 4),
        )
        for r in results
    ]
44 changes: 44 additions & 0 deletions evaluation/ai-assistant/backend/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import logging

# Configure root logging before any router/service module is imported so
# they all inherit the INFO level.
logging.basicConfig(level=logging.INFO)

# Eagerly import presidio_analyzer so the module is fully initialised before
# concurrent requests trigger lazy imports from different threads (which would
# hit a circular-import race in the presidio_analyzer package).
try:
import presidio_analyzer # noqa: F401
except ImportError:
pass # Optional dependency – endpoints will return clear errors if missing

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from routers import (
decision,
evaluation,
llm,
presidio_service,
review,
upload,
)

# FastAPI application object; served behind CORS for the local dev frontend.
app = FastAPI(title="Presidio Evaluation Flow API", version="0.1.0")

# Allow the Vite dev server (default port 5173) to call this API from the
# browser.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:5173"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Feature routers: dataset upload, human review, evaluation runs,
# decision endpoints, LLM judge, and Presidio analysis.
app.include_router(upload.router)
app.include_router(review.router)
app.include_router(evaluation.router)
app.include_router(decision.router)
app.include_router(llm.router)
app.include_router(presidio_service.router)


@app.get("/api/health")
async def health():
    """Liveness probe: always reports the service as healthy."""
    payload = {"status": "ok"}
    return payload
113 changes: 113 additions & 0 deletions evaluation/ai-assistant/backend/mock_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""Mock data for evaluation / decision stages only."""

from datetime import datetime

from models import (
Entity,
EntityMiss,
EvaluationMetrics,
EvaluationRun,
MissType,
RiskLevel,
)

# Three mock runs showing an improving tuning trajectory (baseline -> v1.2).
# Counts are internally consistent: TP + FP + FN + TN == sample_size, and
# precision / recall / f1 match the confusion-matrix counts (2 dp rounding).
# NOTE(review): timestamps are naive datetimes — presumably local time;
# confirm before comparing across timezones.
EVALUATION_RUNS: list[EvaluationRun] = [
    EvaluationRun(
        id="run-001",
        timestamp=datetime(2025, 2, 20, 10, 30),
        sample_size=500,
        config_version="baseline-v1.0",
        metrics=EvaluationMetrics(
            precision=0.87,
            recall=0.73,
            f1_score=0.79,
            true_positives=245,
            false_positives=36,
            false_negatives=91,
            true_negatives=128,
        ),
    ),
    EvaluationRun(
        id="run-002",
        timestamp=datetime(2025, 2, 22, 14, 15),
        sample_size=500,
        config_version="tuned-v1.1",
        metrics=EvaluationMetrics(
            precision=0.91,
            recall=0.81,
            f1_score=0.86,
            true_positives=272,
            false_positives=27,
            false_negatives=64,
            true_negatives=137,
        ),
    ),
    EvaluationRun(
        id="run-003",
        timestamp=datetime(2025, 2, 25, 9, 0),
        sample_size=500,
        config_version="tuned-v1.2",
        metrics=EvaluationMetrics(
            precision=0.94,
            recall=0.88,
            f1_score=0.91,
            true_positives=296,
            false_positives=19,
            false_negatives=40,
            true_negatives=145,
        ),
    ),
]

# Representative misses for the decision-stage UI. Each start/end pair is the
# half-open character span of the missed entity inside record_text, i.e.
# record_text[start:end] == missed_entity.text.
# Fixed: rec-002 and rec-005 spans were off by one (56-68 -> 55-67 for
# "POL-8821-USA"; 97-105 -> 96-104 for "diabetes").
ENTITY_MISSES: list[EntityMiss] = [
    EntityMiss(
        record_id="rec-004",
        record_text=(
            "Credit card ending in 4532 was used for "
            "transaction. Customer: alice.wong@company.com."
        ),
        missed_entity=Entity(
            text="4532", entity_type="CREDIT_CARD", start=22, end=26, score=0.65
        ),
        miss_type=MissType.false_negative,
        entity_type="CREDIT_CARD",
        risk_level=RiskLevel.high,
    ),
    EntityMiss(
        record_id="rec-002",
        record_text=(
            "Dr. Sarah Johnson reviewed the case. "
            "Insurance Policy: POL-8821-USA."
        ),
        missed_entity=Entity(
            text="POL-8821-USA", entity_type="INSURANCE_POLICY", start=55, end=67
        ),
        miss_type=MissType.false_negative,
        entity_type="INSURANCE_POLICY",
        risk_level=RiskLevel.medium,
    ),
    EntityMiss(
        record_id="rec-005",
        record_text=(
            "Prescription for Robert Chen: Medication ABC-123, dosage 50mg. "
            "Doctor notes indicate history of diabetes."
        ),
        missed_entity=Entity(
            text="diabetes", entity_type="MEDICAL_CONDITION", start=96, end=104
        ),
        miss_type=MissType.false_negative,
        entity_type="MEDICAL_CONDITION",
        risk_level=RiskLevel.high,
    ),
    EntityMiss(
        record_id="rec-003",
        record_text=(
            "Employee ID: EMP-8821, Jane Doe, "
            "started 2023-06-01. Salary: $85,000."
        ),
        missed_entity=Entity(text="$85,000", entity_type="SALARY", start=61, end=68),
        miss_type=MissType.false_negative,
        entity_type="SALARY",
        risk_level=RiskLevel.medium,
    ),
]
Loading
Loading