diff --git a/.env.example b/.env.example index 2fcf23a5..021db554 100644 --- a/.env.example +++ b/.env.example @@ -21,10 +21,11 @@ GCP__ZONE=us-central1-a GCP__MACHINE_TYPE=c3-standard-4 GCP__SERVICE_ACCOUNT=confidential-sa@verifiable-ai-hackathon.iam.gserviceaccount.com GCP__TEE_IMAGE_REFERENCE=ghcr.io/flare-foundation/flare-ai-kit:main -# For production, use `confidential-space-250301` -GCP__CONFIDENTIAL_IMAGE=confidential-space-debug-250301 +# For production, use `family/confidential-space` +GCP__CONFIDENTIAL_IMAGE=family/confidential-space-debug # For production, use `false` GCP__TEE_CONTAINER_LOG_REDIRECT=true +# Use either TDX or SEV GCP__CONFIDENTIAL_COMPUTE_TYPE=TDX GCP__SCOPES=https://www.googleapis.com/auth/cloud-platform GCP__TAGS=flare-ai,http-server,https-server diff --git a/.github/workflows/docker-scripts.yml b/.github/workflows/docker-agents.yml similarity index 66% rename from .github/workflows/docker-scripts.yml rename to .github/workflows/docker-agents.yml index b175f101..11d589de 100644 --- a/.github/workflows/docker-scripts.yml +++ b/.github/workflows/docker-agents.yml @@ -1,18 +1,13 @@ -name: Docker Scripts Validation +name: Docker Agents Validation on: - push: - branches: ["main"] - paths: - - "Dockerfile" - - "scripts/**" - - "pyproject.toml" - - "uv.lock" pull_request: - branches: ["main"] + branches: + - main paths: + - ".github/workflows/docker-agents.yml" - "Dockerfile" - - "scripts/**" + - "agents/**" - "pyproject.toml" - "uv.lock" @@ -20,19 +15,17 @@ permissions: contents: read jobs: - validate-docker-scripts: + validate-docker-agents: runs-on: ubuntu-latest - + strategy: matrix: - script: + agent: - name: "PDF Ingestion" extras: "pdf" - script_file: "ingest_pdf.py" + filename: "ingest_pdf.py" test_timeout: "300" # 5 minutes - env: - # Test environment variables LOG_LEVEL: INFO AGENT__GEMINI_MODEL: "gemini-2.0-flash" AGENT__GEMINI_API_KEY: ${{ secrets.AGENT__GEMINI_API_KEY }} @@ -47,25 +40,25 @@ jobs: - name: Set up Docker Buildx uses: 
docker/setup-buildx-action@v3 - - name: Build Docker image for ${{ matrix.script.name }} + - name: Build Docker image for ${{ matrix.agent.name }} run: | docker build \ - --build-arg EXTRAS=${{ matrix.script.extras }} \ - --build-arg SCRIPT=${{ matrix.script.script_file }} \ - --tag fai-script-${{ matrix.script.extras }} \ + --build-arg EXTRAS=${{ matrix.agent.extras }} \ + --build-arg AGENT=${{ matrix.agent.filename }} \ + --tag fai-agent-${{ matrix.agent.extras }} \ --cache-from type=gha \ --cache-to type=gha,mode=max \ . - - name: Validate script exists in image + - name: Validate agent script exists in image run: | - docker run --rm fai-script-${{ matrix.script.extras }} \ - test -f "/app/scripts/${{ matrix.script.script_file }}" + docker run --rm fai-agent-${{ matrix.agent.extras }} \ + test -f "/app/agents/${{ matrix.agent.filename }}" - - name: Test script startup (dry run) + - name: Test agent startup (dry run) timeout-minutes: 5 run: | - # Simple validation that the script exists and dependencies are available + # Simple validation that the agent exists and dependencies are available docker run --rm \ -e LOG_LEVEL="$LOG_LEVEL" \ -e AGENT__GEMINI_MODEL="$AGENT__GEMINI_MODEL" \ @@ -73,20 +66,20 @@ jobs: -e ECOSYSTEM__WEB3_PROVIDER_URL="$ECOSYSTEM__WEB3_PROVIDER_URL" \ -e INGESTION__CHUNK_SIZE="$INGESTION__CHUNK_SIZE" \ -e TEE__SIMULATE_ATTESTATION_TOKEN="$TEE__SIMULATE_ATTESTATION_TOKEN" \ - fai-script-${{ matrix.script.extras }} \ + fai-agent-${{ matrix.agent.extras }} \ python -c " import sys import os - # Test that script file exists - script_path = '/app/scripts/${{ matrix.script.script_file }}' - if not os.path.exists(script_path): - print(f'❌ Script not found: {script_path}') + # Test that agent file exists + agent_path = '/app/agents/${{ matrix.agent.filename }}' + if not os.path.exists(agent_path): + print(f'❌ Agent not found: {agent_path}') sys.exit(1) - print(f'✅ Script exists: {script_path}') + print(f'✅ Agent exists: {agent_path}') # Test that 
required dependencies are available - if '${{ matrix.script.extras }}' == 'pdf': + if '${{ matrix.agent.extras }}' == 'pdf': try: import PIL import fitz # pymupdf @@ -96,25 +89,24 @@ jobs: print(f'❌ PDF dependency missing: {e}') sys.exit(1) - print('✅ Script validation completed successfully') + print('✅ Agent validation completed successfully') " - name: Test container health run: | # Test that the container can start and the Python environment is healthy - docker run --rm fai-script-${{ matrix.script.extras }} \ + docker run --rm fai-agent-${{ matrix.agent.extras }} \ python -c " import sys print(f'Python version: {sys.version}') print(f'Python path: {sys.path}') - # Test core dependencies (some modules may require optional deps) + # Test core dependencies try: import flare_ai_kit print('✅ flare-ai-kit imported successfully') except ImportError as e: - print(f'⚠️ flare-ai-kit import issue (may need more extras): {e}') - # Test basic Python packages instead + print(f'⚠️ flare-ai-kit import issue: {e}') import httpx, pydantic, structlog print('✅ Core Python dependencies available') @@ -127,16 +119,15 @@ jobs: print('✅ Container health check passed') " - - name: Test script dependencies for ${{ matrix.script.name }} + - name: Test agent dependencies for ${{ matrix.agent.name }} run: | # Test that the specific extras are properly installed - docker run --rm fai-script-${{ matrix.script.extras }} \ + docker run --rm fai-agent-${{ matrix.agent.extras }} \ python -c " import sys - - extras = '${{ matrix.script.extras }}' + extras = '${{ matrix.agent.extras }}' print(f'Testing dependencies for extras: {extras}') - + if 'pdf' in extras: try: import PIL @@ -146,7 +137,7 @@ jobs: except ImportError as e: print(f'❌ PDF dependency missing: {e}') sys.exit(1) - + if 'rag' in extras: try: import qdrant_client @@ -155,7 +146,7 @@ jobs: except ImportError as e: print(f'❌ RAG dependency missing: {e}') sys.exit(1) - + if 'a2a' in extras: try: import fastapi @@ -163,15 +154,13 @@ 
jobs: except ImportError as e: print(f'❌ A2A dependency missing: {e}') sys.exit(1) - + print('✅ All expected dependencies are available') " - - validate-build-args: runs-on: ubuntu-latest - + steps: - name: Checkout repository uses: actions/checkout@v6 @@ -182,21 +171,21 @@ jobs: - name: Test build without extras run: | docker build \ - --build-arg SCRIPT=ingest_pdf.py \ - --tag fai-script-base \ + --build-arg AGENT=ingest_pdf.py \ + --tag fai-agent-base \ . - name: Test build with multiple extras run: | docker build \ --build-arg EXTRAS=pdf,rag \ - --build-arg SCRIPT=ingest_pdf.py \ - --tag fai-script-multi \ + --build-arg AGENT=ingest_pdf.py \ + --tag fai-agent-multi \ . - name: Validate multi-extras build run: | - docker run --rm fai-script-multi \ + docker run --rm fai-agent-multi \ python -c " import PIL, fitz, pytesseract # PDF deps import qdrant_client, dulwich # RAG deps @@ -205,27 +194,27 @@ jobs: validate-documentation: runs-on: ubuntu-latest - + steps: - name: Checkout repository uses: actions/checkout@v6 - name: Check documentation exists run: | - test -f docs/docker_scripts_guide.md - echo "✅ Docker scripts guide exists" + test -f docs/docker_agents_guide.md + echo "✅ Docker agents guide exists" - name: Validate README updates run: | grep -q "parametric Dockerfile" README.md grep -q "EXTRAS" README.md - grep -q "docker_scripts_guide.md" README.md - echo "✅ README contains Docker scripts documentation" + echo "✅ README contains Docker agents documentation" - - name: Check scripts directory structure + - name: Check agents directory structure run: | - test -d scripts - test -f scripts/ingest_pdf.py - test -d scripts/data - test -f scripts/data/create_sample_invoice.py - echo "✅ Scripts directory structure is correct" + test -d agents + test -f agents/ingest_pdf.py + test -d agents/data + test -f agents/data/create_sample_invoice.py + + echo "✅ Agents directory structure is correct" diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml 
index b5dda46f..6d088049 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,10 +1,9 @@ name: Lint on: - push: - branches: ["main"] pull_request: - branches: ["main"] + branches: + - main permissions: contents: read @@ -21,20 +20,12 @@ jobs: uses: astral-sh/setup-uv@v7 with: version: "latest" - - # Install dependencies - name: Install dependencies run: uv sync --frozen --all-extras - - # Format code with ruff - name: Format with ruff run: uv run ruff format --diff - - # Lint code with ruff - name: Lint with ruff run: uv run ruff check --diff continue-on-error: false - - # Type check with pyright - name: Type check with pyright run: uv run pyright diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5032354d..36de18c6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,10 +1,9 @@ name: Test on: - push: - branches: ["main"] pull_request: - branches: ["main"] + branches: + - main permissions: contents: read @@ -41,15 +40,10 @@ jobs: uses: astral-sh/setup-uv@v7 with: version: "latest" - - # Install dependencies + enable-cache: true - name: Install dependencies run: uv sync --frozen --all-extras - - # Run unit tests with pytest - name: Run unit tests run: uv run pytest tests/unit -v - - # Run integration tests with pytest - name: Run integration tests run: uv run pytest tests/integration -v diff --git a/Makefile b/Makefile index 3377b6c4..c8bb8ec0 100644 --- a/Makefile +++ b/Makefile @@ -1,18 +1,18 @@ -# Flare AI Kit - Docker Scripts Makefile -# Provides convenient targets for building and running Docker scripts +# Flare AI Kit - Docker Agents Makefile +# Provides convenient targets for building and running Docker agents .PHONY: help build-pdf run-pdf build-rag run-rag build-a2a run-a2a build-multi run-multi clean-images list-images # Default target help: ## Show this help message - @echo "Flare AI Kit - Docker Scripts" + @echo "Flare AI Kit - Docker Agents" @echo "=============================" 
@echo "" @echo "Available targets:" @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " %-20s %s\n", $$1, $$2}' $(MAKEFILE_LIST) @echo "" @echo "Environment variables:" - @echo " DATA_DIR - Local directory to mount as /app/scripts/data (default: ./scripts/data)" + @echo " DATA_DIR - Local directory to mount as /app/agents/data (default: ./agents/data)" @echo " ENV_FILE - Environment file to use (default: .env)" @echo " DOCKER_OPTS - Additional docker run options" @echo "" @@ -22,7 +22,7 @@ help: ## Show this help message @echo " make run-pdf DOCKER_OPTS='--rm -it'" # Configuration -DATA_DIR ?= ./scripts/data +DATA_DIR ?= ./agents/data ENV_FILE ?= .env DOCKER_OPTS ?= --rm -it @@ -31,13 +31,13 @@ build-pdf: ## Build Docker image for PDF processing @echo "Building PDF processing image..." docker build \ --build-arg EXTRAS=pdf \ - --build-arg SCRIPT=ingest_pdf.py \ - --tag fai-script-pdf \ + --build-arg AGENT=ingest_pdf.py \ + --tag fai-agent-pdf \ . - @echo "✅ PDF image built: fai-script-pdf" + @echo "✅ PDF image built: fai-agent-pdf" -run-pdf: build-pdf ## Build and run PDF processing script - @echo "Running PDF processing script..." +run-pdf: build-pdf ## Build and run PDF processing agent + @echo "Running PDF processing agent..." @if [ ! -f "$(ENV_FILE)" ]; then \ echo "⚠️ Environment file $(ENV_FILE) not found. Creating example..."; \ echo "AGENT__GEMINI_API_KEY=your_gemini_api_key_here" > $(ENV_FILE).example; \ @@ -48,21 +48,21 @@ run-pdf: build-pdf ## Build and run PDF processing script @mkdir -p $(DATA_DIR) docker run $(DOCKER_OPTS) \ --env-file $(ENV_FILE) \ - -v "$(shell pwd)/$(DATA_DIR):/app/scripts/data" \ - fai-script-pdf + -v "$(shell pwd)/$(DATA_DIR):/app/agents/data" \ + fai-agent-pdf # RAG Processing build-rag: ## Build Docker image for RAG processing @echo "Building RAG processing image..." 
docker build \ --build-arg EXTRAS=rag \ - --build-arg SCRIPT=ingest_pdf.py \ - --tag fai-script-rag \ + --build-arg AGENT=ingest_pdf.py \ + --tag fai-agent-rag \ . - @echo "✅ RAG image built: fai-script-rag" + @echo "✅ RAG image built: fai-agent-rag" -run-rag: build-rag ## Build and run RAG processing script - @echo "Running RAG processing script..." +run-rag: build-rag ## Build and run RAG processing agent + @echo "Running RAG processing agent..." @if [ ! -f "$(ENV_FILE)" ]; then \ echo "⚠️ Environment file $(ENV_FILE) not found"; \ exit 1; \ @@ -70,21 +70,21 @@ run-rag: build-rag ## Build and run RAG processing script @mkdir -p $(DATA_DIR) docker run $(DOCKER_OPTS) \ --env-file $(ENV_FILE) \ - -v "$(shell pwd)/$(DATA_DIR):/app/scripts/data" \ - fai-script-rag + -v "$(shell pwd)/$(DATA_DIR):/app/agents/data" \ + fai-agent-rag # A2A Processing build-a2a: ## Build Docker image for A2A processing @echo "Building A2A processing image..." docker build \ --build-arg EXTRAS=a2a \ - --build-arg SCRIPT=ingest_pdf.py \ - --tag fai-script-a2a \ + --build-arg AGENT=ingest_pdf.py \ + --tag fai-agent-a2a \ . - @echo "✅ A2A image built: fai-script-a2a" + @echo "✅ A2A image built: fai-agent-a2a" -run-a2a: build-a2a ## Build and run A2A processing script - @echo "Running A2A processing script..." +run-a2a: build-a2a ## Build and run A2A processing agent + @echo "Running A2A processing agent..." @if [ ! -f "$(ENV_FILE)" ]; then \ echo "⚠️ Environment file $(ENV_FILE) not found"; \ exit 1; \ @@ -92,21 +92,21 @@ run-a2a: build-a2a ## Build and run A2A processing script @mkdir -p $(DATA_DIR) docker run $(DOCKER_OPTS) \ --env-file $(ENV_FILE) \ - -v "$(shell pwd)/$(DATA_DIR):/app/scripts/data" \ - fai-script-a2a + -v "$(shell pwd)/$(DATA_DIR):/app/agents/data" \ + fai-agent-a2a # Multi-functionality build build-multi: ## Build Docker image with multiple extras (pdf,rag,a2a) @echo "Building multi-functionality image..." 
docker build \ --build-arg EXTRAS=pdf,rag,a2a \ - --build-arg SCRIPT=ingest_pdf.py \ - --tag fai-script-multi \ + --build-arg AGENT=ingest_pdf.py \ + --tag fai-agent-multi \ . - @echo "✅ Multi image built: fai-script-multi" + @echo "✅ Multi image built: fai-agent-multi" -run-multi: build-multi ## Build and run multi-functionality script - @echo "Running multi-functionality script..." +run-multi: build-multi ## Build and run multi-functionality agent + @echo "Running multi-functionality agent..." @if [ ! -f "$(ENV_FILE)" ]; then \ echo "⚠️ Environment file $(ENV_FILE) not found"; \ exit 1; \ @@ -114,31 +114,31 @@ run-multi: build-multi ## Build and run multi-functionality script @mkdir -p $(DATA_DIR) docker run $(DOCKER_OPTS) \ --env-file $(ENV_FILE) \ - -v "$(shell pwd)/$(DATA_DIR):/app/scripts/data" \ - fai-script-multi + -v "$(shell pwd)/$(DATA_DIR):/app/agents/data" \ + fai-agent-multi # Custom builds -build-custom: ## Build custom image (use EXTRAS and SCRIPT env vars) +build-custom: ## Build custom image (use EXTRAS and AGENT env vars) @if [ -z "$(EXTRAS)" ]; then \ echo "❌ EXTRAS environment variable is required"; \ - echo "Usage: make build-custom EXTRAS=pdf,rag SCRIPT=my_script.py"; \ + echo "Usage: make build-custom EXTRAS=pdf,rag AGENT=my_agent.py"; \ exit 1; \ fi - @if [ -z "$(SCRIPT)" ]; then \ - echo "❌ SCRIPT environment variable is required"; \ - echo "Usage: make build-custom EXTRAS=pdf,rag SCRIPT=my_script.py"; \ + @if [ -z "$(AGENT)" ]; then \ + echo "❌ AGENT environment variable is required"; \ + echo "Usage: make build-custom EXTRAS=pdf,rag AGENT=my_agent.py"; \ exit 1; \ fi - @echo "Building custom image with EXTRAS=$(EXTRAS) SCRIPT=$(SCRIPT)..." + @echo "Building custom image with EXTRAS=$(EXTRAS) AGENT=$(AGENT)..." docker build \ --build-arg EXTRAS=$(EXTRAS) \ - --build-arg SCRIPT=$(SCRIPT) \ - --tag fai-script-custom \ + --build-arg AGENT=$(AGENT) \ + --tag fai-agent-custom \ . 
- @echo "✅ Custom image built: fai-script-custom" + @echo "✅ Custom image built: fai-agent-custom" -run-custom: build-custom ## Build and run custom script (use EXTRAS and SCRIPT env vars) - @echo "Running custom script..." +run-custom: build-custom ## Build and run custom agent (use EXTRAS and AGENT env vars) + @echo "Running custom agent..." @if [ ! -f "$(ENV_FILE)" ]; then \ echo "⚠️ Environment file $(ENV_FILE) not found"; \ exit 1; \ @@ -146,8 +146,8 @@ run-custom: build-custom ## Build and run custom script (use EXTRAS and SCRIPT e @mkdir -p $(DATA_DIR) docker run $(DOCKER_OPTS) \ --env-file $(ENV_FILE) \ - -v "$(shell pwd)/$(DATA_DIR):/app/scripts/data" \ - fai-script-custom + -v "$(shell pwd)/$(DATA_DIR):/app/agents/data" \ + fai-agent-custom # Development helpers dev-shell: build-pdf ## Start interactive shell in PDF container for development @@ -155,21 +155,21 @@ dev-shell: build-pdf ## Start interactive shell in PDF container for development @mkdir -p $(DATA_DIR) docker run $(DOCKER_OPTS) \ --env-file $(ENV_FILE) \ - -v "$(shell pwd)/scripts:/app/scripts" \ + -v "$(shell pwd)/agents:/app/agents" \ -v "$(shell pwd)/src:/app/src" \ - -v "$(shell pwd)/$(DATA_DIR):/app/scripts/data" \ + -v "$(shell pwd)/$(DATA_DIR):/app/agents/data" \ --entrypoint /bin/bash \ - fai-script-pdf + fai-agent-pdf # Utility targets -list-images: ## List all fai-script Docker images - @echo "Flare AI Kit script images:" - @docker images --filter "reference=fai-script-*" --format "table {{.Repository}}\t{{.Tag}}\t{{.Size}}\t{{.CreatedAt}}" +list-images: ## List all fai-agent Docker images + @echo "Flare AI Kit agent images:" + @docker images --filter "reference=fai-agent-*" --format "table {{.Repository}}\t{{.Tag}}\t{{.Size}}\t{{.CreatedAt}}" -clean-images: ## Remove all fai-script Docker images - @echo "Removing all fai-script images..." 
- @docker images --filter "reference=fai-script-*" -q | xargs -r docker rmi -f - @echo "✅ Cleaned up fai-script images" +clean-images: ## Remove all fai-agent Docker images + @echo "Removing all fai-agent images..." + @docker images --filter "reference=fai-agent-*" -q | xargs -r docker rmi -f + @echo "✅ Cleaned up fai-agent images" # Test targets test-build: ## Test building all main image variants @@ -205,4 +205,4 @@ setup-env: ## Create example environment file fi # Quick start -quick-start: setup-env run-pdf ## Quick start: setup environment and run PDF script +quick-start: setup-env run-pdf ## Quick start: setup environment and run PDF agent diff --git a/README.md b/README.md index b7ce6adf..ec98bc19 100644 --- a/README.md +++ b/README.md @@ -114,20 +114,20 @@ docker build -t flare-ai-kit . docker run --rm --env-file .env flare-ai-kit ``` -### Running Individual Scripts +### Running Individual Agents -The repository includes a parametric Dockerfile for running specific scripts with only the dependencies they need: +The repository includes a parametric Dockerfile for running specific agents with only the dependencies they need: ```bash -# Build and run PDF ingestion script -docker build -t fai-script-pdf \ +# Build and run PDF agent +docker build -t fai-agent-pdf \ --build-arg EXTRAS=pdf \ - --build-arg SCRIPT=ingest_pdf.py . + --build-arg AGENT=ingest_pdf.py . docker run --rm -it \ - -v "$PWD/scripts/data:/app/scripts/data" \ + -v "$PWD/agents/data:/app/agents/data" \ --env-file .env \ - fai-script-pdf + fai-agent-pdf ``` Available `EXTRAS`: `pdf`, `rag`, `a2a`, `ftso`, `da`, `fassets`, `social`, `tee`, `wallet`, `ingestion` @@ -140,11 +140,11 @@ See [Docker Scripts Guide](docs/docker_scripts_guide.md) for detailed usage inst 1. **Configure GCP:** Set all `GCP__*` variables in your `.env` file. -2. **Deploy:** +2. 
**Build, push to Artifacts Registry and Deploy:** ```bash - chmod +x gcloud-deploy.sh - ./gcloud-deploy.sh # verbose: ./gcloud-deploy.sh -v + chmod +x deploy-tee.sh + ./deploy-tee.sh ``` ## 🤝 Contributing diff --git a/scripts/__init__.py b/agents/__init__.py similarity index 100% rename from scripts/__init__.py rename to agents/__init__.py diff --git a/scripts/data/__init__.py b/agents/data/__init__.py similarity index 100% rename from scripts/data/__init__.py rename to agents/data/__init__.py diff --git a/scripts/data/create_sample_invoice.py b/agents/data/create_sample_invoice.py similarity index 100% rename from scripts/data/create_sample_invoice.py rename to agents/data/create_sample_invoice.py diff --git a/agents/ingest_pdf.py b/agents/ingest_pdf.py new file mode 100644 index 00000000..1cf747d9 --- /dev/null +++ b/agents/ingest_pdf.py @@ -0,0 +1,265 @@ +""" +PDF ingestion agent. +""" + +from __future__ import annotations + +import json +import os +import re +from typing import TYPE_CHECKING, Any +from unittest.mock import AsyncMock, mock_open, patch + +import structlog +import uvicorn + +# Import from local data directory +from data.create_sample_invoice import create_invoice_and_build_template +from fastapi import FastAPI, HTTPException, Request +from google.adk.agents import Agent +from google.adk.runners import Runner +from google.adk.sessions import InMemorySessionService +from google.genai import types +from pydantic import BaseModel + +from flare_ai_kit import FlareAIKit +from flare_ai_kit.agent.pdf_tools import read_pdf_text_tool +from flare_ai_kit.config import AppSettings +from flare_ai_kit.ingestion.settings import ( + IngestionSettings, + OnchainContractSettings, + PDFIngestionSettings, + PDFTemplateSettings, +) + +if TYPE_CHECKING: + from pathlib import Path + +logger = structlog.get_logger(__name__) + +# --- TEE Configuration --- +EXTENSION_PORT = 8889 +MOCK_TX_HASH = "0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef" + 
+SYSTEM_INSTRUCTION = ( + "You are a PDF extraction agent. Independently read PDFs using tools and return ONLY JSON " + "matching the user provided template schema. \n" + "- Always call read_pdf_text first.\n" + "- If a field is not found, set value to null.\n" + "- Reply with a single JSON object only (no markdown, no prose)." +) + + +# --- Global State --- +class AppState: + def __init__(self) -> None: + self.status = "initialized" + self.last_result: dict[str, Any] | None = None + self.transaction_hash: str | None = None + + +app_state = AppState() +app = FastAPI() + + +# --- Helper Functions --- + + +def build_prompt( + pdf: Path, template: PDFTemplateSettings, max_pages: int | None +) -> str: + """Constructs the analysis prompt.""" + return ( + f"PDF_PATH: {pdf}\nMAX_PAGES: {max_pages or 'ALL'}\n" + f"TEMPLATE_SCHEMA:\n```json\n{json.dumps(template.model_dump())}\n```\n" + "Extract fields based on the schema above from the PDF." + ) + + +def extract_json(text: str) -> dict[str, Any]: + """Robustly extracts JSON from raw LLM response text.""" + try: + return json.loads(text) + except json.JSONDecodeError as err: + if match := re.search( + r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL | re.IGNORECASE + ): + return json.loads(match.group(1)) + if match := re.search(r"\{.*\}", text, re.DOTALL): + return json.loads(match.group(0)) + msg = f"Could not extract valid JSON from response: {text[:100]}..." 
+ raise ValueError(msg) from err + + +async def run_extraction_agent( + agent: Agent, + pdf: Path, + template: PDFTemplateSettings, +) -> dict[str, Any]: + """Setup in-memory ADK agent, give it the PDF, template and prompt.""" + svc = InMemorySessionService() + await svc.create_session(app_name="agents", user_id="user", session_id="session") + runner = Runner(agent=agent, app_name="agents", session_service=svc) + + prompt = build_prompt(pdf, template, max_pages=1) + message = types.Content(role="user", parts=[types.Part(text=prompt)]) + + logger.info("calling agent", model=agent.model) + + final_text = "" + async for event in runner.run_async( + user_id="user", session_id="session", new_message=message + ): + if event.is_final_response() and event.content and event.content.parts: + final_text = event.content.parts[0].text + break + + if not final_text: + msg = "Agent produced no response." + raise RuntimeError(msg) + + return extract_json(final_text) + + +async def process_pdf_workflow() -> dict[str, Any]: + """ + Refactored logic from the original main() function. + This runs the actual business logic when triggered by the TEE. 
+ """ + logger.info("Starting PDF workflow...") + app_state.status = "processing" + + try: + # Create PDF and save it (In a real TEE, this might come from the input payload) + pdf_path, template = create_invoice_and_build_template("generated_invoice") + logger.info("loaded pdf", path=pdf_path) + + # Add template to global settings + app_settings = AppSettings( + log_level="INFO", + ingestion=IngestionSettings( + pdf_ingestion=PDFIngestionSettings( + templates=[template], + use_ocr=False, + contract_settings=OnchainContractSettings( + contract_address="0x0000000000000000000000000000000000000000", + abi_name="OnchainDataRegistry", + function_name="registerDocument", + ), + ) + ), + ) + + # Inject Gemini API Key + if app_settings.agent and app_settings.agent.gemini_api_key: + api_key = app_settings.agent.gemini_api_key.get_secret_value() + os.environ["GOOGLE_API_KEY"] = api_key + + # Create ADK agent with tool access + agent = Agent( + name="flare_pdf_agent", + model=app_settings.agent.gemini_model, + tools=[read_pdf_text_tool], + instruction=SYSTEM_INSTRUCTION, + generate_content_config=types.GenerateContentConfig( + temperature=0.0, top_k=1, top_p=0.3, candidate_count=1 + ), + ) + + kit = FlareAIKit(config=app_settings) + + # Deterministic parsing + parsed = kit.pdf_processor.process_pdf( + file_path=str(pdf_path), template_name=template.template_name + ) + logger.info("deterministic parsed", parsed=parsed) + + # Agent parsing + result = await run_extraction_agent(agent, pdf_path, template) + logger.info("agent parsed", result=result) + + # Update state with result + app_state.last_result = result + + # Mock onchain contract posting + with ( + patch( + "flare_ai_kit.onchain.contract_poster.ContractPoster.post_data", + new_callable=AsyncMock, + return_value=MOCK_TX_HASH, + ) as mock_post, + patch( + "flare_ai_kit.onchain.contract_poster.open", mock_open(read_data="[]") + ), + ): + kit = FlareAIKit(config=app_settings) + tx_hash = await 
kit.pdf_processor.contract_poster.post_data(parsed) + logger.info( + "posted onchain", tx_hash=tx_hash, args=mock_post.call_args[0][0] + ) + + app_state.transaction_hash = tx_hash + app_state.status = "completed" + + return {"status": "success", "result": result, "tx_hash": tx_hash} + + except Exception as e: + logger.exception("Error in PDF workflow", error=str(e)) + app_state.status = "error" + raise + + +# --- Interface Endpoints --- + + +class ActionRequest(BaseModel): + action: str + payload: dict[str, Any] | None = None + + +@app.post("/action") +async def handle_action(request: Request) -> dict[str, Any]: + """ + Endpoint called to trigger operations. + """ + try: + # We accept generic JSON here to be flexible with TEE payloads + body = await request.json() + logger.info("Received action", body=body) + + # Trigger the workflow + # In a real scenario, we might inspect body['action'] to decide what to do. + result = await process_pdf_workflow() + except Exception as e: + logger.exception("Action failed") + raise HTTPException(status_code=500, detail={"description": str(e)}) from e + else: + return {"result": result} + + +@app.get("/state") +async def get_state( + stateVersion: str | None = None, # noqa: ARG001,N803 + state: str | None = None, # noqa: ARG001 +) -> dict[str, Any]: + """ + Endpoint called for the current state. + Returns ABI encoded state (here simulated as hex encoded JSON). 
+ """ + current_state = { + "status": app_state.status, + "tx_hash": app_state.transaction_hash, + "last_data": app_state.last_result, + } + + # Convert state to a hex string to mimic ABI encoding requirement generically + state_json = json.dumps(current_state) + state_hex = "0x" + state_json.encode("utf-8").hex() + + return {"result": state_hex} + + +# --- Entrypoint --- + +if __name__ == "__main__": + uvicorn.run(app, host="0.0.0.0", port=EXTENSION_PORT) # noqa: S104 diff --git a/deploy-tee.sh b/deploy-tee.sh new file mode 100755 index 00000000..04c07c08 --- /dev/null +++ b/deploy-tee.sh @@ -0,0 +1,190 @@ +#!/bin/bash + +# ============================================================================== +# TEE BUILD & DEPLOY SCRIPT +# 1. Builds the image using Docker +# 2. Pushes the built image to Artifacts Registry +# 3. Sets up a new GCP Confidential Space instance from the image +# ============================================================================== + +set -e # Exit immediately if any command fails + +# --- 1. Helper Functions --- +log_info() { echo -e "\033[1;34m[INFO]\033[0m $1"; } +log_warn() { echo -e "\033[1;33m[WARN]\033[0m $1"; } +log_succ() { echo -e "\033[1;32m[SUCCESS]\033[0m $1"; } +log_err() { echo -e "\033[1;31m[ERROR]\033[0m $1"; } + +usage() { + echo "Usage: $0 [options]" + echo "Options:" + echo " --agent Python agent to ingest (default: ingest_pdf.py)" + echo " --extras Suffix for image tag (default: pdf)" + echo " --skip-build Skip Docker build/push, only deploy infrastructure" + echo " --skip-deploy Skip infrastructure deploy, only build Docker" + echo " --force-yes Skip all confirmation prompts" + echo " --help Show this message" + exit 1 +} + +# --- 2. 
Defaults & Argument Parsing --- +AGENT="ingest_pdf.py" +EXTRAS="pdf" +DO_BUILD=true +DO_DEPLOY=true +FORCE_YES=false + +while [[ "$#" -gt 0 ]]; do + case $1 in + --agent) AGENT="$2"; shift ;; + --extras) EXTRAS="$2"; shift ;; + --skip-build) DO_BUILD=false ;; + --skip-deploy) DO_DEPLOY=false ;; + --force-yes) FORCE_YES=true ;; + --help) usage ;; + *) echo "Unknown parameter: $1"; usage ;; + esac + shift +done + +# --- 3. Load Environment --- +if [ -f .env ]; then + log_info "Sourcing .env file..." + set -a; source .env; set +a +else + log_err ".env file not found! Exiting." + exit 1 +fi + +# Ensure mandatory variable exists initially +: "${GCP__TEE_IMAGE_REFERENCE:?Please set GCP__TEE_IMAGE_REFERENCE in .env}" + +# --- 4. Dynamic Image Tag Calculation --- +# Get the repository base +# e.g., us-central1.../flare-ai-kit/fai-agent-pdf:latest -> us-central1.../flare-ai-kit +REPO_BASE=$(echo "$GCP__TEE_IMAGE_REFERENCE" | sed 's|\(.*\)/.*|\1|') + +# Construct the specific target image +TARGET_IMAGE="${REPO_BASE}/fai-agent-${EXTRAS}:latest" + +log_info "Configuration:" +echo " Agent: $AGENT" +echo " Extras: $EXTRAS" +echo " Target Image: $TARGET_IMAGE" + +# Overwrite the environment variable in memory so the Deploy step uses the correct image +GCP__TEE_IMAGE_REFERENCE="$TARGET_IMAGE" + + +# ============================================================================== +# PHASE 1: DOCKER BUILD & PUSH +# ============================================================================== +if [ "$DO_BUILD" = true ]; then + log_info "Starting Build Phase..." + + # Extract keys for ALLOWED_ENVS + KEYS=$(grep -E '^[A-Z]' .env | cut -d= -f1 | paste -sd, -) + + echo " Building Docker image..." + docker build -t "$TARGET_IMAGE" \ + --platform linux/amd64 \ + --build-arg AGENT="$AGENT" \ + --build-arg EXTRAS="$EXTRAS" \ + --build-arg ALLOWED_ENVS="LOG_LEVEL,${KEYS}" . + + echo " Pushing to Artifact Registry..." + docker push "$TARGET_IMAGE" + log_succ "Build and Push Complete." 
+else + log_info "Skipping Build Phase." +fi + + +# ============================================================================== +# PHASE 2: INFRASTRUCTURE DEPLOYMENT +# ============================================================================== +if [ "$DO_DEPLOY" = true ]; then + log_info "Starting Deployment Phase..." + + # Validate Deployment Variables + : "${GCP__INSTANCE_NAME:?Set GCP__INSTANCE_NAME}" + : "${GCP__PROJECT:?Set GCP__PROJECT}" + : "${GCP__ZONE:?Set GCP__ZONE}" + : "${GCP__MACHINE_TYPE:?Set GCP__MACHINE_TYPE}" + : "${GCP__SERVICE_ACCOUNT:?Set GCP__SERVICE_ACCOUNT}" + : "${GCP__CONFIDENTIAL_IMAGE:?Set GCP__CONFIDENTIAL_IMAGE}" + : "${GCP__CONFIDENTIAL_COMPUTE_TYPE:?Set GCP__CONFIDENTIAL_COMPUTE_TYPE}" + + # Check for Existing Instance + if gcloud compute instances describe "$GCP__INSTANCE_NAME" \ + --project="$GCP__PROJECT" --zone="$GCP__ZONE" --format="json" >/dev/null 2>&1; then + + log_warn "Instance '$GCP__INSTANCE_NAME' already exists." + + if [ "$FORCE_YES" = true ]; then + REPLY="y" + else + read -p " Do you want to delete and redeploy? (y/N) " -n 1 -r + echo + fi + + if [[ $REPLY =~ ^[Yy]$ ]]; then + log_info "Deleting instance..." + gcloud compute instances delete "$GCP__INSTANCE_NAME" \ + --project="$GCP__PROJECT" --zone="$GCP__ZONE" --quiet + else + log_err "Deployment cancelled by user." 
+ exit 1 + fi + fi + + # Prepare TEE Metadata Variables + PREFIX_PATTERN="^(AGENT__|ECOSYSTEM__|VECTOR_DB__|GRAPH_DB__|SOCIAL__|TEE__|INGESTION__|LOG_LEVEL|APP_ENV)" + VAR_NAMES=$(printenv | grep -E "$PREFIX_PATTERN" | cut -d'=' -f1) + METADATA_VARS="" + + if [ -n "$VAR_NAMES" ]; then + for VAR_NAME in $VAR_NAMES; do + VAR_VALUE="${!VAR_NAME}" + # Pass both standard and Flare-specific env prefixes + METADATA_VARS="${METADATA_VARS},tee-env-${VAR_NAME}=${VAR_VALUE},stee-env-${VAR_NAME}=${VAR_VALUE}" + done + fi + + # Construct GCloud Command + COMMAND=( + gcloud compute instances create "$GCP__INSTANCE_NAME" + --project="$GCP__PROJECT" + --zone="$GCP__ZONE" + --machine-type="$GCP__MACHINE_TYPE" + --network-interface=network-tier=PREMIUM,nic-type=GVNIC,stack-type=IPV4_ONLY,subnet=default + --metadata="tee-image-reference=$GCP__TEE_IMAGE_REFERENCE,stee-image-reference=$GCP__TEE_IMAGE_REFERENCE,stee-container-log-redirect=$GCP__TEE_CONTAINER_LOG_REDIRECT,tee-container-log-redirect=$GCP__TEE_CONTAINER_LOG_REDIRECT${METADATA_VARS}" + --maintenance-policy=TERMINATE + --provisioning-model=STANDARD + --service-account="$GCP__SERVICE_ACCOUNT" + --scopes="$GCP__SCOPES" + --tags="$GCP__TAGS" + --create-disk=auto-delete=yes,boot=yes,device-name="$GCP__INSTANCE_NAME",image=projects/confidential-space-images/global/images/"$GCP__CONFIDENTIAL_IMAGE",mode=rw,size=20,type=pd-balanced + --shielded-secure-boot + --shielded-vtpm + --shielded-integrity-monitoring + --reservation-affinity=any + --confidential-compute-type="$GCP__CONFIDENTIAL_COMPUTE_TYPE" + ) + + log_info "Deploying instance using image: $GCP__TEE_IMAGE_REFERENCE" + + if [ "$FORCE_YES" != true ]; then + read -p " Ready to deploy. Continue? (y/N) " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + log_err "Cancelled." + exit 1 + fi + fi + + "${COMMAND[@]}" + log_succ "Instance '$GCP__INSTANCE_NAME' deployed successfully." +else + log_info "Skipping Deployment Phase." 
+fi \ No newline at end of file diff --git a/docs/docker_scripts_guide.md b/docs/docker_agents_guide.md similarity index 76% rename from docs/docker_scripts_guide.md rename to docs/docker_agents_guide.md index 6050d311..e3cbbd22 100644 --- a/docs/docker_scripts_guide.md +++ b/docs/docker_agents_guide.md @@ -1,12 +1,12 @@ -# Docker Scripts Guide +# Docker Agents Guide -This guide explains how to use the parametric Dockerfile to run scripts from the `scripts/` directory with specific dependency groups. +This guide explains how to use the parametric Dockerfile to run agents from the `agents/` directory with specific dependency groups. ## Overview The Dockerfile at the repository root is designed to be parametric, allowing you to: - Install only the dependencies needed for specific functionality (via `EXTRAS`) -- Run any script from the `scripts/` directory (via `SCRIPT`) +- Run any agent from the `agents/` directory (via `AGENT`) - Keep images minimal and reproducible using `uv.lock` ## Build Arguments @@ -24,23 +24,23 @@ Specifies which optional dependency groups to install. Available options: - `wallet` - Wallet functionality (eth-account, cryptography) - `ingestion` - General ingestion capabilities -### `SCRIPT` -Specifies which script to run from the `scripts/` directory. Default: `ingest_pdf.py` +### `AGENT` +Specifies which agent to run from the `agents/` directory. Default: `ingest_pdf.py` ## Basic Usage -### PDF Ingestion Script +### PDF Ingestion Agent ```bash # Build the image for PDF processing -docker build -t fai-script-pdf \ --build-arg EXTRAS=pdf \ - --build-arg SCRIPT=ingest_pdf.py . + --build-arg AGENT=ingest_pdf.py . 
-# Run the script +# Run the agent docker run --rm -it \ - -v "$PWD/scripts/data:/app/scripts/data" \ - fai-script-pdf + -v "$PWD/agents/data:/app/agents/data" \ + fai-agent-pdf ``` ### With Environment Variables @@ -51,8 +51,8 @@ docker run --rm -it \ -e AGENT__GEMINI_API_KEY="your_gemini_api_key" \ -e ECOSYSTEM__WEB3_PROVIDER_URL="https://flare-api.flare.network/ext/C/rpc" \ -e LOG_LEVEL="INFO" \ - -v "$PWD/scripts/data:/app/scripts/data" \ - fai-script-pdf + -v "$PWD/agents/data:/app/agents/data" \ + fai-agent-pdf ``` ### Using Environment File @@ -68,8 +68,8 @@ EOF # Run with environment file docker run --rm -it \ --env-file .env.docker \ - -v "$PWD/scripts/data:/app/scripts/data" \ - fai-script-pdf + -v "$PWD/agents/data:/app/agents/data" \ + fai-agent-pdf ``` ## Advanced Usage @@ -78,42 +78,42 @@ docker run --rm -it \ ```bash # Build with multiple dependency groups -docker build -t fai-script-multi \ +docker build -t fai-agent-multi \ --build-arg EXTRAS="pdf,rag,a2a" \ - --build-arg SCRIPT=ingest_pdf.py . + --build-arg AGENT=ingest_pdf.py . ``` -### Custom Script +### Custom Agent ```bash -# Build for a custom script (once you create more scripts) -docker build -t fai-script-custom \ +# Build for a custom agent (once you create more agents) +docker build -t fai-agent-custom \ --build-arg EXTRAS=rag \ - --build-arg SCRIPT=my_custom_script.py . + --build-arg AGENT=my_custom_agent.py . ``` ### Development Mode with Volume Mounts ```bash -# Mount the entire scripts directory for development +# Mount the entire agents directory for development docker run --rm -it \ - -v "$PWD/scripts:/app/scripts" \ + -v "$PWD/agents:/app/agents" \ -v "$PWD/src:/app/src" \ --env-file .env.docker \ - fai-script-pdf + fai-agent-pdf ``` ## Data Mounting ### PDF Data Directory -The PDF ingestion script expects data in `/app/scripts/data/`. Mount your local data: +The PDF ingestion agent expects data in `/app/agents/data/`. 
Mount your local data: ```bash # Mount local data directory docker run --rm -it \ - -v "$PWD/my-pdfs:/app/scripts/data" \ - fai-script-pdf + -v "$PWD/my-pdfs:/app/agents/data" \ + fai-agent-pdf ``` ### Persistent Output @@ -121,9 +121,9 @@ docker run --rm -it \ ```bash # Mount output directory for persistent results docker run --rm -it \ - -v "$PWD/scripts/data:/app/scripts/data" \ + -v "$PWD/agents/data:/app/agents/data" \ -v "$PWD/output:/app/output" \ - fai-script-pdf + fai-agent-pdf ``` ## Environment Variables @@ -167,9 +167,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ ### Script Not Found Error ``` -Error: Script /app/scripts/my_script.py not found +Error: Script /app/agents/my_agent.py not found ``` -Ensure your script exists in the `scripts/` directory and the filename matches the `SCRIPT` build arg. +Ensure your agent exists in the `agents/` directory and the filename matches the `AGENT` build arg. ### Missing Dependencies ``` @@ -199,8 +199,8 @@ This shouldn't happen with the provided Dockerfile, but if it does, ensure Tesse ## Examples Repository -See the `scripts/` directory for example scripts: +See the `agents/` directory for example agents: - `ingest_pdf.py` - PDF ingestion and processing -- More scripts will be added as the project grows +- More agents will be added as the project grows -Each script should be self-contained and follow the same pattern for consistency. +Each agent should be self-contained and follow the same pattern for consistency. diff --git a/gcloud-deploy.sh b/gcloud-deploy.sh deleted file mode 100755 index 3de3f3b4..00000000 --- a/gcloud-deploy.sh +++ /dev/null @@ -1,121 +0,0 @@ -#!/bin/bash - -# Exit immediately on non-zero status. -set -e - -# --- Argument Parsing --- -PRINT_COMMAND=false -if [[ "$1" == "-v" || "$1" == "--verbose" ]]; then - PRINT_COMMAND=true -fi - -# --- Source Environment Variables --- -if [ -f .env ]; then - echo "--> Sourcing variables from .env..." 
- set -a # Automatically export all variables defined in the sourced file - source .env - set +a # Stop automatically exporting -else - echo "--> INFO: .env file not found. Relying on variables already in the environment." -fi - -# --- Variables --- -: "${GCP__INSTANCE_NAME:?Please set GCP__INSTANCE_NAME}" -: "${GCP__PROJECT:?Please set GCP__PROJECT}" -: "${GCP__ZONE:?Please set GCP__ZONE}" -: "${GCP__MACHINE_TYPE:?Please set GCP__MACHINE_TYPE}" -: "${GCP__TEE_IMAGE_REFERENCE:?Please set GCP__TEE_IMAGE_REFERENCE}" -: "${GCP__SERVICE_ACCOUNT:?Please set GCP__SERVICE_ACCOUNT}" -: "${GCP__CONFIDENTIAL_IMAGE:?Please set GCP__CONFIDENTIAL_IMAGE}" -: "${GCP__CONFIDENTIAL_COMPUTE_TYPE:?Please set GCP__CONFIDENTIAL_COMPUTE_TYPE}" -: "${GCP__SCOPES:?Please set GCP__SCOPES}" -: "${GCP__TAGS:?Please set GCP__TAGS}" -: "${GCP__TEE_CONTAINER_LOG_REDIRECT:?Please set GCP__TEE_CONTAINER_LOG_REDIRECT}" - -echo "--> Creating instance '$GCP__INSTANCE_NAME' in project '$GCP__PROJECT' with the following settings:" -echo " - Zone: $GCP__ZONE" -echo " - Machine Type: $GCP__MACHINE_TYPE" -echo " - Service Account: $GCP__SERVICE_ACCOUNT" -echo " - Image: $GCP__CONFIDENTIAL_IMAGE" -echo " - TEE Image Reference: $GCP__TEE_IMAGE_REFERENCE" -echo " - Confidential Compute Type: $GCP__CONFIDENTIAL_COMPUTE_TYPE" -echo " - Scopes: $GCP__SCOPES" -echo " - Tags: $GCP__TAGS" -echo " - TEE Log Redirect: $GCP__TEE_CONTAINER_LOG_REDIRECT" - -# --- Build TEE Environment Metadata --- -echo "--> Preparing TEE environment metadata from .env variables..." -PREFIX_PATTERN="^(AGENT__|ECOSYSTEM__|VECTOR_DB__|GRAPH_DB__|SOCIAL__|TEE__|INGESTION__)" -VAR_NAMES=$(printenv | grep -E "$PREFIX_PATTERN" | cut -d'=' -f1) -METADATA_VARS="" -if [ -n "$VAR_NAMES" ]; then - echo " Found the following variables for TEE:" - for VAR_NAME in $VAR_NAMES; do - # Indirect expansion: Get the VALUE of the variable whose NAME is in VAR_NAME. 
- VAR_VALUE="${!VAR_NAME}" - METADATA_VARS="${METADATA_VARS},tee-env-${VAR_NAME}=${VAR_VALUE}" - - # Display the variable being passed, but hide secrets. - if [[ "$VAR_NAME" == *SECRET* || "$VAR_NAME" == *KEY* || "$VAR_NAME" == *TOKEN* ]]; then - echo " - ${VAR_NAME}=******" - else - echo " - ${VAR_NAME}=${VAR_VALUE}" - fi - done -fi - -# --- Build Command Array --- -COMMAND=( - gcloud compute instances create "$GCP__INSTANCE_NAME" - --project="$GCP__PROJECT" - --zone="$GCP__ZONE" - --machine-type="$GCP__MACHINE_TYPE" - --network-interface=network-tier=PREMIUM,nic-type=GVNIC,stack-type=IPV4_ONLY,subnet=default - --metadata="tee-image-reference=$GCP__TEE_IMAGE_REFERENCE,tee-container-log-redirect=$GCP__TEE_CONTAINER_LOG_REDIRECT${METADATA_VARS}" - --maintenance-policy=TERMINATE - --provisioning-model=STANDARD - --service-account="$GCP__SERVICE_ACCOUNT" - --scopes="$GCP__SCOPES" - --tags="$GCP__TAGS" - --create-disk=auto-delete=yes,boot=yes,device-name="$GCP__INSTANCE_NAME",image=projects/confidential-space-images/global/images/"$GCP__CONFIDENTIAL_IMAGE",mode=rw,size=11,type=pd-balanced - --shielded-secure-boot - --shielded-vtpm - --shielded-integrity-monitoring - --reservation-affinity=any - --confidential-compute-type="$GCP__CONFIDENTIAL_COMPUTE_TYPE" -) - -# --- Confirmation --- - -# Print the command in a readable multi-line format. -if [ "$PRINT_COMMAND" = true ]; then - echo - echo "The following command will be executed:" - echo "----------------------------------------" - printf "%s" "${COMMAND[0]}" - for (( i=1; i<${#COMMAND[@]}; i++ )); do - PART="${COMMAND[$i]}" - if [[ "$PART" == --* ]]; then - printf ' \\\n' - printf ' %s' "$PART" - else - printf ' %s' "$PART" - fi - done - printf '\n' - echo "----------------------------------------" -fi - -read -p "Do you want to continue? (y/N) " -n 1 -r -echo - -if [[ ! $REPLY =~ ^[Yy]$ ]]; then - echo "Deployment cancelled by user." - exit 1 -fi - -# --- Execute Command --- -echo "--> Proceeding with deployment..." 
-"${COMMAND[@]}" - -echo "--> ✨ Instance '$GCP__INSTANCE_NAME' created successfully." \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index e37f7418..fb5cc4d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -115,7 +115,7 @@ ignore = ["D203", "D212", "COM812", "S105", "D401", "D104", "ANN401", "ISC003", "src/flare_ai_kit/agent/ecosystem_tools_wrapper.py" = ["PLC0415"] "tests/**/*.py" = ["S", "ARG", "PLR2004", "SLF001", "BLE001", "E501", "T201", "D", "ANN", "F821", "PLC"] "examples/**/*.py" = ["D", "T201", "BLE001", "INP001", "E501"] -"scripts/**/*.py" = ["D", "T201", "BLE001", "INP001", "E501"] +"agents/**/*.py" = ["D", "T201", "BLE001", "INP001", "E501"] [tool.pyright] pythonVersion = "3.12" diff --git a/scripts/ingest_pdf.py b/scripts/ingest_pdf.py deleted file mode 100644 index 1ffa2500..00000000 --- a/scripts/ingest_pdf.py +++ /dev/null @@ -1,172 +0,0 @@ -"""PDF ingestion script that extracts data from PDFs using AI agents.""" - -from __future__ import annotations - -import asyncio -import json -import os -import re -from typing import TYPE_CHECKING, Any -from unittest.mock import AsyncMock, mock_open, patch - -import structlog - -# Import from local data directory -from data.create_sample_invoice import create_invoice_and_build_template -from google.adk.agents import Agent -from google.adk.runners import Runner -from google.adk.sessions import InMemorySessionService -from google.genai import types - -from flare_ai_kit import FlareAIKit -from flare_ai_kit.agent.pdf_tools import read_pdf_text_tool -from flare_ai_kit.config import AppSettings -from flare_ai_kit.ingestion.settings import ( - IngestionSettings, - OnchainContractSettings, - PDFIngestionSettings, - PDFTemplateSettings, -) - -if TYPE_CHECKING: - from pathlib import Path - -logger = structlog.get_logger(__name__) - -MOCK_TX_HASH = "0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef" - -SYSTEM_INSTRUCTION = ( - "You are a PDF extraction agent. 
Independently read PDFs using tools and return ONLY JSON " - "matching the user provided template schema. \n" - "- Always call read_pdf_text first.\n" - "- If a field is not found, set value to null.\n" - "- Reply with a single JSON object only (no markdown, no prose)." -) - - -def build_prompt( - pdf: Path, template: PDFTemplateSettings, max_pages: int | None -) -> str: - """Constructs the analysis prompt.""" - return ( - f"PDF_PATH: {pdf}\nMAX_PAGES: {max_pages or 'ALL'}\n" - f"TEMPLATE_SCHEMA:\n```json\n{json.dumps(template.model_dump())}\n```\n" - "Extract fields based on the schema above from the PDF." - ) - - -def extract_json(text: str) -> dict[str, Any]: - """Robustly extracts JSON from raw LLM response text.""" - try: - return json.loads(text) - except json.JSONDecodeError as err: - # Try to find JSON block within markdown fences or raw braces - if match := re.search( - r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL | re.IGNORECASE - ): - return json.loads(match.group(1)) - if match := re.search(r"\{.*\}", text, re.DOTALL): - return json.loads(match.group(0)) - msg = f"Could not extract valid JSON from response: {text[:100]}..." 
- raise ValueError(msg) from err - - -async def run_extraction_agent( - agent: Agent, - pdf: Path, - template: PDFTemplateSettings, -) -> dict[str, Any]: - """Setup in-memory ADK agent, give it the PDF, template and prompt.""" - svc = InMemorySessionService() - await svc.create_session(app_name="agents", user_id="user", session_id="session") - runner = Runner(agent=agent, app_name="agents", session_service=svc) - - prompt = build_prompt(pdf, template, max_pages=1) - message = types.Content(role="user", parts=[types.Part(text=prompt)]) - - logger.info("calling agent", model=agent.model) - - final_text = "" - async for event in runner.run_async( - user_id="user", session_id="session", new_message=message - ): - if event.is_final_response() and event.content and event.content.parts: - final_text = event.content.parts[0].text - break - - if not final_text: - msg = "Agent produced no response." - raise RuntimeError(msg) - - return extract_json(final_text) - - -async def main() -> None: - """Main function to demonstrate PDF ingestion and processing.""" - # Create PDF and save it - pdf_path, template = create_invoice_and_build_template("generated_invoice") - logger.info("loaded pdf", path=pdf_path) - - # Add template to global settings - app_settings = AppSettings( - log_level="INFO", - ingestion=IngestionSettings( - pdf_ingestion=PDFIngestionSettings( - templates=[template], - use_ocr=False, - contract_settings=OnchainContractSettings( - contract_address="0x0000000000000000000000000000000000000000", - abi_name="OnchainDataRegistry", - function_name="registerDocument", - ), - ) - ), - ) - - # Secrets will remain hidden - logger.info("settings", config=app_settings.model_dump()) - - # Inject Gemini API Key - if app_settings.agent and app_settings.agent.gemini_api_key: - api_key = app_settings.agent.gemini_api_key.get_secret_value() - os.environ["GOOGLE_API_KEY"] = api_key - - # Create ADK agent with tool access. 
- agent = Agent( - name="flare_pdf_agent", - model=app_settings.agent.gemini_model, - tools=[read_pdf_text_tool], - instruction=SYSTEM_INSTRUCTION, - generate_content_config=types.GenerateContentConfig( - temperature=0.0, top_k=1, top_p=0.3, candidate_count=1 - ), - ) - - kit = FlareAIKit(config=app_settings) - - # Deterministic parsing - parsed = kit.pdf_processor.process_pdf( - file_path=str(pdf_path), template_name=template.template_name - ) - logger.info("deterministic parsed", parsed=parsed) - - # Agent parsing - result = await run_extraction_agent(agent, pdf_path, template) - logger.info("agent parsed", result=result) - - # Mock onchain contract posting - with ( - patch( - "flare_ai_kit.onchain.contract_poster.ContractPoster.post_data", - new_callable=AsyncMock, - return_value=MOCK_TX_HASH, - ) as mock_post, - patch("flare_ai_kit.onchain.contract_poster.open", mock_open(read_data="[]")), - ): - kit = FlareAIKit(config=app_settings) - tx_hash = await kit.pdf_processor.contract_poster.post_data(parsed) - logger.info("posted onchain", tx_hash=tx_hash, args=mock_post.call_args[0][0]) - - -if __name__ == "__main__": - asyncio.run(main())