Commit fb431a3

Merge pull request #228 from enoch3712/225-add-pydanticai-support
add pydanticai support
2 parents 7236493 + 6a94d2f commit fb431a3

14 files changed: +382 −67 lines

docs/assets/dynamic_parsing.png (new binary file, 459 KB)

docs/core-concepts/llm-integration/dynamic-parsing.md (new file, +78 lines)

# Dynamic Parsing

<div align="center">
<img src="../../../assets/dynamic_parsing.png" alt="Dynamic Parsing" width="90%">
</div>

Dynamic parsing enables flexible handling of structured output from LLM responses. It is particularly useful with reasoning models such as DeepSeek R1.

## Overview

Dynamic parsing is enabled with the `set_dynamic()` method on your LLM instance. When enabled, the LLM will:

1. Attempt to parse and validate JSON responses
2. Include the structured thinking process in the output
3. Handle complex response models dynamically

## Usage

Enable dynamic parsing on your LLM instance:

```python
from extract_thinker import LLM

# Initialize the LLM
llm = LLM("ollama/deepseek-r1:1.5b")

# Enable dynamic parsing
llm.set_dynamic(True)
```

When enabled, requests are wrapped in this prompt template:

```
Please provide your thinking process within <think> tags, followed by your JSON output.

JSON structure:
{your_structure}

OUTPUT example:
<think>
Your step-by-step reasoning and analysis goes here...
</think>

##JSON OUTPUT
{
    ...
}
```
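Under the hood, the response has to be post-processed: the `<think>` block is stripped and the JSON tail is validated against the response model (the library's own helper for this, `extract_thinking_json`, is imported in `llm.py`). A simplified standalone sketch of that idea — illustrative only, not the library's actual implementation:

```python
import json
import re
from typing import Type, TypeVar

from pydantic import BaseModel

T = TypeVar("T", bound=BaseModel)

def parse_dynamic_response(text: str, model: Type[T]) -> T:
    """Sketch: strip <think>...</think>, then validate the JSON that follows."""
    # Remove the reasoning block the template asks the model to emit
    body = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    # Grab the JSON object from the ##JSON OUTPUT section
    match = re.search(r"\{.*\}", body, flags=re.DOTALL)
    if match is None:
        raise ValueError("No JSON object found in response")
    return model.model_validate(json.loads(match.group(0)))
```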
## Example: Invoice Extraction

Here's a complete example of using dynamic parsing for invoice extraction:

```python
from extract_thinker import LLM, Extractor
from extract_thinker.document_loader import DocumentLoaderPyPdf
from pydantic import BaseModel
from typing import List, Optional

# Define your invoice model
class InvoiceData(BaseModel):
    invoice_number: str
    date: str
    total_amount: float
    vendor_name: str
    line_items: List[dict]
    payment_terms: Optional[str] = None

# Initialize the LLM with dynamic parsing
llm = LLM("ollama/deepseek-r1:1.5b")
llm.set_dynamic(True)  # Enable dynamic JSON parsing

# Set up the document loader and extractor
document_loader = DocumentLoaderPyPdf()
extractor = Extractor(document_loader=document_loader, llm=llm)

# Extract information from the invoice
result = extractor.extract("path/to/invoice.pdf", response_model=InvoiceData)
```
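`extract()` returns a populated instance of the response model, so the result behaves like any Pydantic object (assuming Pydantic v2 for `model_dump_json`):

```python
# result is an InvoiceData instance
print(result.invoice_number, result.total_amount)
print(result.model_dump_json(indent=2))  # serialize for logging or storage
```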
docs/core-concepts/llm-integration/index.md (+43 −12)

````diff
@@ -1,30 +1,61 @@
 # LLM Integration
 
-!!! warning "🚧 In Development"
-    This component is currently under active development. The API might change in future releases.
-
 The LLM component in ExtractThinker acts as a bridge between your document processing pipeline and various Language Model providers. It handles request formatting, response parsing, and provider-specific optimizations.
 
 <div align="center">
     <img src="../../assets/llm_image.png" alt="LLM Architecture" width="50%">
 </div>
 
-The architecture consists of:
-
-- **Parser**: Uses [instructor](https://github.com/jxnl/instructor) for structured outputs with Pydantic
-- **LLM Broker**: Leverages [litellm](https://github.com/BerriAI/litellm) for unified model interface
-
 ??? example "Base LLM Implementation"
     ```python
     --8<-- "extract_thinker/llm.py"
     ```
 
-## Basic Usage
+The architecture supports two different stacks:
+
+**Default Stack**: Combines instructor and litellm
+
+- Uses [instructor](https://python.useinstructor.com/) for structured outputs with Pydantic
+- Leverages [litellm](https://docs.litellm.ai/docs/) for unified model interface
+
+**Pydantic AI Stack** <span class="beta-badge">🧪 In Beta</span>
+
+- All-in-one solution for Pydantic model integration
+- Handles both model interfacing and structured outputs
+- Built by the Pydantic team ([Learn more](https://ai.pydantic.dev/))
+
+## Backend Options
 
 ```python
 from extract_thinker import LLM
+from extract_thinker.llm_engine import LLMEngine
 
-# Initialize with specific model
+# Initialize with default stack (instructor + litellm)
 llm = LLM("gpt-4o")
+
+# Or use Pydantic AI stack (Beta)
+llm = LLM("openai:gpt-4o", backend=LLMEngine.PYDANTIC_AI)
 ```
+
+ExtractThinker supports two LLM stacks:
+
+### Default Stack (instructor + litellm)
+
+The default stack combines instructor for structured outputs and litellm for model interfacing. It leverages [LiteLLM's unified API](https://docs.litellm.ai/docs/#litellm-python-sdk) for consistent model access:
+
+```python
+llm = LLM("gpt-4o", backend=LLMEngine.DEFAULT)
+```
+
+### Pydantic AI Stack (Beta)
+
+An alternative all-in-one solution for model integration powered by [Pydantic AI](https://ai.pydantic.dev/):
+
+```python
+llm = LLM("openai:gpt-4o", backend=LLMEngine.PYDANTIC_AI)
+```
+
+!!! note "Pydantic AI Limitations"
+    - Batch processing is not supported with the Pydantic AI backend
+    - Router functionality is not available
+    - Requires the `pydantic-ai` package to be installed
+
+[Read more about Pydantic AI features](https://ai.pydantic.dev/#why-use-pydanticai)
````
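Under the hood, the beta backend delegates to a `pydantic_ai.Agent` (see the `llm.py` changes below). A minimal standalone sketch of the pydantic-ai API it builds on — assuming a pydantic-ai version contemporary with this PR; the model string and invoice fields are illustrative:

```python
from pydantic import BaseModel
from pydantic_ai import Agent

class Invoice(BaseModel):
    invoice_number: str
    total_amount: float

# Roughly what LLM("openai:gpt-4o", backend=LLMEngine.PYDANTIC_AI) wires up;
# this sketch pins the result type at construction time
agent = Agent("openai:gpt-4o", result_type=Invoice)
result = agent.run_sync("Extract: invoice INV-001, total 120.50")
print(result.data)  # -> Invoice(invoice_number='INV-001', total_amount=120.5)
```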

docs/stylesheets/extra.css (+11)

```diff
@@ -315,4 +315,15 @@
 .md-nav__item a[href="#"] span:first-child {
     opacity: 0.7;
     filter: grayscale(1);
+}
+
+/* Beta badge */
+.beta-badge {
+    background-color: #f5f3ff;
+    border: 1px solid #8b5cf6;
+    border-radius: 4px;
+    padding: 2px 8px;
+    font-size: 0.875rem;
+    color: #6d28d9;
+    margin-left: 8px;
 }
```

extract_thinker/extractor.py (+14)

```diff
@@ -5,6 +5,7 @@
 import uuid
 import litellm
 from pydantic import BaseModel
+from extract_thinker.llm_engine import LLMEngine
 from extract_thinker.concatenation_handler import ConcatenationHandler
 from extract_thinker.document_loader.document_loader import DocumentLoader
 from extract_thinker.document_loader.document_loader_llm_image import DocumentLoaderLLMImage
@@ -759,7 +760,20 @@ def extract_batch(
 
         Returns:
             A BatchJob object to monitor and retrieve batch processing results.
+
+        Raises:
+            ValueError: If batch processing is not supported by the current LLM configuration
         """
+        if not self.llm:
+            raise ValueError("LLM is not set. Please set an LLM before extraction.")
+
+        # Check if using the pydantic-ai backend
+        if self.llm.backend == LLMEngine.PYDANTIC_AI:
+            raise ValueError(
+                "Batch processing is not supported with the PYDANTIC_AI backend. "
+                "Please use GPT-4o models and the default backend for batch operations."
+            )
+
         if not self.can_handle_batch():
             raise ValueError(
                 f"Model {self.llm.model} does not support batch processing. "
```

extract_thinker/llm.py (+86 −5)

```diff
@@ -1,7 +1,9 @@
+import asyncio
 from typing import List, Dict, Any, Optional
 import instructor
 import litellm
 from litellm import Router
+from extract_thinker.llm_engine import LLMEngine
 from extract_thinker.utils import add_classification_structure, extract_thinking_json
 
 # Add these constants at the top of the file, after the imports
@@ -25,15 +27,72 @@ class LLM:
     TEMPERATURE = 0  # Always zero for deterministic outputs (IDP)
     TIMEOUT = 3000  # Timeout in milliseconds
 
-    def __init__(self,
-                 model: str,
-                 token_limit: int = None):
-        self.client = instructor.from_litellm(litellm.completion, mode=instructor.Mode.MD_JSON)
+    def __init__(
+        self,
+        model: str,
+        token_limit: int = None,
+        backend: LLMEngine = LLMEngine.DEFAULT
+    ):
+        """Initialize LLM with the specified backend.
+
+        Args:
+            model: The model name (e.g. "gpt-4", "claude-3")
+            token_limit: Optional maximum tokens
+            backend: LLMEngine enum (default: LLMEngine.DEFAULT)
+        """
         self.model = model
-        self.router = None
         self.token_limit = token_limit
+        self.router = None
         self.is_dynamic = False
+        self.backend = backend
+
+        if self.backend == LLMEngine.DEFAULT:
+            self.client = instructor.from_litellm(
+                litellm.completion,
+                mode=instructor.Mode.MD_JSON
+            )
+            self.agent = None
+        elif self.backend == LLMEngine.PYDANTIC_AI:
+            self._check_pydantic_ai()
+            from typing import cast
+            from pydantic_ai import Agent
+            from pydantic_ai.models import KnownModelName
+
+            self.client = None
+            self.agent = Agent(
+                cast(KnownModelName, self.model)
+            )
+        else:
+            raise ValueError(f"Unsupported backend: {self.backend}")
+
+    @staticmethod
+    def _check_pydantic_ai():
+        """Check that pydantic-ai is installed."""
+        try:
+            import pydantic_ai
+        except ImportError:
+            raise ImportError(
+                "Could not import pydantic-ai package. "
+                "Please install it with `pip install pydantic-ai`."
+            )
+
+    @staticmethod
+    def _get_pydantic_ai():
+        """Lazily import pydantic-ai."""
+        try:
+            import pydantic_ai
+            return pydantic_ai
+        except ImportError:
+            raise ImportError(
+                "Could not import pydantic-ai package. "
+                "Please install it with `pip install pydantic-ai`."
+            )
 
     def load_router(self, router: Router) -> None:
+        """Load a LiteLLM router for model fallbacks."""
+        if self.backend != LLMEngine.DEFAULT:
+            raise ValueError("Router is only supported with the DEFAULT backend")
         self.router = router
 
     def set_dynamic(self, is_dynamic: bool) -> None:
```
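`load_router` remains available on the default stack; a sketch of the intended fallback setup with litellm's `Router` (the deployment entries are illustrative):

```python
from litellm import Router

from extract_thinker import LLM

# Two deployments behind one alias; litellm fails over between them
router = Router(model_list=[
    {"model_name": "gpt-4o", "litellm_params": {"model": "gpt-4o"}},
    {"model_name": "gpt-4o", "litellm_params": {"model": "azure/gpt-4o"}},
])

llm = LLM("gpt-4o")  # default backend: instructor + litellm
llm.load_router(router)
```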
The new backend branch in `request()`:

```diff
@@ -52,6 +111,28 @@ def request(
         messages: List[Dict[str, str]],
         response_model: Optional[str] = None
     ) -> Any:
+        # Handle Pydantic-AI backend differently
+        if self.backend == LLMEngine.PYDANTIC_AI:
+            # Combine messages into a single prompt
+            combined_prompt = " ".join([m["content"] for m in messages])
+            try:
+                # Create event loop if it doesn't exist
+                try:
+                    loop = asyncio.get_event_loop()
+                except RuntimeError:
+                    loop = asyncio.new_event_loop()
+                    asyncio.set_event_loop(loop)
+
+                result = loop.run_until_complete(
+                    self.agent.run(
+                        combined_prompt,
+                        result_type=response_model if response_model else str
+                    )
+                )
+                return result.data
+            except Exception as e:
+                raise ValueError(f"Failed to extract from source: {str(e)}")
+
         # Uncomment the following lines if you need to calculate max_tokens
         # contents = map(lambda message: message['content'], messages)
         # all_contents = ' '.join(contents)
```
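For completeness, a hedged sketch of a direct `request()` call on the beta path (the `Invoice` model and message contents are illustrative; most code goes through `Extractor` instead). Note a design consequence visible above: the prompt flattening joins only the `content` values, so role distinctions (system vs. user) are lost on this backend:

```python
from pydantic import BaseModel

from extract_thinker import LLM
from extract_thinker.llm_engine import LLMEngine

class Invoice(BaseModel):
    invoice_number: str
    total_amount: float

llm = LLM("openai:gpt-4o", backend=LLMEngine.PYDANTIC_AI)

messages = [
    {"role": "system", "content": "Extract structured invoice data."},
    {"role": "user", "content": "Invoice INV-001, total 120.50, vendor ACME."},
]
invoice = llm.request(messages, response_model=Invoice)
```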

extract_thinker/llm_engine.py (new file, +12 lines)

```python
from enum import Enum


class LLMEngine(Enum):
    """Supported LLM backends.

    Attributes:
        DEFAULT: Uses litellm + instructor for model interfacing and structured outputs
        PYDANTIC_AI: Uses pydantic-ai for enhanced Pydantic model integration
    """
    DEFAULT = "default"  # Default backend using litellm + instructor
    PYDANTIC_AI = "pydantic_ai"  # Pydantic AI backend for enhanced model integration
```

mkdocs.yml (+1)

```diff
@@ -56,6 +56,7 @@ nav:
       - Kofax: '#'
   - LLM Integration:
       - Overview: core-concepts/llm-integration/index.md
+      - Dynamic Parsing: core-concepts/llm-integration/dynamic-parsing.md
   - Classification:
       - Overview: core-concepts/classification/index.md
       - Basic Classification: core-concepts/classification/basic.md
```

pyproject.toml (+1 −1)

```diff
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "extract_thinker"
-version = "0.1.3"
+version = "0.1.4"
 description = "Library to extract data from files and documents agnostically using LLMs"
 authors = ["Júlio Almeida <[email protected]>"]
 readme = "README.md"
```

tests/critical/test_critical_classification.py (+6 −2)

```diff
@@ -24,7 +24,7 @@ def test_critical_classification():
     # Setup
     document_loader = DocumentLoaderPyPdf()
     extractor = Extractor(document_loader)
-    extractor.load_llm("groq/llama-3.1-70b-versatile")
+    extractor.load_llm("groq/llama-3.3-70b-versatile")
 
     process = Process()
     process.add_classify_extractor([[extractor]])
@@ -47,4 +47,8 @@ def test_critical_classification():
 
     # Assert
     assert result is not None
-    assert result.name == "Invoice"
+    assert result.name == "Invoice"
+
+
+if __name__ == "__main__":
+    test_critical_classification()
```

tests/critical/test_critical_extraction.py (+1 −1)

```diff
@@ -44,7 +44,7 @@ def test_critical_extract_with_pypdf():
 
     extractor = Extractor()
     extractor.load_document_loader(DocumentLoaderPyPdf())
-    extractor.load_llm("groq/llama-3.1-70b-versatile")
+    extractor.load_llm("groq/llama-3.3-70b-versatile")
 
     result = extractor.extract(test_file_path, InvoiceContract)
```
