quotient-ai · crekhari · Mar 31, 2025 · Mar 27, 2025 · Mar 27, 2025 · Mar 27, 2025
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -22,6 +22,7 @@ click = "^8.1.7"
 httpx = "^0.27.0"
 tenacity = "<=9.0.0"
 pyjwt = "^2.10.1"
+pydantic = "^2.10.6"
 
 
 

diff --git a/pytest.ini b/pytest.ini
@@ -1,2 +1,3 @@
 [pytest]
 addopts = -s
+asyncio_default_fixture_loop_scope = function
diff --git a/quotientai/async_client.py b/quotientai/async_client.py
@@ -2,15 +2,17 @@
 import random
 import json
 import time
+import logging
 from pathlib import Path
 import jwt
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 import httpx
 
 from quotientai import resources
 from quotientai.exceptions import QuotientAIError, handle_async_errors
 from quotientai.resources.prompts import Prompt
+from quotientai.resources.logs import LogDocument
 from quotientai.resources.models import Model
 from quotientai.resources.datasets import Dataset
 from quotientai.resources.runs import Run
@@ -177,6 +179,7 @@ def __init__(self, logs_resource):
         self.inconsistency_detection: bool = False
         self._configured = False
         self.hallucination_detection_sample_rate = 0
+        self._logger = logging.getLogger('quotient-async-client')
 
     def init(
         self,
@@ -218,7 +221,7 @@ async def log(
         *,
         user_query: str,
         model_output: str,
-        documents: Optional[List[str]] = None,
+        documents: Optional[List[Union[str, LogDocument]]] = None,
         message_history: Optional[List[Dict[str, Any]]] = None,
         instructions: Optional[List[str]] = None,
         tags: Optional[Dict[str, Any]] = {},
@@ -251,6 +254,21 @@ async def log(
             else self.inconsistency_detection
         )
 
+        # Validate documents format
+        if documents:
+            for doc in documents:
+                if isinstance(doc, str):
+                    continue
+                elif isinstance(doc, dict):
+                    try:
+                        LogDocument(**doc)
+                    except Exception as _:
+                        self._logger.error(f"Documents must be a list of strings or dictionaries with 'page_content' and optional 'metadata' keys. Metadata keys must be strings")
+                        return None
+                else:
+                    self._logger.error(f"Documents must be a list of strings or dictionaries with 'page_content' and optional 'metadata' keys, got {type(doc)}")
+                    return None
+
         if self._should_sample():
             await self.logs_resource.create(
                 app_name=self.app_name,

diff --git a/quotientai/client.py b/quotientai/client.py
@@ -2,16 +2,17 @@
 import os
 import random
 import time
-
+import logging
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 import jwt
 
 import httpx
 
 from quotientai import resources
 from quotientai.exceptions import QuotientAIError, handle_errors
+from quotientai.resources.logs import LogDocument
 from quotientai.resources.prompts import Prompt
 from quotientai.resources.models import Model
 from quotientai.resources.datasets import Dataset
@@ -178,6 +179,7 @@ def __init__(self, logs_resource):
         self.inconsistency_detection: bool = False
         self._configured = False
         self.hallucination_detection_sample_rate = 0.0
+        self._logger = logging.getLogger('quotient-sync-client')
 
     def init(
         self,
@@ -219,7 +221,7 @@ def log(
         *,
         user_query: str,
         model_output: str,
-        documents: Optional[List[str]] = None,
+        documents: List[Union[str, LogDocument]] = None,
         message_history: Optional[List[Dict[str, Any]]] = None,
         instructions: Optional[List[str]] = None,
         tags: Optional[Dict[str, Any]] = {},
@@ -252,6 +254,21 @@ def log(
             else self.inconsistency_detection
         )
 
+        # Validate documents format
+        if documents:
+            for doc in documents:
+                if isinstance(doc, str):
+                    continue
+                elif isinstance(doc, dict):
+                    try:
+                        LogDocument(**doc)
+                    except Exception as _:
+                        self._logger.error(f"Documents must be a list of strings or dictionaries with 'page_content' and optional 'metadata' keys. Metadata keys must be strings")
+                        return None
+                else:
+                    self._logger.error(f"Documents must be a list of strings or dictionaries with 'page_content' and optional 'metadata' keys, got {type(doc)}")
+                    return None
+
         if self._should_sample():
             self.logs_resource.create(
                 app_name=self.app_name,

diff --git a/quotientai/resources/logs.py b/quotientai/resources/logs.py
@@ -1,13 +1,21 @@
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 import asyncio
 import logging
 from collections import deque
 from threading import Thread
 from dataclasses import dataclass
 from datetime import datetime
 import time
+from pydantic import BaseModel
 
 
+class LogDocument(BaseModel):
+    """
+    A document attached to a log: required ``page_content`` text plus optional ``metadata``.
+    """
+    page_content: str  # required; the document's text
+    metadata: Optional[Dict[str, Any]] = None  # optional; string keys mapped to arbitrary values
+
 @dataclass
 class Log:
     """
@@ -21,7 +29,7 @@ class Log:
     inconsistency_detection: bool
     user_query: str
     model_output: str
-    documents: List[str]
+    documents: List[Union[str, LogDocument]]
     message_history: Optional[List[Dict[str, Any]]]
     instructions: Optional[List[str]]
     tags: Dict[str, Any]
@@ -71,7 +79,7 @@ def create(
         inconsistency_detection: bool,
         user_query: str,
         model_output: str,
-        documents: List[str],
+        documents: List[Union[str, LogDocument]],
         message_history: Optional[List[Dict[str, Any]]] = None,
         instructions: Optional[List[str]] = None,
         tags: Optional[Dict[str, Any]] = {},

diff --git a/tests/resources/test_logs.py b/tests/resources/test_logs.py
@@ -1,7 +1,7 @@
 import pytest
 from datetime import datetime
 from unittest.mock import Mock, AsyncMock
-from quotientai.resources.logs import Log, LogsResource, AsyncLogsResource
+from quotientai.resources.logs import Log, LogsResource, AsyncLogsResource, LogDocument
 
 # Fixtures
 @pytest.fixture
@@ -25,6 +25,36 @@ def sample_log_data():
         "created_at": "2024-01-01T00:00:00"
     }
 
+# LogDocument Tests
+class TestLogDocument:
+    """Unit tests covering construction of LogDocument."""
+
+    def test_log_document_creation(self):
+        """Both fields passed as keywords are stored on the model."""
+        expected_metadata = {"source": "test_source", "author": "test_author"}
+        document = LogDocument(
+            page_content="This is test content", metadata=dict(expected_metadata)
+        )
+        assert document.page_content == "This is test content"
+        assert document.metadata["source"] == expected_metadata["source"]
+        assert document.metadata["author"] == expected_metadata["author"]
+
+    def test_log_document_with_no_metadata(self):
+        """Omitting metadata leaves it as None."""
+        document = LogDocument(page_content="Test content only")
+        assert document.metadata is None
+        assert document.page_content == "Test content only"
+
+    def test_log_document_from_dict(self):
+        """A plain dict can be unpacked into the constructor."""
+        payload = {
+            "metadata": {"source": "dictionary"},
+            "page_content": "Content from dict",
+        }
+        document = LogDocument(**payload)
+        assert document.metadata["source"] == "dictionary"
+        assert document.page_content == "Content from dict"
+
 # Model Tests
 class TestLog:
     """Tests for the Log dataclass"""

diff --git a/tests/test_async_client.py b/tests/test_async_client.py
@@ -507,4 +507,44 @@ def test_should_sample(self):
 
             # Should not sample when random >= sample_rate
             mock_random.return_value = 0.6
-            assert logger._should_sample() is False 
+            assert logger._should_sample() is False
+
+    @pytest.mark.asyncio
+    async def test_log_with_invalid_document_dict(self):
+        """A dict document lacking 'page_content' is rejected without creating a log."""
+        logs_resource = Mock()
+        quotient_logger = AsyncQuotientLogger(logs_resource)
+        quotient_logger.init(app_name="test-app", environment="test")
+        quotient_logger._logger = Mock()  # stand-in that records error() calls
+
+        # Only 'metadata' is supplied, so the required 'page_content' key is absent.
+        invalid_documents = [{"metadata": {"key": "value"}}]
+        outcome = await quotient_logger.log(
+            user_query="test query", model_output="test output", documents=invalid_documents
+        )
+
+        # One error is reported, nothing reaches the resource, and log() yields None.
+        assert outcome is None
+        assert logs_resource.create.call_count == 0
+        assert quotient_logger._logger.error.call_count == 1
+
+    @pytest.mark.asyncio
+    async def test_log_with_invalid_document_type(self):
+        """A document that is neither a string nor a dict is rejected without creating a log."""
+        logs_resource = Mock()
+        quotient_logger = AsyncQuotientLogger(logs_resource)
+        quotient_logger.init(app_name="test-app", environment="test")
+        quotient_logger._logger = Mock()  # stand-in that records error() calls
+
+        # The leading string is accepted and skipped by validation;
+        # the integer that follows is the entry that triggers the error.
+        mixed_documents = ["valid string document", 123]
+        outcome = await quotient_logger.log(
+            user_query="test query", model_output="test output", documents=mixed_documents
+        )
+
+        # Exactly one error is reported, the resource is never asked to create
+        # a log, and log() itself yields None.
+        assert outcome is None
+        assert logs_resource.create.call_count == 0
+        assert quotient_logger._logger.error.call_count == 1
diff --git a/tests/test_client.py b/tests/test_client.py
@@ -499,3 +499,40 @@ def test_should_sample(self):
             # Should not sample when random >= sample_rate
             mock_random.return_value = 0.6
             assert logger._should_sample() is False
+
+    def test_log_with_invalid_document_dict(self):
+        """Invalid dict document: error logged, no log created, None returned."""
+        mock_logs_resource = Mock()
+        logger = QuotientLogger(mock_logs_resource)
+        logger.init(app_name="test-app", environment="test")
+        logger._logger = Mock()  # Mock the logger to capture error messages
+
+        # Only 'metadata' is supplied, so the required 'page_content' key is absent.
+        result = logger.log(
+            user_query="test query",
+            model_output="test output",
+            documents=[{"metadata": {"key": "value"}}]  # Missing page_content
+        )
+        # Should log the error, skip log creation, and return None
+        assert logger._logger.error.call_count == 1
+        assert mock_logs_resource.create.call_count == 0
+        assert result is None
+
+    def test_log_with_invalid_document_type(self):
+        """Non-string, non-dict document: error logged, no log created, None returned."""
+        mock_logs_resource = Mock()
+        logger = QuotientLogger(mock_logs_resource)
+        logger.init(app_name="test-app", environment="test")
+        logger._logger = Mock()  # Mock the logger to capture error messages
+
+        # Test with a mix of valid string and invalid non-string/non-dict document
+        # The string document will hit the 'continue' branch
+        result = logger.log(
+            user_query="test query",
+            model_output="test output",
+            documents=["valid string document", 123]  # String and invalid type
+        )
+        # Should log the error, skip log creation, and return None
+        assert logger._logger.error.call_count == 1
+        assert mock_logs_resource.create.call_count == 0
+        assert result is None