-
Notifications
You must be signed in to change notification settings - Fork 2
[QUO-899][QUO-898] Support document metadata in Q-SDK #81
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
12e1781
c379737
8074159
e3b3976
c6a825d
7245a9d
0379bdf
91975c0
ab6ebb0
90f3d75
10f6a2a
4da3a8e
d61a1d1
bbcbbb1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,7 @@ click = "^8.1.7" | |
| httpx = "^0.27.0" | ||
| tenacity = "<=9.0.0" | ||
| pyjwt = "^2.10.1" | ||
| pydantic = "^2.10.6" | ||
|
|
||
|
|
||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,2 +1,3 @@ | ||
| [pytest] | ||
| addopts = -s | ||
| asyncio_default_fixture_loop_scope = function | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,15 +2,17 @@ | |
| import random | ||
| import json | ||
| import time | ||
| import logging | ||
| from pathlib import Path | ||
| import jwt | ||
| from typing import Any, Dict, List, Optional | ||
| from typing import Any, Dict, List, Optional, Union | ||
|
|
||
| import httpx | ||
|
|
||
| from quotientai import resources | ||
| from quotientai.exceptions import QuotientAIError, handle_async_errors | ||
| from quotientai.resources.prompts import Prompt | ||
| from quotientai.resources.logs import LogDocument | ||
| from quotientai.resources.models import Model | ||
| from quotientai.resources.datasets import Dataset | ||
| from quotientai.resources.runs import Run | ||
|
|
@@ -218,7 +220,7 @@ async def log( | |
| *, | ||
| user_query: str, | ||
| model_output: str, | ||
| documents: Optional[List[str]] = None, | ||
| documents: Optional[List[Union[str, LogDocument]]] = None, | ||
| message_history: Optional[List[Dict[str, Any]]] = None, | ||
| instructions: Optional[List[str]] = None, | ||
| tags: Optional[Dict[str, Any]] = {}, | ||
|
|
@@ -251,6 +253,19 @@ async def log( | |
| else self.inconsistency_detection | ||
| ) | ||
|
|
||
| # Validate documents format | ||
| if documents: | ||
| for doc in documents: | ||
| if isinstance(doc, str): | ||
| continue | ||
| elif isinstance(doc, dict): | ||
| try: | ||
| LogDocument(**doc) | ||
| except Exception as _: | ||
| raise QuotientAIError("Documents must be a list of strings or dictionaries with 'page_content' and optional 'metadata' keys. Metadata keys must be strings") | ||
| else: | ||
| raise QuotientAIError(f"Documents must be a list of strings or dictionaries with 'page_content' and optional 'metadata' keys, got {type(doc)}") | ||
|
||
|
|
||
| if self._should_sample(): | ||
| await self.logs_resource.create( | ||
| app_name=self.app_name, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,13 +1,21 @@ | ||
| from typing import Any, Dict, List, Optional | ||
| from typing import Any, Dict, List, Optional, Union | ||
| import asyncio | ||
| import logging | ||
| from collections import deque | ||
| from threading import Thread | ||
| from dataclasses import dataclass | ||
| from datetime import datetime | ||
| import time | ||
| from pydantic import BaseModel | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we importing pydantic here? All other classes in the sdk are instantiated without it?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @waldnzwrld had a quick chat in the office: it'd be good to use pydantic for client-side validation over the dataclasses. Dataclasses are useful but don't do type validation. @crekhari can you make a separate PR to replace stuff with pydantic entirely?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It'd be better for us to replace everything in one go rather than doing it piece-meal There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wholeheartedly agree that if we're going to do it, we should just go for it. If nobody has opened an issue for it, I will There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thx for opening the issue, Ill assign it to myself for now, but its a lowish priority for me rn |
||
|
|
||
|
|
||
| class LogDocument(BaseModel): | ||
| """ | ||
| Represents a log document | ||
| """ | ||
| page_content: str | ||
| metadata: Optional[Dict[str, Any]] = None | ||
|
|
||
| @dataclass | ||
| class Log: | ||
| """ | ||
|
|
@@ -21,7 +29,7 @@ class Log: | |
| inconsistency_detection: bool | ||
| user_query: str | ||
| model_output: str | ||
| documents: List[str] | ||
| documents: List[Union[str, LogDocument]] | ||
| message_history: Optional[List[Dict[str, Any]]] | ||
| instructions: Optional[List[str]] | ||
| tags: Dict[str, Any] | ||
|
|
@@ -71,7 +79,7 @@ def create( | |
| inconsistency_detection: bool, | ||
| user_query: str, | ||
| model_output: str, | ||
| documents: List[str], | ||
| documents: List[Union[str, LogDocument]], | ||
| message_history: Optional[List[Dict[str, Any]]] = None, | ||
| instructions: Optional[List[str]] = None, | ||
| tags: Optional[Dict[str, Any]] = {}, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -499,3 +499,67 @@ def test_should_sample(self): | |
| # Should not sample when random >= sample_rate | ||
| mock_random.return_value = 0.6 | ||
| assert logger._should_sample() is False | ||
|
|
||
| def test_log_with_invalid_document_dict(self): | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should add a happy case test, (A test where the document data is valid) for both the sync and async client arch |
||
| """Test logging with an invalid document dictionary""" | ||
| mock_logs_resource = Mock() | ||
| logger = QuotientLogger(mock_logs_resource) | ||
| logger.init(app_name="test-app", environment="test") | ||
|
|
||
| # Test with a document missing 'page_content' | ||
| with pytest.raises(QuotientAIError) as excinfo: | ||
| logger.log( | ||
| user_query="test query", | ||
| model_output="test output", | ||
| documents=[{"metadata": {"key": "value"}}] | ||
| ) | ||
|
|
||
| # Verify the error message contains expected information | ||
| assert "page_content" in str(excinfo.value) | ||
| assert mock_logs_resource.create.call_count == 0 | ||
|
|
||
| def test_log_with_invalid_document_type(self): | ||
| """Test logging with a document of invalid type""" | ||
| mock_logs_resource = Mock() | ||
| logger = QuotientLogger(mock_logs_resource) | ||
| logger.init(app_name="test-app", environment="test") | ||
|
|
||
| # Test with a mix of valid string and invalid non-string/non-dict document | ||
| # The string document will hit the 'continue' branch | ||
| with pytest.raises(QuotientAIError) as excinfo: | ||
| logger.log( | ||
| user_query="test query", | ||
| model_output="test output", | ||
| documents=["valid string document", 123] | ||
| ) | ||
|
|
||
| # Verify the error message contains expected information | ||
| assert "123" in str(excinfo.value) or "int" in str(excinfo.value) | ||
| assert mock_logs_resource.create.call_count == 0 | ||
|
|
||
| def test_log_with_valid_documents(self): | ||
| """Test logging with valid document formats""" | ||
|
|
||
| mock_logs_resource = Mock() | ||
| logger = QuotientLogger(mock_logs_resource) | ||
| logger.init(app_name="test-app", environment="test") | ||
|
|
||
| # Force sampling to True for testing | ||
| with patch.object(logger, '_should_sample', return_value=True): | ||
| # Test with mixed valid document types | ||
| logger.log( | ||
| user_query="test query 4", | ||
| model_output="test output 4", | ||
| documents=[ | ||
| "string document", | ||
| {"page_content": "dict document", "metadata": {"key": "value"}}, | ||
| ] | ||
| ) | ||
|
|
||
| assert mock_logs_resource.create.call_count == 1 | ||
|
|
||
| # Verify correct documents were passed to create | ||
| calls = mock_logs_resource.create.call_args_list | ||
| assert calls[0][1]["documents"][0] == "string document" | ||
| assert calls[0][1]["documents"][1] == {"page_content": "dict document", "metadata": {"key": "value"}} | ||
| assert len(calls[0][1]["documents"]) == 2 | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's this for?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I get a Deprecation warning when running tests
(poetry run pytest --cov=quotientai tests/ --cov-report term-missing):Setting it explicitly makes it go away. Setting the event loop scope to function means a new event loop will be created for each test function that uses an async fixture