Commit db59946
feat: add pagination for pipeline logs (#180)
1 parent: 787f037

8 files changed

Lines changed: 265 additions & 37 deletions
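
At a glance: `get_logs` now returns a `PaginatedResponse[PipelineLog]` with an optional `after` cursor instead of a fixed `PipelineLogList`. A minimal usage sketch, assuming an authenticated `AsyncDeepsetClient` (client construction/auth details and the workspace and pipeline names below are placeholders, not part of this commit):

import asyncio

from deepset_mcp.api.client import AsyncDeepsetClient


async def main() -> None:
    client = AsyncDeepsetClient()  # assumes credentials are configured out of band
    page = await client.pipelines(workspace="my-workspace").get_logs(
        pipeline_name="my-pipeline", limit=30
    )
    # Iterating the response fetches follow-up pages via the new 'after' cursor
    async for log in page:
        print(log.message)


asyncio.run(main())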


src/deepset_mcp/api/pipeline/protocols.py

Lines changed: 3 additions & 2 deletions
@@ -10,7 +10,7 @@
     DeepsetSearchResponse,
     DeepsetStreamEvent,
     LogLevel,
-    PipelineLogList,
+    PipelineLog,
     PipelineValidationResult,
 )
 from deepset_mcp.api.shared_models import NoContentResponse, PaginatedResponse
@@ -49,7 +49,8 @@ async def get_logs(
         pipeline_name: str,
         limit: int = 30,
         level: LogLevel | None = None,
-    ) -> PipelineLogList:
+        after: str | None = None,
+    ) -> PaginatedResponse[PipelineLog]:
         """Fetch logs for a specific pipeline."""
         ...

src/deepset_mcp/api/pipeline/resource.py

Lines changed: 37 additions & 11 deletions
@@ -14,7 +14,7 @@
     DeepsetSearchResponse,
     DeepsetStreamEvent,
     LogLevel,
-    PipelineLogList,
+    PipelineLog,
     PipelineValidationResult,
     ValidationError,
 )
@@ -215,36 +215,62 @@ async def get_logs(
         pipeline_name: str,
         limit: int = 30,
         level: LogLevel | None = None,
-    ) -> PipelineLogList:
-        """Fetch logs for a specific pipeline.
+        after: str | None = None,
+    ) -> PaginatedResponse[PipelineLog]:
+        """Fetch logs for a specific pipeline and return the first page of results.
+
+        The returned object can be iterated over to fetch subsequent pages.
 
         :param pipeline_name: Name of the pipeline to fetch logs for.
-        :param limit: Maximum number of log entries to return.
+        :param limit: Maximum number of log entries to return per page.
         :param level: Filter logs by level. If None, returns all levels.
-        :returns: A PipelineLogList containing the log entries.
+        :param after: The cursor to fetch the next page of results.
+        :returns: A `PaginatedResponse` object containing the first page of logs.
         """
-        params: dict[str, Any] = {
+        # 1. Prepare arguments for the initial API call
+        request_params = {
             "limit": limit,
             "filter": "origin eq 'querypipeline'",
         }
 
         # Add level filter if specified
         if level is not None:
-            params["filter"] = f"level eq '{level}' and origin eq 'querypipeline'"
+            request_params["filter"] = f"level eq '{level}' and origin eq 'querypipeline'"
+
+        # Add cursor if provided
+        if after is not None:
+            request_params["after"] = after
+
+        # Remove None values
+        request_params = {k: v for k, v in request_params.items() if v is not None}
+
+        # 2. Make the first API call using a private, stateless method
+        page = await self._get_logs_api_call(pipeline_name, **request_params)
+
+        # 3. Inject the logic needed for subsequent fetches into the response object
+        page._inject_paginator(
+            fetch_func=lambda **kwargs: self._get_logs_api_call(pipeline_name, **kwargs),
+            # Base args for the *next* fetch don't include initial cursors
+            base_args={"limit": limit, "filter": request_params["filter"]},
+            cursor_param="after",  # Logs use 'after' cursor, not 'before' like pipelines
+        )
+        return page
 
+    async def _get_logs_api_call(self, pipeline_name: str, **kwargs: Any) -> PaginatedResponse[PipelineLog]:
+        """A private, stateless method that performs the raw API call for logs."""
         resp = await self._client.request(
             endpoint=f"v1/workspaces/{quote(self._workspace, safe='')}/pipelines/{quote(pipeline_name, safe='')}/logs",
             method="GET",
-            params=params,
+            params=kwargs,
         )
 
         raise_for_status(resp)
 
         if resp.json is not None:
-            return PipelineLogList.model_validate(resp.json)
+            return PaginatedResponse[PipelineLog].create_with_cursor_field(resp.json, "logged_at")
         else:
-            # Return empty log list if no response
-            return PipelineLogList(data=[], has_more=False, total=0)
+            # Return empty paginated response if no JSON data
+            return PaginatedResponse[PipelineLog](data=[], has_more=False, total=0)
 
     async def deploy(self, pipeline_name: str) -> PipelineValidationResult:
         """Deploy a pipeline to production.

src/deepset_mcp/api/shared_models.py

Lines changed: 8 additions & 3 deletions
@@ -43,6 +43,7 @@ class PaginatedResponse(BaseModel, Generic[T]):
     # --- Internal Paginator State (Defaults to None) ---
     _fetch_func: Callable[..., Coroutine[Any, Any, "PaginatedResponse[T]"]] | None = PrivateAttr(default=None)
     _base_args: dict[str, Any] | None = PrivateAttr(default=None)
+    _cursor_param: str = PrivateAttr(default="before")
 
     @model_validator(mode="before")
     @classmethod
@@ -72,14 +73,18 @@ def create_with_cursor_field(cls, data: dict[str, Any], cursor_field: str) -> "P
         return cls.model_validate(data_copy)
 
     def _inject_paginator(
-        self, fetch_func: Callable[..., Awaitable["PaginatedResponse[T]"]], base_args: dict[str, Any]
+        self,
+        fetch_func: Callable[..., Awaitable["PaginatedResponse[T]"]],
+        base_args: dict[str, Any],
+        cursor_param: str = "before",
     ) -> None:
         """Injects the necessary components to make this object iterable."""
         # Convert Awaitable to Coroutine for typing compatibility
         if callable(fetch_func):
             # This is a runtime check - mypy doesn't understand the callable compatibility
             self._fetch_func = fetch_func  # type: ignore
         self._base_args = {k: v for k, v in base_args.items() if v is not None}
+        self._cursor_param = cursor_param
 
     async def _get_next_page(self) -> "PaginatedResponse[T] | None":
         """Fetches the next page of results using the stored fetch function."""
@@ -97,10 +102,10 @@ async def _get_next_page(self) -> "PaginatedResponse[T] | None":
         # TODO: while 'before' signals pipelines younger than the current cursor.
         # TODO: This is applied irrespective of any sort (e.g. name) that would conflict with this approach.
         # TODO: Change this to 'after' once the behaviour is fixed on the deepset API
-        args["before"] = self.next_cursor
+        args[self._cursor_param] = self.next_cursor
 
         next_page = await self._fetch_func(**args)
-        next_page._inject_paginator(self._fetch_func, self._base_args)
+        next_page._inject_paginator(self._fetch_func, self._base_args, self._cursor_param)
         return next_page
 
     async def items(self) -> AsyncIterator[T]:
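
The `cursor_param` threading above is the crux of the change: each page stores a fetch function plus base arguments and re-injects them into the page it fetches. A stripped-down, self-contained sketch of the pattern (the `Page` class below is hypothetical, not the library's actual model):

from collections.abc import AsyncIterator, Awaitable, Callable
from typing import Any


class Page:
    """Minimal stand-in for PaginatedResponse: one page of results plus the
    machinery needed to fetch the next page on demand."""

    def __init__(self, data: list[Any], next_cursor: str | None) -> None:
        self.data = data
        self.next_cursor = next_cursor
        self._fetch_func: Callable[..., Awaitable["Page"]] | None = None
        self._base_args: dict[str, Any] = {}
        self._cursor_param = "before"

    def _inject_paginator(
        self,
        fetch_func: Callable[..., Awaitable["Page"]],
        base_args: dict[str, Any],
        cursor_param: str = "before",
    ) -> None:
        # Stored so the iteration logic can re-issue the same request
        # with only the cursor argument changed.
        self._fetch_func = fetch_func
        self._base_args = base_args
        self._cursor_param = cursor_param

    async def items(self) -> AsyncIterator[Any]:
        page = self
        while page is not None:
            for item in page.data:
                yield item
            if page._fetch_func is None or page.next_cursor is None:
                return
            # The configurable cursor_param is what lets logs use 'after'
            # while pipeline listings keep using 'before'.
            args = {**page._base_args, page._cursor_param: page.next_cursor}
            next_page = await page._fetch_func(**args)
            next_page._inject_paginator(page._fetch_func, page._base_args, page._cursor_param)
            page = next_page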

src/deepset_mcp/tools/pipeline.py

Lines changed: 11 additions & 4 deletions
@@ -11,7 +11,7 @@
     DeepsetPipeline,
     DeepsetSearchResponse,
     LogLevel,
-    PipelineLogList,
+    PipelineLog,
     PipelineOperationWithErrors,
     PipelineValidationResult,
     PipelineValidationResultWithYaml,
@@ -211,8 +211,14 @@ async def update_pipeline(
 
 
 async def get_pipeline_logs(
-    *, client: AsyncClientProtocol, workspace: str, pipeline_name: str, limit: int = 30, level: LogLevel | None = None
-) -> PipelineLogList | str:
+    *,
+    client: AsyncClientProtocol,
+    workspace: str,
+    pipeline_name: str,
+    limit: int = 30,
+    level: LogLevel | None = None,
+    after: str | None = None,
+) -> PaginatedResponse[PipelineLog] | str:
     """Fetches logs for a specific pipeline.
 
     Retrieves log entries for the specified pipeline, with optional filtering by log level.
@@ -223,12 +229,13 @@ async def get_pipeline_logs(
     :param pipeline_name: Name of the pipeline to fetch logs for.
     :param limit: Maximum number of log entries to return (default: 30).
     :param level: Filter logs by level. If None, returns all levels.
+    :param after: The cursor to fetch the next page of results.
 
     :returns: Pipeline logs or error message.
     """
     try:
         return await client.pipelines(workspace=workspace).get_logs(
-            pipeline_name=pipeline_name, limit=limit, level=level
+            pipeline_name=pipeline_name, limit=limit, level=level, after=after
        )
    except ResourceNotFoundError:
        return f"There is no pipeline named '{pipeline_name}' in workspace '{workspace}'."

test/integration/test_integration_pipeline_logs.py

Lines changed: 74 additions & 3 deletions
@@ -7,8 +7,9 @@
 import pytest
 
 from deepset_mcp.api.client import AsyncDeepsetClient
-from deepset_mcp.api.pipeline.models import DeepsetPipeline, PipelineLogList
+from deepset_mcp.api.pipeline.models import DeepsetPipeline
 from deepset_mcp.api.pipeline.resource import PipelineResource
+from deepset_mcp.api.shared_models import PaginatedResponse
 
 pytestmark = pytest.mark.integration
 
@@ -145,7 +146,7 @@ async def test_get_logs_for_deployed_pipeline(
     logs = await pipeline_resource.get_logs(pipeline_name=pipeline_name)
 
     # Verify the response structure
-    assert isinstance(logs, PipelineLogList)
+    assert isinstance(logs, PaginatedResponse)
     assert isinstance(logs.data, list)
     assert isinstance(logs.has_more, bool)
     assert isinstance(logs.total, int)
@@ -178,7 +179,7 @@ async def test_get_logs_for_non_deployed_pipeline(
     logs = await pipeline_resource.get_logs(pipeline_name=pipeline_name)
 
     # Should return a valid response structure even if empty
-    assert isinstance(logs, PipelineLogList)
+    assert isinstance(logs, PaginatedResponse)
     assert isinstance(logs.data, list)
     assert isinstance(logs.has_more, bool)
     assert isinstance(logs.total, int)
@@ -209,3 +210,73 @@ async def test_deployment_timeout_handling(
         timeout_seconds=1,  # Very short timeout
         poll_interval=1,
     )
+
+
+@pytest.mark.extra_slow
+@pytest.mark.asyncio
+async def test_get_logs_pagination(
+    pipeline_resource: PipelineResource,
+    simple_yaml_config: str,
+) -> None:
+    """
+    Test pagination functionality for pipeline logs.
+
+    This test:
+    1. Creates and deploys a pipeline
+    2. Waits for deployment and potentially some logs
+    3. Tests pagination by requesting logs with a small limit
+    4. Verifies cursor-based pagination works correctly
+    """
+    pipeline_name = "test-logs-pagination-pipeline"
+
+    # Step 1: Create and deploy a pipeline
+    await pipeline_resource.create(pipeline_name=pipeline_name, yaml_config=simple_yaml_config)
+    deploy_result = await pipeline_resource.deploy(pipeline_name=pipeline_name)
+    assert deploy_result.valid is True, f"Pipeline deployment failed: {deploy_result.errors}"
+
+    # Step 2: Wait for the pipeline to be deployed
+    deployed_pipeline = await wait_for_pipeline_deployment(
+        pipeline_resource=pipeline_resource,
+        pipeline_name=pipeline_name,
+        timeout_seconds=300,  # 5 minutes timeout
+        poll_interval=15,  # Check every 15 seconds
+    )
+
+    assert deployed_pipeline.status == "DEPLOYED"
+
+    # Step 3: Get first page of logs with a small limit to test pagination
+    first_page = await pipeline_resource.get_logs(pipeline_name=pipeline_name, limit=5)
+
+    # Verify the response structure
+    assert isinstance(first_page, PaginatedResponse)
+    assert isinstance(first_page.data, list)
+    assert isinstance(first_page.has_more, bool)
+    assert isinstance(first_page.total, int | type(None))
+
+    # Step 4: If there are more logs available, test cursor-based pagination
+    if first_page.has_more and first_page.next_cursor:
+        second_page = await pipeline_resource.get_logs(
+            pipeline_name=pipeline_name, limit=5, after=first_page.next_cursor
+        )
+
+        # Verify second page structure
+        assert isinstance(second_page, PaginatedResponse)
+        assert isinstance(second_page.data, list)
+
+        # Ensure we got different logs (no duplicates between pages)
+        first_page_log_ids = {log.log_id for log in first_page.data}
+        second_page_log_ids = {log.log_id for log in second_page.data}
+
+        # There should be no overlap between pages
+        assert first_page_log_ids.isdisjoint(second_page_log_ids), "Found duplicate logs across pages"
+
+    # Step 5: Test async iteration over all logs
+    all_logs_via_iteration = []
+    async for log in first_page:
+        all_logs_via_iteration.append(log)
+        # Limit to avoid infinite loops in case of issues
+        if len(all_logs_via_iteration) > 100:
+            break
+
+    # Should have at least the logs from the first page
+    assert len(all_logs_via_iteration) >= len(first_page.data)

test/unit/api/pipeline/test_pipeline_resource.py

Lines changed: 80 additions & 2 deletions
@@ -12,7 +12,6 @@
     DeepsetPipeline,
     LogLevel,
     PipelineLog,
-    PipelineLogList,
     PipelineServiceLevel,
     PipelineValidationResult,
 )
@@ -795,7 +794,7 @@ async def test_get_logs_default_params(self) -> None:
         result = await resource.get_logs(pipeline_name="test-pipeline")
 
         # Verify results
-        assert isinstance(result, PipelineLogList)
+        assert isinstance(result, PaginatedResponse)
         assert len(result.data) == 2
         assert isinstance(result.data[0], PipelineLog)
         assert result.data[0].log_id == "log1"
@@ -1008,6 +1007,85 @@ async def test_get_logs_preserves_extra_fields(self) -> None:
         assert "custom_field" in result.data[0].extra_fields
         assert result.data[0].extra_fields["custom_field"] == "custom_value"
 
+    @pytest.mark.asyncio
+    async def test_get_logs_with_pagination(self) -> None:
+        """Test getting logs with pagination parameters."""
+        # Create sample logs
+        sample_logs = [
+            create_sample_log(log_id="log1", message="First log entry"),
+            create_sample_log(log_id="log2", message="Second log entry"),
+        ]
+
+        # Create client with predefined response
+        client = DummyClient(
+            responses={
+                "test-workspace/pipelines/test-pipeline/logs": {
+                    "data": sample_logs,
+                    "has_more": True,
+                    "total": 10,
+                }
+            }
+        )
+
+        # Create resource and call get_logs method with pagination
+        resource = PipelineResource(client=client, workspace="test-workspace")
+        result = await resource.get_logs(pipeline_name="test-pipeline", limit=5, after="some_cursor")
+
+        # Verify results
+        assert isinstance(result, PaginatedResponse)
+        assert len(result.data) == 2
+        assert result.data[0].log_id == "log1"
+        assert result.data[1].log_id == "log2"
+        assert result.has_more is True
+        assert result.total == 10
+
+        # Verify request
+        assert client.requests[0]["endpoint"] == "v1/workspaces/test-workspace/pipelines/test-pipeline/logs"
+        # Logs should use the 'after' parameter (not 'before' like pipelines)
+        assert client.requests[0]["params"] == {
+            "limit": 5,
+            "filter": "origin eq 'querypipeline'",
+            "after": "some_cursor",
+        }
+
+    @pytest.mark.asyncio
+    async def test_get_logs_pagination_with_level_filter(self) -> None:
+        """Test getting logs with both pagination and a level filter."""
+        # Create sample error logs
+        sample_logs = [
+            create_sample_log(log_id="error1", message="First error", level="error"),
+            create_sample_log(log_id="error2", message="Second error", level="error"),
+        ]
+
+        # Create client with predefined response
+        client = DummyClient(
+            responses={
+                "test-workspace/pipelines/test-pipeline/logs": {
+                    "data": sample_logs,
+                    "has_more": False,
+                    "total": 2,
+                }
+            }
+        )
+
+        # Create resource and call get_logs method with level filter and pagination
+        resource = PipelineResource(client=client, workspace="test-workspace")
+        result = await resource.get_logs(
+            pipeline_name="test-pipeline", limit=10, level=LogLevel.ERROR, after="some_cursor"
+        )
+
+        # Verify results
+        assert len(result.data) == 2
+        assert all(log.level == "error" for log in result.data)
+
+        # Verify request with both level filter and cursor
+        expected_params = {
+            "limit": 10,
+            "filter": "level eq 'error' and origin eq 'querypipeline'",
+            "after": "some_cursor",
+        }
+        assert client.requests[0]["params"] == expected_params
+
     @pytest.mark.asyncio
     async def test_deploy_pipeline_success(self) -> None:
         """Test successful pipeline deployment."""
