Skip to content

Commit ca51be8

Browse files
authored
feat: add cursor-based pagination to index resource (#176)
1 parent 7505005 commit ca51be8

6 files changed

Lines changed: 366 additions & 56 deletions

File tree

src/deepset_mcp/api/indexes/protocols.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,26 @@
44

55
from typing import Protocol
66

7-
from deepset_mcp.api.indexes.models import Index, IndexList
7+
from deepset_mcp.api.indexes.models import Index
88
from deepset_mcp.api.pipeline.models import PipelineValidationResult
9+
from deepset_mcp.api.shared_models import PaginatedResponse
910

1011

1112
class IndexResourceProtocol(Protocol):
1213
"""Protocol defining the interface for IndexResource."""
1314

14-
async def list(self, limit: int = 10, page_number: int = 1) -> IndexList:
15+
async def list(self, limit: int = 10, after: str | None = None) -> PaginatedResponse[Index]:
1516
"""List indexes in the configured workspace."""
1617
...
1718

1819
async def get(self, index_name: str) -> Index:
1920
"""Fetch a single index by its name."""
2021
...
2122

22-
async def create(self, name: str, yaml_config: str, description: str | None = None) -> Index:
23+
async def create(self, index_name: str, yaml_config: str, description: str | None = None) -> Index:
2324
"""Create a new index with the given name and configuration.
2425
25-
:param name: Name of the index
26+
:param index_name: Name of the index
2627
:param yaml_config: YAML configuration for the index
2728
:param description: Optional description for the index
2829
:returns: Created index details

src/deepset_mcp/api/indexes/resource.py

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
from typing import Any
56
from urllib.parse import quote
67

78
from deepset_mcp.api.exceptions import UnexpectedAPIError
8-
from deepset_mcp.api.indexes.models import Index, IndexList
9+
from deepset_mcp.api.indexes.models import Index
910
from deepset_mcp.api.indexes.protocols import IndexResourceProtocol
1011
from deepset_mcp.api.pipeline.models import PipelineValidationResult, ValidationError
1112
from deepset_mcp.api.protocols import AsyncClientProtocol
13+
from deepset_mcp.api.shared_models import PaginatedResponse
1214
from deepset_mcp.api.transport import raise_for_status
1315

1416

@@ -24,26 +26,45 @@ def __init__(self, client: AsyncClientProtocol, workspace: str) -> None:
2426
self._client = client
2527
self._workspace = workspace
2628

27-
async def list(self, limit: int = 10, page_number: int = 1) -> IndexList:
28-
"""List all indexes.
29+
async def list(self, limit: int = 10, after: str | None = None) -> PaginatedResponse[Index]:
30+
"""Lists indexes and returns the first page of results.
2931
30-
:param limit: Maximum number of indexes to return.
31-
:param page_number: Page number for pagination.
32+
The returned object can be iterated over to fetch subsequent pages.
3233
33-
:returns: List of indexes.
34+
:param limit: The maximum number of indexes to return per page.
35+
:param after: The cursor to fetch the next page of results.
36+
:returns: A `PaginatedResponse` object containing the first page of indexes.
3437
"""
35-
params = {
36-
"limit": limit,
37-
"page_number": page_number,
38-
}
39-
40-
response = await self._client.request(
41-
f"/v1/workspaces/{quote(self._workspace, safe='')}/indexes", params=params
38+
# 1. Prepare arguments for the initial API call
39+
# TODO: Pagination in the deepset API is currently implemented in an unintuitive way.
40+
# TODO: The cursor is always time-based (created_at), and 'after' signifies indexes older than the current cursor,
41+
# TODO: while 'before' signals indexes younger than the current cursor.
42+
# TODO: This is applied irrespective of any sort order (e.g. by name), even when that sort conflicts with this approach.
43+
# TODO: Change this to 'after' once the behaviour is fixed on the deepset API
44+
request_params = {"limit": limit, "before": after}
45+
request_params = {k: v for k, v in request_params.items() if v is not None}
46+
47+
# 2. Make the first API call using a private, stateless method
48+
page = await self._list_api_call(**request_params)
49+
50+
# 3. Inject the logic needed for subsequent fetches into the response object
51+
page._inject_paginator(
52+
fetch_func=self._list_api_call,
53+
# Base args for the *next* fetch don't include initial cursors
54+
base_args={"limit": limit},
4255
)
56+
return page
4357

44-
raise_for_status(response)
58+
async def _list_api_call(self, **kwargs: Any) -> PaginatedResponse[Index]:
59+
"""A private, stateless method that performs the raw API call."""
60+
resp = await self._client.request(
61+
endpoint=f"v1/workspaces/{quote(self._workspace, safe='')}/indexes", method="GET", params=kwargs
62+
)
63+
raise_for_status(resp)
64+
if resp.json is None:
65+
raise UnexpectedAPIError(status_code=resp.status_code, message="Empty response", detail=None)
4566

46-
return IndexList.model_validate(response.json)
67+
return PaginatedResponse[Index].create_with_cursor_field(resp.json, "pipeline_index_id")
4768

4869
async def get(self, index_name: str) -> Index:
4970
"""Get a specific index.

src/deepset_mcp/tools/indexes.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,29 @@
33
# SPDX-License-Identifier: Apache-2.0
44

55
from deepset_mcp.api.exceptions import BadRequestError, ResourceNotFoundError, UnexpectedAPIError
6-
from deepset_mcp.api.indexes.models import Index, IndexList
6+
from deepset_mcp.api.indexes.models import Index
77
from deepset_mcp.api.pipeline import PipelineValidationResult
88
from deepset_mcp.api.protocols import AsyncClientProtocol
9+
from deepset_mcp.api.shared_models import PaginatedResponse
910

1011

11-
async def list_indexes(*, client: AsyncClientProtocol, workspace: str) -> IndexList | str:
12-
"""Use this to list available indexes on the deepset platform in your workspace.
12+
async def list_indexes(
13+
*, client: AsyncClientProtocol, workspace: str, after: str | None = None
14+
) -> PaginatedResponse[Index] | str:
15+
"""Retrieves a list of all indexes available within the currently configured deepset workspace.
1316
14-
:param client: Deepset API client to use for requesting indexes.
15-
:param workspace: Workspace of which to list indexes.
17+
:param client: The async client for API communication.
18+
:param workspace: The workspace name.
19+
:param after: The cursor to fetch the next page of results.
20+
If there are more results to fetch, the cursor will appear as `next_cursor` on the response.
21+
:returns: List of indexes or error message.
1622
"""
1723
try:
18-
result = await client.indexes(workspace=workspace).list()
19-
except ResourceNotFoundError as e:
20-
return f"Error listing indexes. Error: {e.message} ({e.status_code})"
21-
22-
return result
24+
return await client.indexes(workspace=workspace).list(after=after)
25+
except ResourceNotFoundError:
26+
return f"There is no workspace named '{workspace}'. Did you mean to configure it?"
27+
except (BadRequestError, UnexpectedAPIError) as e:
28+
return f"Failed to list indexes: {e}"
2329

2430

2531
async def get_index(*, client: AsyncClientProtocol, workspace: str, index_name: str) -> Index | str:
@@ -55,7 +61,7 @@ async def create_index(
5561
"""
5662
try:
5763
result = await client.indexes(workspace=workspace).create(
58-
name=index_name, yaml_config=yaml_configuration, description=description
64+
index_name=index_name, yaml_config=yaml_configuration, description=description
5965
)
6066
except ResourceNotFoundError:
6167
return f"There is no workspace named '{workspace}'. Did you mean to configure it?"

test/integration/test_integration_index_resource.py

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from deepset_mcp.api.indexes.models import Index
1212
from deepset_mcp.api.indexes.resource import IndexResource
1313
from deepset_mcp.api.pipeline.models import PipelineValidationResult
14+
from deepset_mcp.api.shared_models import PaginatedResponse
1415

1516
pytestmark = pytest.mark.integration
1617

@@ -238,25 +239,48 @@ async def test_list_indexes(
238239

239240
# Test listing without pagination
240241
indexes = await index_resource.list(limit=10)
242+
assert isinstance(indexes, PaginatedResponse)
241243
assert len(indexes.data) == 3
242244

243245
# Verify our created indexes are in the list
244246
retrieved_names = [p.name for p in indexes.data]
245247
for name in index_names:
246248
assert name in retrieved_names
247249

248-
# Test pagination
249-
if len(indexes.data) > 1:
250-
# Get the first page with 1 item
251-
first_page = await index_resource.list(limit=1)
252-
assert len(first_page.data) == 1
253250

254-
# Get the second page
255-
second_page = await index_resource.list(page_number=2, limit=1)
256-
assert len(second_page.data) == 1
251+
@pytest.mark.asyncio
252+
async def test_pagination_iteration(
253+
index_resource: IndexResource,
254+
valid_index_config: str,
255+
) -> None:
256+
"""Test iterating over multiple pages of indexes using the async iterator."""
257+
# Create several test indexes
258+
config = json.loads(valid_index_config)
259+
index_names = []
260+
for i in range(5):
261+
index_name = f"test-pagination-index-{i}"
262+
index_names.append(index_name)
263+
await index_resource.create(index_name=index_name, yaml_config=config["yaml_config"])
264+
265+
# Get the first page with a small limit to ensure pagination
266+
paginator = await index_resource.list(limit=2)
267+
268+
# Collect all indexes by iterating through pages
269+
all_indexes = []
270+
async for index in paginator:
271+
all_indexes.append(index)
272+
273+
# Verify we got all our created indexes (at least 5)
274+
assert len(all_indexes) >= 5
257275

258-
# Verify they're different indexes
259-
assert first_page.data[0].pipeline_index_id != second_page.data[0].pipeline_index_id
276+
# Verify all indexes are Index instances
277+
for index in all_indexes:
278+
assert isinstance(index, Index)
279+
280+
# Verify our created indexes are in the results
281+
retrieved_names = [p.name for p in all_indexes]
282+
for name in index_names:
283+
assert name in retrieved_names
260284

261285

262286
@pytest.mark.asyncio

0 commit comments

Comments
 (0)