Skip to content

Commit 9381d0d

Browse files
authored
feat: add integration tests for IndexResource (#47)
* feat: add integration tests for IndexResource * feat: add integration tests for IndexResource * fix: tests
1 parent 31c2c66 commit 9381d0d

3 files changed

Lines changed: 331 additions & 13 deletions

File tree

src/deepset_mcp/api/indexes/resource.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ async def list(self, limit: int = 10, page_number: int = 1) -> IndexList:
2828
"page_number": page_number,
2929
}
3030

31-
response = await self._client.request(f"/api/v1/workspaces/{self._workspace}/indexes", params=params)
31+
response = await self._client.request(f"/v1/workspaces/{self._workspace}/indexes", params=params)
3232

3333
raise_for_status(response)
3434

@@ -41,7 +41,7 @@ async def get(self, index_name: str) -> Index:
4141
4242
:returns: Index details.
4343
"""
44-
response = await self._client.request(f"/api/v1/workspaces/{self._workspace}/indexes/{index_name}")
44+
response = await self._client.request(f"/v1/workspaces/{self._workspace}/indexes/{index_name}")
4545

4646
raise_for_status(response)
4747

@@ -62,7 +62,7 @@ async def create(self, name: str, yaml_config: str, description: str | None = No
6262
if description is not None:
6363
data["description"] = description
6464

65-
response = await self._client.request(f"/api/v1/workspaces/{self._workspace}/indexes", method="POST", data=data)
65+
response = await self._client.request(f"v1/workspaces/{self._workspace}/indexes", method="POST", data=data)
6666

6767
raise_for_status(response)
6868

@@ -88,7 +88,7 @@ async def update(
8888
raise ValueError("At least one of updated_index_name or yaml_config must be provided")
8989

9090
response = await self._client.request(
91-
f"/api/v1/workspaces/{self._workspace}/indexes/{index_name}", method="PATCH", data=data
91+
f"/v1/workspaces/{self._workspace}/indexes/{index_name}", method="PATCH", data=data
9292
)
9393

9494
raise_for_status(response)
Lines changed: 318 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,318 @@
1+
import json
2+
3+
import pytest
4+
5+
from deepset_mcp.api.client import AsyncDeepsetClient
6+
from deepset_mcp.api.exceptions import ResourceNotFoundError
7+
from deepset_mcp.api.indexes.models import Index
8+
from deepset_mcp.api.indexes.resource import IndexResource
9+
10+
# Apply the ``integration`` marker to every test collected from this module.
pytestmark = pytest.mark.integration
11+
12+
13+
@pytest.fixture
def valid_index_config() -> str:
    """Return a JSON string whose ``config_yaml`` key holds a sample index pipeline.

    The YAML text is wrapped in a JSON object, so consumers decode the value
    with ``json.loads`` and read the ``config_yaml`` entry to get the YAML.
    """
    pipeline_yaml = """
components:
  file_classifier:
    type: haystack.components.routers.file_type_router.FileTypeRouter
    init_parameters:
      mime_types:
      - text/plain
      - application/pdf
      - text/markdown
      - text/html
      - application/vnd.openxmlformats-officedocument.wordprocessingml.document
      - application/vnd.openxmlformats-officedocument.presentationml.presentation
      - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
      - text/csv

  text_converter:
    type: haystack.components.converters.txt.TextFileToDocument
    init_parameters:
      encoding: utf-8

  pdf_converter:
    type: haystack.components.converters.pdfminer.PDFMinerToDocument
    init_parameters:
      line_overlap: 0.5
      char_margin: 2
      line_margin: 0.5
      word_margin: 0.1
      boxes_flow: 0.5
      detect_vertical: true
      all_texts: false
      store_full_path: false

  markdown_converter:
    type: haystack.components.converters.txt.TextFileToDocument
    init_parameters:
      encoding: utf-8

  html_converter:
    type: haystack.components.converters.html.HTMLToDocument
    init_parameters:
      # A dictionary of keyword arguments to customize how you want to extract content from your HTML files.
      # For the full list of available arguments, see
      # the [Trafilatura documentation](https://trafilatura.readthedocs.io/en/latest/corefunctions.html#extract).
      extraction_kwargs:
        output_format: markdown # Extract text from HTML. You can also also choose "txt"
        include_tables: true # If true, includes tables in the output
        include_links: true # If true, keeps links along with their targets

  docx_converter:
    type: haystack.components.converters.docx.DOCXToDocument
    init_parameters:
      link_format: markdown

  pptx_converter:
    type: haystack.components.converters.pptx.PPTXToDocument
    init_parameters: {}

  xlsx_converter:
    type: haystack.components.converters.xlsx.XLSXToDocument
    init_parameters: {}

  csv_converter:
    type: haystack.components.converters.csv.CSVToDocument
    init_parameters:
      encoding: utf-8

  joiner:
    type: haystack.components.joiners.document_joiner.DocumentJoiner
    init_parameters:
      join_mode: concatenate
      sort_by_score: false

  joiner_xlsx: # merge split documents with non-split xlsx documents
    type: haystack.components.joiners.document_joiner.DocumentJoiner
    init_parameters:
      join_mode: concatenate
      sort_by_score: false

  splitter:
    type: haystack.components.preprocessors.document_splitter.DocumentSplitter
    init_parameters:
      split_by: word
      split_length: 250
      split_overlap: 30
      respect_sentence_boundary: true
      language: en

  document_embedder:
    type: haystack.components.embedders.sentence_transformers_document_embedder.SentenceTransformersDocumentEmbedder
    init_parameters:
      normalize_embeddings: true
      model: intfloat/e5-base-v2

  writer:
    type: haystack.components.writers.document_writer.DocumentWriter
    init_parameters:
      document_store:
        type: haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore
        init_parameters:
          hosts:
          index: ''
          max_chunk_bytes: 104857600
          embedding_dim: 768
          return_embedding: false
          method:
          mappings:
          settings:
          create_index: true
          http_auth:
          use_ssl:
          verify_certs:
          timeout:
      policy: OVERWRITE

connections: # Defines how the components are connected
- sender: file_classifier.text/plain
  receiver: text_converter.sources
- sender: file_classifier.application/pdf
  receiver: pdf_converter.sources
- sender: file_classifier.text/markdown
  receiver: markdown_converter.sources
- sender: file_classifier.text/html
  receiver: html_converter.sources
- sender: file_classifier.application/vnd.openxmlformats-officedocument.wordprocessingml.document
  receiver: docx_converter.sources
- sender: file_classifier.application/vnd.openxmlformats-officedocument.presentationml.presentation
  receiver: pptx_converter.sources
- sender: file_classifier.application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  receiver: xlsx_converter.sources
- sender: file_classifier.text/csv
  receiver: csv_converter.sources
- sender: text_converter.documents
  receiver: joiner.documents
- sender: pdf_converter.documents
  receiver: joiner.documents
- sender: markdown_converter.documents
  receiver: joiner.documents
- sender: html_converter.documents
  receiver: joiner.documents
- sender: docx_converter.documents
  receiver: joiner.documents
- sender: pptx_converter.documents
  receiver: joiner.documents
- sender: joiner.documents
  receiver: splitter.documents
- sender: splitter.documents
  receiver: joiner_xlsx.documents
- sender: xlsx_converter.documents
  receiver: joiner_xlsx.documents
- sender: csv_converter.documents
  receiver: joiner_xlsx.documents
- sender: joiner_xlsx.documents
  receiver: document_embedder.documents
- sender: document_embedder.documents
  receiver: writer.documents

inputs: # Define the inputs for your pipeline
  files: # This component will receive the files to index as input
  - file_classifier.sources

max_runs_per_component: 100

metadata: {}
"""
    # Wrap the YAML in a one-key JSON document; tests unwrap it with json.loads.
    payload = {"config_yaml": pipeline_yaml}
    return json.dumps(payload)
184+
185+
186+
@pytest.fixture
def index_resource(
    client: AsyncDeepsetClient,
    test_workspace: str,
) -> IndexResource:
    """Build the IndexResource under test.

    :param client: API client fixture used to issue requests.
    :param test_workspace: Name of the workspace the resource operates on.
    :returns: An IndexResource bound to ``client`` and ``test_workspace``.
    """
    # Construction awaits nothing, so a plain synchronous fixture is enough.
    # An ``async def`` fixture under ``@pytest.fixture`` is only resolved when
    # pytest-asyncio runs in auto mode; a sync fixture works in either mode.
    return IndexResource(client=client, workspace=test_workspace)
193+
194+
195+
@pytest.fixture
def default_index_name() -> str:
    """Return the index name used by tests that work with a single index."""
    name = "test-index"
    return name
198+
199+
200+
@pytest.mark.asyncio
async def test_create_index(
    index_resource: IndexResource,
    valid_index_config: str,
    default_index_name: str,
) -> None:
    """Create an index, then read it back and compare its name and YAML."""
    # The fixture hands over JSON; the YAML lives under "config_yaml".
    decoded = json.loads(valid_index_config)

    await index_resource.create(
        name=default_index_name,
        yaml_config=decoded["config_yaml"],
        description="Test index description",
    )

    # Fetch the index again to confirm the create call took effect.
    fetched: Index = await index_resource.get(index_name=default_index_name)

    assert fetched.name == default_index_name
    assert fetched.config_yaml == decoded["config_yaml"]
218+
219+
220+
@pytest.mark.asyncio
async def test_list_indexes(
    index_resource: IndexResource,
    valid_index_config: str,
) -> None:
    """Test listing indexes, both in one request and page by page.

    Three indexes are created, then listed with a limit large enough to
    return them all, and finally fetched one per page to check that
    consecutive pages return different indexes.

    NOTE(review): the exact-count assertion presumably relies on the
    ``test_workspace`` fixture providing a workspace with no other
    indexes -- confirm against the fixture definition.
    """
    config = json.loads(valid_index_config)

    # Create the indexes one after another; each create call is awaited in turn.
    index_names = [f"test-list-index-{i}" for i in range(3)]
    for index_name in index_names:
        await index_resource.create(name=index_name, yaml_config=config["config_yaml"])

    # A single request with a limit above the number of created indexes.
    indexes = await index_resource.list(limit=10)
    assert len(indexes.data) == 3

    # Every index created above must show up in the listing.
    retrieved_names = {index.name for index in indexes.data}
    missing = set(index_names) - retrieved_names
    assert not missing, f"indexes missing from listing: {sorted(missing)}"

    # Pagination: with three indexes present, two one-item pages must both
    # be populated (the count assertion above already guarantees > 1 item).
    first_page = await index_resource.list(limit=1)
    assert len(first_page.data) == 1

    second_page = await index_resource.list(page_number=2, limit=1)
    assert len(second_page.data) == 1

    # Consecutive pages must not return the same index.
    assert first_page.data[0].pipeline_index_id != second_page.data[0].pipeline_index_id
255+
256+
257+
@pytest.mark.asyncio
async def test_get_index(
    index_resource: IndexResource,
    valid_index_config: str,
    default_index_name: str,
) -> None:
    """Fetch one index by name and check the returned name and YAML."""
    decoded = json.loads(valid_index_config)

    # Set up: the index has to exist before it can be fetched.
    await index_resource.create(
        name=default_index_name,
        yaml_config=decoded["config_yaml"],
    )

    fetched: Index = await index_resource.get(index_name=default_index_name)

    assert fetched.name == default_index_name
    assert fetched.config_yaml == decoded["config_yaml"]
272+
273+
274+
@pytest.mark.asyncio
async def test_update_index(
    index_resource: IndexResource,
    valid_index_config: str,
) -> None:
    """Rename an index, then change its YAML, verifying each step by fetching it."""
    name_before = "test-update-index-original"
    name_after = "test-update-index-updated"

    decoded = json.loads(valid_index_config)
    await index_resource.create(name=name_before, yaml_config=decoded["config_yaml"])

    # Step 1: rename only.
    await index_resource.update(index_name=name_before, updated_index_name=name_after)

    renamed: Index = await index_resource.get(index_name=name_after)
    assert renamed.name == name_after

    # Step 2: config only, addressed by the new name.
    modified_yaml = decoded["config_yaml"].replace("split_length: 250", "split_length: 300")
    await index_resource.update(index_name=name_after, yaml_config=modified_yaml)

    reconfigured = await index_resource.get(index_name=name_after)
    assert reconfigured.config_yaml == modified_yaml
307+
308+
309+
@pytest.mark.asyncio
async def test_get_nonexistent_index(
    index_resource: IndexResource,
) -> None:
    """Check that fetching an unknown index name raises ResourceNotFoundError."""
    missing_name = "non-existent-index"

    # No index with this name is created by this test, so the lookup must fail.
    with pytest.raises(ResourceNotFoundError):
        await index_resource.get(index_name=missing_name)

0 commit comments

Comments
 (0)