Skip to content

Commit 8863b47

Browse files
tomingtomingGitHub Copilot
andauthored
feat: add chroma_update_documents tool (#17)
Co-authored-by: GitHub Copilot <[email protected]>
1 parent 442090a commit 8863b47

File tree

2 files changed

+226
-2
lines changed

2 files changed

+226
-2
lines changed

src/chroma_mcp/server.py

Lines changed: 76 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,82 @@ async def chroma_get_documents(
392392
limit=limit,
393393
offset=offset
394394
)
395-
395+
396+
@mcp.tool()
async def chroma_update_documents(
    collection_name: str,
    ids: List[str],
    embeddings: Optional[List[List[float]]] = None,
    metadatas: Optional[List[Dict]] = None,
    documents: Optional[List[str]] = None
) -> str:
    """Update existing documents in a Chroma collection.

    Args:
        collection_name: Name of the collection holding the documents
        ids: IDs of the documents to update (required, must be non-empty)
        embeddings: Optional replacement embeddings, one per entry in ids
        metadatas: Optional replacement metadata dictionaries, one per entry in ids
        documents: Optional replacement document texts, one per entry in ids

    Returns:
        A confirmation message stating how many documents the update
        request covered.

    Raises:
        ValueError: If 'ids' is empty, if none of 'embeddings', 'metadatas'
            or 'documents' is given, or if a given list differs in length
            from 'ids'.
        Exception: If the collection cannot be retrieved or the update
            operation fails.
    """
    if not ids:
        raise ValueError("The 'ids' list cannot be empty.")

    # Keep only the fields the caller actually supplied; insertion order
    # (embeddings, metadatas, documents) is also the validation order.
    candidate_fields = {
        "embeddings": embeddings,
        "metadatas": metadatas,
        "documents": documents,
    }
    supplied = {
        field: values
        for field, values in candidate_fields.items()
        if values is not None
    }

    if not supplied:
        raise ValueError(
            "At least one of 'embeddings', 'metadatas', or 'documents' "
            "must be provided for update."
        )

    # Every supplied list must line up one-to-one with ids.
    expected_count = len(ids)
    for field, values in supplied.items():
        if len(values) != expected_count:
            raise ValueError(
                f"Length of '{field}' list must match length of 'ids' list."
            )

    client = get_chroma_client()
    try:
        collection = client.get_collection(collection_name)
    except Exception as e:
        raise Exception(
            f"Failed to get collection '{collection_name}': {str(e)}"
        ) from e

    try:
        # None-valued fields were dropped above, so only real updates are sent.
        collection.update(ids=ids, **supplied)
    except Exception as e:
        raise Exception(
            f"Failed to update documents in collection '{collection_name}': {str(e)}"
        ) from e

    return (
        f"Successfully processed update request for {expected_count} documents in "
        f"collection '{collection_name}'. Note: Non-existent IDs are ignored by ChromaDB."
    )
470+
396471
def validate_thought_data(input_data: Dict) -> Dict:
397472
"""Validate thought data structure."""
398473
if not input_data.get("sessionId"):

tests/test_server.py

Lines changed: 150 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
import os
66
from unittest.mock import patch, MagicMock
77
import argparse
8+
from mcp.server.fastmcp.exceptions import ToolError # Import ToolError
9+
import json # Import json for parsing results
10+
811

912
# Add pytest-asyncio marker
1013
pytest_plugins = ['pytest_asyncio']
@@ -261,4 +264,150 @@ def test_required_args_for_cloud_client():
261264
# Check that error was called for missing api-key (the first check in the code)
262265
mock_error.assert_called_with(
263266
"API key must be provided via --api-key flag or CHROMA_API_KEY environment variable when using cloud client"
264-
)
267+
)
268+
269+
# --- Tests for chroma_update_documents ---
270+
271+
@pytest.mark.asyncio
async def test_update_documents_success():
    """Create a collection, add two documents, update them, and read them back."""
    collection_name = "test_update_collection_success"
    doc_ids = ["doc1", "doc2"]
    original_texts = ["Initial doc 1", "Initial doc 2"]
    original_meta = [{"source": "initial"}, {"source": "initial"}]
    # Only the first document's text and the second document's metadata
    # actually change; the other values are resent unchanged.
    expected_texts = ["Updated doc 1", original_texts[1]]
    expected_meta = [original_meta[0], {"source": "updated"}]

    add_args = {
        "collection_name": collection_name,
        "documents": original_texts,
        "metadatas": original_meta,
        "ids": doc_ids
    }
    update_args = {
        "collection_name": collection_name,
        "ids": doc_ids,
        "documents": expected_texts,
        "metadatas": expected_meta
    }
    get_args = {
        "collection_name": collection_name,
        "ids": doc_ids,
        "include": ["documents", "metadatas"]
    }

    try:
        # Set up the collection with its initial content.
        await mcp.call_tool("chroma_create_collection", {"collection_name": collection_name})
        await mcp.call_tool("chroma_add_documents", add_args)

        # Apply the update, sending documents and metadatas together.
        update_result = await mcp.call_tool("chroma_update_documents", update_args)
        assert len(update_result) == 1
        expected_message = (
            f"Successfully processed update request for {len(doc_ids)} documents"
        )
        assert expected_message in update_result[0].text

        # Read the documents back; the tool result carries JSON text.
        get_result_raw = await mcp.call_tool("chroma_get_documents", get_args)
        assert len(get_result_raw) == 1
        fetched = json.loads(get_result_raw[0].text)
        assert isinstance(fetched, dict)

        assert fetched.get("ids") == doc_ids
        assert fetched.get("documents") == expected_texts
        assert fetched.get("metadatas") == expected_meta

    finally:
        # Always remove the collection so reruns start clean.
        await mcp.call_tool("chroma_delete_collection", {"collection_name": collection_name})
327+
328+
@pytest.mark.asyncio
async def test_update_documents_invalid_args():
    """Test update documents with invalid arguments.

    Covers the three argument errors the tool documents: an empty 'ids'
    list, no update fields at all, and an update list whose length differs
    from 'ids'. Each is expected to surface as a ToolError carrying the
    tool's ValueError message.
    """
    collection_name = "test_update_collection_invalid"

    try:
        await mcp.call_tool("chroma_create_collection", {"collection_name": collection_name})
        await mcp.call_tool("chroma_add_documents", {
            "collection_name": collection_name,
            "documents": ["Test doc"],
            "ids": ["doc1"]
        })

        # Test with empty IDs list - Expect ToolError wrapping ValueError
        with pytest.raises(ToolError, match="The 'ids' list cannot be empty."):
            await mcp.call_tool("chroma_update_documents", {
                "collection_name": collection_name,
                "ids": [],
                "documents": ["New content"]
            })

        # Test with no update fields provided - Expect ToolError wrapping ValueError
        with pytest.raises(
            ToolError,
            match="At least one of 'embeddings', 'metadatas', or 'documents' must be provided"
        ):
            await mcp.call_tool("chroma_update_documents", {
                "collection_name": collection_name,
                "ids": ["doc1"]
                # No embeddings, metadatas, or documents
            })

        # Test with an update list longer than 'ids' - Expect ToolError
        # wrapping the length-mismatch ValueError
        with pytest.raises(
            ToolError,
            match="Length of 'documents' list must match length of 'ids' list"
        ):
            await mcp.call_tool("chroma_update_documents", {
                "collection_name": collection_name,
                "ids": ["doc1"],
                "documents": ["New content", "Extra content"]
            })

    finally:
        # Clean up
        await mcp.call_tool("chroma_delete_collection", {"collection_name": collection_name})
363+
364+
@pytest.mark.asyncio
async def test_update_documents_collection_not_found():
    """Updating documents in a collection that does not exist raises ToolError."""
    update_args = {
        "collection_name": "non_existent_collection",
        "ids": ["doc1"],
        "documents": ["New content"]
    }
    # The tool's own exception message is expected inside the ToolError.
    with pytest.raises(ToolError, match="Failed to get collection"):
        await mcp.call_tool("chroma_update_documents", update_args)
374+
375+
@pytest.mark.asyncio
async def test_update_documents_id_not_found():
    """Updating an ID that is not in the collection succeeds and changes nothing."""
    collection_name = "test_update_id_not_found"
    add_args = {
        "collection_name": collection_name,
        "documents": ["Test doc"],
        "ids": ["existing_id"]
    }
    update_args = {
        "collection_name": collection_name,
        "ids": ["non_existent_id"],
        "documents": ["New content"]
    }
    get_args = {
        "collection_name": collection_name,
        "ids": ["existing_id"],
        "include": ["documents"]
    }

    try:
        await mcp.call_tool("chroma_create_collection", {"collection_name": collection_name})
        await mcp.call_tool("chroma_add_documents", add_args)

        # The unknown ID must not raise; the tool still reports success.
        update_result = await mcp.call_tool("chroma_update_documents", update_args)
        assert len(update_result) == 1
        assert "Successfully processed update request" in update_result[0].text

        # The document that does exist must be left exactly as it was added.
        get_result_raw = await mcp.call_tool("chroma_get_documents", get_args)
        assert len(get_result_raw) == 1
        fetched = json.loads(get_result_raw[0].text)
        assert isinstance(fetched, dict)
        assert "documents" in fetched
        assert isinstance(fetched["documents"], list)
        assert fetched["documents"] == ["Test doc"]

    finally:
        # Always remove the collection so reruns start clean.
        await mcp.call_tool("chroma_delete_collection", {"collection_name": collection_name})

0 commit comments

Comments
 (0)