1212from api .schemas .chunks import Chunk
1313from api .schemas .collections import CollectionVisibility
1414from api .schemas .core .context import RequestContext
15- from api .schemas .documents import Chunker
1615from api .schemas .me .info import UserInfo
1716from api .schemas .usage import Usage
1817from api .utils .exceptions import (
@@ -70,11 +69,13 @@ async def test_create_document_collection_no_longer_exists():
7069 await document_manager .create_document (
7170 collection_id = 123 ,
7271 file = mock_file ,
73- chunker = Chunker .RECURSIVE_CHARACTER_TEXT_SPLITTER ,
72+ name = None ,
73+ disable_chunking = False ,
7474 chunk_size = 1000 ,
7575 chunk_overlap = 100 ,
7676 is_separator_regex = False ,
7777 separators = ["\n \n " , "\n " , " " ],
78+ preset_separators = "markdown" ,
7879 chunk_min_size = 50 ,
7980 metadata = mock_metadata ,
8081 elasticsearch_vector_store = mock_elasticsearch_vector_store ,
@@ -346,7 +347,7 @@ async def test_create_document_success(monkeypatch):
346347
347348 chunks = ["chunk-1" , "chunk-2" ]
348349 document_manager ._split = MagicMock (return_value = chunks )
349- document_manager ._upsert = AsyncMock ()
350+ document_manager ._upsert_document_chunks = AsyncMock ()
350351
351352 mock_file = create_upload_file ("Test content" , "test.txt" , "text/plain" )
352353 mock_metadata = {"source_tags" : ["test" ]}
@@ -374,15 +375,19 @@ async def test_create_document_success(monkeypatch):
374375 collection_id = 123 ,
375376 file = mock_file ,
376377 metadata = mock_metadata ,
377- chunker = Chunker .RECURSIVE_CHARACTER_TEXT_SPLITTER ,
378378 chunk_size = 1000 ,
379379 chunk_overlap = 100 ,
380380 chunk_min_size = 50 ,
381+ name = None ,
382+ disable_chunking = False ,
383+ separators = [],
384+ preset_separators = "markdown" ,
385+ is_separator_regex = False ,
381386 )
382387
383388 assert document_id == 555
384389 document_manager ._split .assert_called_once ()
385- document_manager ._upsert .assert_awaited_once ()
390+ document_manager ._upsert_document_chunks .assert_awaited_once ()
386391 mock_session .commit .assert_awaited_once ()
387392
388393
@@ -559,7 +564,7 @@ async def test_delete_document_success():
559564
560565 assert mock_session .execute .await_count == 2
561566 mock_session .commit .assert_awaited_once ()
562- mock_elasticsearch_vector_store .delete_document .assert_awaited_once_with (client = mock_elasticsearch_client , collection_id = 123 , document_id = 456 )
567+ mock_elasticsearch_vector_store .delete_document .assert_awaited_once_with (client = mock_elasticsearch_client , document_id = 456 )
563568
564569
565570@pytest .mark .asyncio
@@ -618,7 +623,7 @@ async def test_get_chunks_success():
618623
619624 document_manager = DocumentManager (vector_store_model = "test-model" , parser_manager = mock_parser )
620625
621- chunks = await document_manager .get_chunks (
626+ chunks = await document_manager .get_document_chunks (
622627 postgres_session = mock_session ,
623628 elasticsearch_vector_store = mock_elasticsearch_vector_store ,
624629 elasticsearch_client = mock_elasticsearch_client ,
@@ -632,7 +637,7 @@ async def test_get_chunks_success():
632637 assert chunks [0 ].id == 1
633638 assert chunks [1 ].id == 2
634639 mock_elasticsearch_vector_store .get_chunks .assert_awaited_once_with (
635- client = mock_elasticsearch_client , collection_id = 123 , document_id = 456 , offset = 0 , limit = 10 , chunk_id = None
640+ client = mock_elasticsearch_client , document_id = 456 , offset = 0 , limit = 10 , chunk_id = None
636641 )
637642
638643
@@ -653,7 +658,7 @@ async def test_get_chunks_document_not_found():
653658 document_manager = DocumentManager (vector_store_model = "test-model" , parser_manager = mock_parser )
654659
655660 with pytest .raises (DocumentNotFoundException ):
656- await document_manager .get_chunks (
661+ await document_manager .get_document_chunks (
657662 postgres_session = mock_session ,
658663 elasticsearch_vector_store = mock_elasticsearch_vector_store ,
659664 elasticsearch_client = mock_elasticsearch_client ,
@@ -902,10 +907,14 @@ async def test_create_document_parsing_fails():
902907 collection_id = 123 ,
903908 file = mock_file ,
904909 metadata = mock_metadata ,
905- chunker = Chunker .RECURSIVE_CHARACTER_TEXT_SPLITTER ,
906910 chunk_size = 1000 ,
907911 chunk_overlap = 100 ,
908912 chunk_min_size = 50 ,
913+ name = None ,
914+ disable_chunking = False ,
915+ separators = [],
916+ preset_separators = "markdown" ,
917+ is_separator_regex = False ,
909918 )
910919
911920
@@ -957,10 +966,14 @@ async def test_create_document_empty_chunks():
957966 collection_id = 123 ,
958967 file = mock_file ,
959968 metadata = mock_metadata ,
960- chunker = Chunker .RECURSIVE_CHARACTER_TEXT_SPLITTER ,
961969 chunk_size = 1000 ,
962970 chunk_overlap = 100 ,
963971 chunk_min_size = 50 ,
972+ name = None ,
973+ disable_chunking = False ,
974+ separators = [],
975+ preset_separators = "markdown" ,
976+ is_separator_regex = False ,
964977 )
965978
966979 assert "No chunks were extracted" in str (exc_info .value .detail )
@@ -998,8 +1011,8 @@ async def test_create_document_vectorization_fails(monkeypatch):
9981011 chunks = ["chunk-1" , "chunk-2" ]
9991012 document_manager ._split = MagicMock (return_value = chunks )
10001013
1001- # Mock _upsert to fail
1002- document_manager ._upsert = AsyncMock (side_effect = Exception ("Vectorization error" ))
1014+ # Mock chunk upsert to fail
1015+ document_manager ._upsert_document_chunks = AsyncMock (side_effect = Exception ("Vectorization error" ))
10031016
10041017 mock_file = create_upload_file ("Test content" , "test.txt" , "text/plain" )
10051018 mock_metadata = {"source_tags" : ["test" ]}
@@ -1027,16 +1040,20 @@ async def test_create_document_vectorization_fails(monkeypatch):
10271040 collection_id = 123 ,
10281041 file = mock_file ,
10291042 metadata = mock_metadata ,
1030- chunker = Chunker .RECURSIVE_CHARACTER_TEXT_SPLITTER ,
10311043 chunk_size = 1000 ,
10321044 chunk_overlap = 100 ,
10331045 chunk_min_size = 50 ,
1046+ name = None ,
1047+ disable_chunking = False ,
1048+ separators = [],
1049+ preset_separators = "markdown" ,
1050+ is_separator_regex = False ,
10341051 )
10351052
10361053 assert "Vectorization failed" in str (exc_info .value .detail )
10371054 # Verify document was attempted to be deleted from Postgres
10381055 assert mock_session .execute .await_count == 4 # collection check, insert, delete check, delete
1039- mock_elasticsearch_vector_store .delete_document .assert_awaited_once_with (client = mock_elasticsearch_client , collection_id = 123 , document_id = 555 )
1056+ mock_elasticsearch_vector_store .delete_document .assert_awaited_once_with (client = mock_elasticsearch_client , document_id = 555 )
10401057
10411058
10421059@pytest .mark .asyncio
0 commit comments