1818
1919from mmore .index .indexer import Indexer
2020from mmore .rag .model import DenseModelConfig , SparseModelConfig
21- from mmore .run_index_api import make_router as make_index_router
21+ from mmore .run_index_api import (
22+ _apply_uploaded_file_metadata ,
23+ )
24+ from mmore .run_index_api import (
25+ make_router as make_index_router ,
26+ )
2227from mmore .run_retriever import make_router , save_results
23- from mmore .type import MultimodalSample
28+ from mmore .type import DocumentMetadata , MultimodalSample
2429
2530_COLLECTION = "my_docs"
2631
@@ -216,13 +221,16 @@ def test_save_results_writes_valid_json(tmp_path):
216221 docs = [
217222 Document (
218223 page_content = "Paris is the capital." ,
219- metadata = {
220- "rank" : 1 ,
221- "similarity" : 0.9 ,
222- "id" : "1" ,
223- "page_numbers" : [],
224- "paragraph_numbers" : [],
225- },
224+ metadata = DocumentMetadata (
225+ file_path = "paris.txt" ,
226+ extra = {
227+ "rank" : 1 ,
228+ "similarity" : 0.9 ,
229+ "id" : "1" ,
230+ "page_numbers" : [],
231+ "paragraph_numbers" : [],
232+ },
233+ ).to_dict (),
226234 )
227235 ]
228236 results = [docs ]
@@ -246,25 +254,31 @@ def test_save_results_multiple_queries(tmp_path):
246254 [
247255 Document (
248256 page_content = "doc A" ,
249- metadata = {
250- "rank" : 1 ,
251- "similarity" : 0.8 ,
252- "id" : "a" ,
253- "page_numbers" : [],
254- "paragraph_numbers" : [],
255- },
257+ metadata = DocumentMetadata (
258+ file_path = "doc-a.txt" ,
259+ extra = {
260+ "rank" : 1 ,
261+ "similarity" : 0.8 ,
262+ "id" : "a" ,
263+ "page_numbers" : [],
264+ "paragraph_numbers" : [],
265+ },
266+ ).to_dict (),
256267 )
257268 ],
258269 [
259270 Document (
260271 page_content = "doc B" ,
261- metadata = {
262- "rank" : 1 ,
263- "similarity" : 0.7 ,
264- "id" : "b" ,
265- "page_numbers" : [],
266- "paragraph_numbers" : [],
267- },
272+ metadata = DocumentMetadata (
273+ file_path = "doc-b.txt" ,
274+ extra = {
275+ "rank" : 1 ,
276+ "similarity" : 0.7 ,
277+ "id" : "b" ,
278+ "page_numbers" : [],
279+ "paragraph_numbers" : [],
280+ },
281+ ).to_dict (),
268282 )
269283 ],
270284 ]
@@ -294,10 +308,20 @@ def _fake_doc(file_path: str, document_id: str = "doc") -> MultimodalSample:
294308 document_id = document_id ,
295309 text = "Test document content." ,
296310 modalities = [],
297- metadata = { " file_path" : file_path } ,
311+ metadata = DocumentMetadata ( file_path = file_path ) ,
298312 )
299313
300314
def test_apply_uploaded_file_metadata_preserves_chunk_suffix():
    """Verify the fields of a sample after uploaded-file metadata is applied.

    A fake sample created with the document id ``"default-doc"`` is passed
    to ``_apply_uploaded_file_metadata`` together with the client-supplied
    id ``"client-doc"`` and the original filename. Afterwards the sample is
    expected to carry the client id as ``document_id``, the id
    ``"client-doc+0"``, and the filename under ``metadata.extra``.
    """
    sample = _fake_doc("/tmp/original-name.txt", document_id="default-doc")
    samples = [sample]

    # The return value is ignored: the assertions below inspect the very
    # object that was handed in, so the helper must update it in place.
    _apply_uploaded_file_metadata(samples, "client-doc", "original-name.txt")

    assert sample.document_id == "client-doc"
    # The "+0" suffix must still be present on the id after the rewrite.
    assert sample.id == "client-doc+0"
    assert sample.metadata.extra["filename"] == "original-name.txt"
323+
324+
301325@pytest .fixture (scope = "module" )
302326def indexer_client (tmp_path_factory ):
303327 """Builds the indexer FastAPI app."""
@@ -392,6 +416,81 @@ def test_upload_file_success(indexer_client):
392416 assert Path (upload_dir , "new-doc" ).exists ()
393417
394418
def test_uploaded_file_has_filename_in_list_files(tmp_path):
    """Check that an uploaded file is listed with its original filename.

    The test builds an indexer app and a retriever app that share one
    Milvus database file and one YAML config, uploads ``listed-doc.txt``
    through ``POST /v1/files`` with the file id ``listed-doc``, and then
    queries ``GET /list_files`` on the retriever. The entry for
    ``listed-doc`` must report ``listed-doc.txt`` as its filename.

    Args:
        tmp_path: pytest-provided temporary directory holding the upload
            directory, the database file and the config file.
    """
    uploads = tmp_path / "uploads"
    uploads.mkdir()
    database_file = str(tmp_path / "uploaded_list_files.db")
    config_path = tmp_path / "config.yaml"

    # Both routers are built from this single config file.
    settings = {
        "db": {"uri": database_file, "name": "my_db"},
        "hybrid_search_weight": 0.5,
        "k": 2,
        "collection_name": _COLLECTION,
        "use_web": False,
        "reranker_model_name": None,
    }
    with config_path.open("w") as handle:
        yaml.dump(settings, handle)

    with ExitStack() as stack:
        activate = stack.enter_context

        # The sparse model factory is replaced before the Indexer is built.
        activate(
            patch(
                "mmore.index.indexer.SparseModel.from_config",
                return_value=FakeSparseEmbedding(),
            )
        )
        client = MilvusClient(database_file, enable_sparse=True)
        sparse_config = SparseModelConfig(
            model_name="naver/splade-cocondenser-selfdistil"
        )
        the_indexer = Indexer(
            dense_model_config=DenseModelConfig(model_name="debug"),
            sparse_model_config=sparse_config,
            client=client,
        )

        # Redirect uploads into the temp dir, stub processor registration,
        # and hand the pre-built indexer to the index API.
        for module_patch in (
            patch("mmore.run_index_api.UPLOAD_DIR", str(uploads)),
            patch("mmore.run_index_api.register_all_processors"),
            patch("mmore.run_index_api.get_indexer", return_value=the_indexer),
        ):
            activate(module_patch)

        indexing_app = FastAPI()
        indexing_app.include_router(make_index_router(str(config_file := config_path)))
        indexing_client = TestClient(indexing_app, raise_server_exceptions=False)

        # File processing is stubbed to return one fake sample whose
        # file_path points at the uploaded file's location.
        stored_path = str(uploads / "listed-doc.txt")
        activate(
            patch(
                "mmore.run_index_api.process_files_default",
                return_value=[_fake_doc(stored_path)],
            )
        )
        upload_payload = {
            "file": ("listed-doc.txt", b"Hello list files", "text/plain")
        }
        response = indexing_client.post(
            "/v1/files",
            data={"fileId": "listed-doc"},
            files=upload_payload,
        )
        assert response.status_code == 201

        # The retriever gets the same fake sparse model before its router
        # is created.
        activate(
            patch(
                "mmore.rag.retriever.SparseModel.from_config",
                return_value=FakeSparseEmbedding(),
            )
        )
        search_app = FastAPI()
        search_app.include_router(make_router(str(config_file)))
        search_client = TestClient(search_app)

        response = search_client.get(
            "/list_files", params={"collection_name": _COLLECTION}
        )

        assert response.status_code == 200
        listing = response.json()
        files_by_id = {entry["id"]: entry["filename"] for entry in listing}
        assert files_by_id["listed-doc"] == "listed-doc.txt"
        # Explicit guard against the "Unknown" label (presumably the
        # retriever's fallback when no filename is stored; confirm).
        assert files_by_id["listed-doc"] != "Unknown"
492+
493+
395494def test_upload_duplicate_file_returns_400 (indexer_client ):
396495 tc , upload_dir , _ = indexer_client
397496 duplicate_id = "duplicate-doc"
0 commit comments