@@ -28,6 +28,7 @@
     to_async_generator,
 )
 from r2r.pipes import R2REvalPipe
+from r2r.telemetry.telemetry_decorator import telemetry_event
 
 from .r2r_abstractions import R2RPipelines, R2RProviders
 from .r2r_config import R2RConfig
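Note: the `telemetry_event` decorator imported here is applied to each public endpoint in the hunks below. Its implementation is not part of this diff; as a rough mental model only, it presumably wraps an endpoint and records a named event per invocation. A minimal sketch of that idea (the logging sink and sync/async handling are assumptions, not the actual `r2r.telemetry` code):

```python
import asyncio
import functools
import logging

logger = logging.getLogger(__name__)


def telemetry_event(event_name: str):
    """Hypothetical stand-in for r2r.telemetry.telemetry_decorator.telemetry_event."""

    def decorator(func):
        if asyncio.iscoroutinefunction(func):

            @functools.wraps(func)
            async def async_wrapper(*args, **kwargs):
                # Stand-in for a real telemetry sink.
                logger.info("telemetry event: %s", event_name)
                return await func(*args, **kwargs)

            return async_wrapper

        @functools.wraps(func)
        def sync_wrapper(*args, **kwargs):
            logger.info("telemetry event: %s", event_name)
            return func(*args, **kwargs)

        return sync_wrapper

    return decorator
```

The sync branch matters because the diff decorates both async endpoints and the synchronous `openapi_spec_app`.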
@@ -258,6 +259,7 @@ class UpdatePromptRequest(BaseModel):
         template: Optional[str] = None
         input_types: Optional[dict[str, str]] = None
 
+    @telemetry_event("UpdatePrompt")
     async def update_prompt_app(self, request: UpdatePromptRequest):
         """Update a prompt's template and/or input types."""
         try:
@@ -289,7 +291,27 @@ async def aingest_documents(
             )
 
         document_infos = []
+        skipped_documents = []
+        processed_documents = []
+        existing_document_ids = [
+            str(doc_info.document_id)
+            for doc_info in self.providers.vector_db.get_documents_info()
+        ]
+
         for iteration, document in enumerate(documents):
+            if (
+                version is not None
+                and str(document.id) in existing_document_ids
+            ):
+                logger.error(f"Document with ID {document.id} already exists.")
+                if len(documents) == 1:
+                    raise HTTPException(
+                        status_code=409,
+                        detail=f"Document with ID {document.id} already exists.",
+                    )
+                skipped_documents.append(document.title or str(document.id))
+                continue
+
             document_metadata = (
                 metadatas[iteration] if metadatas else document.metadata
             )
@@ -319,24 +341,62 @@ async def aingest_documents(
                 )
             )
 
+            processed_documents.append(document.title or str(document.id))
+
+        if skipped_documents and len(skipped_documents) == len(documents):
+            logger.error("All provided documents already exist.")
+            raise HTTPException(
+                status_code=409,
+                detail="All provided documents already exist. Use the update endpoint to update these documents.",
+            )
+
+        if skipped_documents:
+            logger.warning(
+                f"Skipped ingestion for the following documents since they already exist: {', '.join(skipped_documents)}. Use the update endpoint to update these documents."
+            )
+
         await self.ingestion_pipeline.run(
-            input=to_async_generator(documents),
-            versions=versions,
+            input=to_async_generator(
+                [
+                    doc
+                    for doc in documents
+                    if str(doc.id) not in existing_document_ids
+                ]
+            ),
+            versions=[
+                info.version
+                for info in document_infos
+                if info.created_at == info.updated_at
+            ],
             run_manager=self.run_manager,
         )
 
         self.providers.vector_db.upsert_documents_info(document_infos)
-        return {"results": "Entries upserted successfully."}
+        return {
+            "processed_documents": [
+                f"Document '{title}' processed successfully."
+                for title in processed_documents
+            ],
+            "skipped_documents": [
+                f"Document '{title}' skipped since it already exists."
+                for title in skipped_documents
+            ],
+        }
 
     class IngestDocumentsRequest(BaseModel):
         documents: list[Document]
 
+    @telemetry_event("IngestDocuments")
     async def ingest_documents_app(self, request: IngestDocumentsRequest):
         async with manage_run(
             self.run_manager, "ingest_documents_app"
         ) as run_id:
             try:
                 return await self.aingest_documents(request.documents)
+
+            except HTTPException as he:
+                raise he
+
             except Exception as e:
                 await self.logging_connection.log(
                     log_id=run_id,
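Note: the substantive change in `aingest_documents` is duplicate handling. Existing document IDs are fetched once up front; a duplicate is skipped and reported, unless it is the only document submitted or every submitted document is a duplicate, in which case the endpoint raises a 409. The diff additionally gates the skip on `version is not None`, and the pipeline input is re-filtered against the existing IDs. A stripped-down sketch of the partition logic outside the class (the `Doc` type and exception are illustrative, not the R2R API):

```python
from dataclasses import dataclass


@dataclass
class Doc:
    id: str
    title: str


class DuplicateDocumentError(Exception):
    """Stands in for HTTPException(status_code=409) in the endpoint."""


def partition_by_existing(documents: list[Doc], existing_ids: set[str]):
    """Split documents into (to_ingest, skipped_titles), mirroring the
    skip/409 behavior added to aingest_documents."""
    to_ingest, skipped = [], []
    for doc in documents:
        if doc.id in existing_ids:
            if len(documents) == 1:
                # A lone duplicate is a hard error (409 in the API).
                raise DuplicateDocumentError(
                    f"Document with ID {doc.id} already exists."
                )
            skipped.append(doc.title or doc.id)
            continue
        to_ingest.append(doc)
    if documents and not to_ingest:
        # Everything was a duplicate: also a hard error in the API.
        raise DuplicateDocumentError("All provided documents already exist.")
    return to_ingest, skipped
```

With `existing_ids = {"a"}`, submitting `[Doc("a", "x"), Doc("b", "y")]` ingests only `b` and reports `x` as skipped.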
@@ -423,6 +483,7 @@ async def aupdate_documents(
     class UpdateDocumentsRequest(BaseModel):
         documents: list[Document]
 
+    @telemetry_event("UpdateDocuments")
     async def update_documents_app(self, request: UpdateDocumentsRequest):
         async with manage_run(
             self.run_manager, "update_documents_app"
@@ -445,10 +506,7 @@ async def update_documents_app(self, request: UpdateDocumentsRequest):
                 logger.error(
                     f"update_documents_app(documents={request.documents}) - \n\n{str(e)})"
                 )
-                logger.error(
-                    f"update_documents_app(documents={request.documents}) - \n\n{str(e)})"
-                )
-                raise HTTPException(status_code=500, detail=str(e))
+                raise HTTPException(status_code=500, detail=str(e)) from e
 
     @syncable
     async def aingest_files(
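Note: besides deleting the duplicated `logger.error` call, the handler now raises with `from e`, which sets `__cause__` so the traceback keeps the root failure. A quick self-contained illustration:

```python
class UpstreamError(Exception):
    pass


class APIError(Exception):
    pass


def handler():
    try:
        raise UpstreamError("database write failed")
    except Exception as e:
        # `from e` chains the original exception; the log then shows
        # "The above exception was the direct cause of the following
        # exception:" instead of losing the root cause.
        raise APIError("500: internal error") from e


try:
    handler()
except APIError as err:
    assert isinstance(err.__cause__, UpstreamError)
```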
@@ -482,6 +540,12 @@ async def aingest_files(
         try:
             documents = []
             document_infos = []
+            skipped_documents = []
+            processed_documents = []
+            existing_document_ids = [
+                str(doc_info.document_id)
+                for doc_info in self.providers.vector_db.get_documents_info()
+            ]
 
             for iteration, file in enumerate(files):
                 logger.info(f"Processing file: {file.filename}")
@@ -522,14 +586,27 @@ async def aingest_files(
                         detail=f"{file_extension} is explicitly excluded in the configuration file.",
                     )
 
-                file_content = await file.read()
-                logger.info(f"File read successfully: {file.filename}")
-
                 document_id = (
                     generate_id_from_label(file.filename)
                     if document_ids is None
                     else document_ids[iteration]
                 )
+                if (
+                    version is not None
+                    and str(document_id) in existing_document_ids
+                ):
+                    logger.error(f"File with ID {document_id} already exists.")
+                    if len(files) == 1:
+                        raise HTTPException(
+                            status_code=409,
+                            detail=f"File with ID {document_id} already exists.",
+                        )
+                    skipped_documents.append(file.filename)
+                    continue
+
+                file_content = await file.read()
+                logger.info(f"File read successfully: {file.filename}")
+
                 document_metadata = metadatas[iteration] if metadatas else {}
                 document_title = (
                     document_metadata.get("title", None) or file.filename
@@ -567,7 +644,21 @@ async def aingest_files(
                     )
                 )
 
-            # Run the pipeline asynchronously with filtered documents
+                processed_documents.append(file.filename)
+
+            if skipped_documents and len(skipped_documents) == len(files):
+                logger.error("All uploaded documents already exist.")
+                raise HTTPException(
+                    status_code=409,
+                    detail="All uploaded documents already exist. Use the update endpoint to update these documents.",
+                )
+
+            if skipped_documents:
+                logger.warning(
+                    f"Skipped ingestion for the following documents since they already exist: {', '.join(skipped_documents)}. Use the update endpoint to update these documents."
+                )
+
+            # Run the pipeline asynchronously
             await self.ingestion_pipeline.run(
                 input=to_async_generator(documents),
                 versions=versions,
@@ -578,8 +669,14 @@ async def aingest_files(
             self.providers.vector_db.upsert_documents_info(document_infos)
 
             return {
-                "results": f"File '{file}' processed successfully."
-                for file in document_infos
+                "processed_documents": [
+                    f"File '{filename}' processed successfully."
+                    for filename in processed_documents
+                ],
+                "skipped_documents": [
+                    f"File '{filename}' skipped since it already exists."
+                    for filename in skipped_documents
+                ],
             }
         except Exception as e:
             raise e
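Note: the old return built a dict comprehension that rebound the single `"results"` key once per `DocumentInfo` and stringified the info object itself; the new shape itemizes both outcomes. Under the new code, a request with one fresh file and one duplicate would return roughly (filenames invented for illustration):

```python
{
    "processed_documents": ["File 'report.pdf' processed successfully."],
    "skipped_documents": ["File 'notes.txt' skipped since it already exists."],
}
```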
@@ -588,6 +685,7 @@ async def aingest_files(
             for file in files:
                 file.file.close()
 
+    @telemetry_event("IngestFiles")
     async def ingest_files_app(
         self,
         files: list[UploadFile] = File(...),
@@ -756,6 +854,7 @@ class UpdateFilesRequest(BaseModel):
         metadatas: Optional[str] = Form(None)
         ids: str = Form("")
 
+    @telemetry_event("UpdateFiles")
     async def update_files_app(
         self,
         files: list[UploadFile] = File(...),
@@ -845,6 +944,7 @@ class SearchRequest(BaseModel):
         search_limit: int = 10
         do_hybrid_search: Optional[bool] = False
 
+    @telemetry_event("Search")
     async def search_app(self, request: SearchRequest):
         async with manage_run(self.run_manager, "search_app") as run_id:
             try:
@@ -960,6 +1060,7 @@ class RAGRequest(BaseModel):
         rag_generation_config: Optional[str] = None
         streaming: Optional[bool] = None
 
+    @telemetry_event("RAG")
     async def rag_app(self, request: RAGRequest):
         async with manage_run(self.run_manager, "rag_app") as run_id:
             try:
@@ -1069,6 +1170,7 @@ class EvalRequest(BaseModel):
         context: str
         completion: str
 
+    @telemetry_event("Evaluate")
     async def evaluate_app(self, request: EvalRequest):
         async with manage_run(self.run_manager, "evaluate_app") as run_id:
             try:
@@ -1110,6 +1212,7 @@ class DeleteRequest(BaseModel):
         keys: list[str]
         values: list[Union[bool, int, str]]
 
+    @telemetry_event("Delete")
     async def delete_app(self, request: DeleteRequest = Body(...)):
         try:
             return await self.adelete(request.keys, request.values)
@@ -1168,6 +1271,7 @@ async def alogs(
 
         return {"results": aggregated_logs}
 
+    @telemetry_event("Logs")
     async def logs_app(
         self,
         log_type_filter: Optional[str] = Query(None),
@@ -1236,27 +1340,27 @@ async def aanalytics(
                     analysis_type = analysis_config[0]
                     if analysis_type == "bar_chart":
                         extract_key = analysis_config[1]
-                        results[
-                            filter_key
-                        ] = AnalysisTypes.generate_bar_chart_data(
-                            filtered_logs[filter_key], extract_key
+                        results[filter_key] = (
+                            AnalysisTypes.generate_bar_chart_data(
+                                filtered_logs[filter_key], extract_key
+                            )
                         )
                     elif analysis_type == "basic_statistics":
                         extract_key = analysis_config[1]
-                        results[
-                            filter_key
-                        ] = AnalysisTypes.calculate_basic_statistics(
-                            filtered_logs[filter_key], extract_key
+                        results[filter_key] = (
+                            AnalysisTypes.calculate_basic_statistics(
+                                filtered_logs[filter_key], extract_key
+                            )
                         )
                     elif analysis_type == "percentile":
                         extract_key = analysis_config[1]
                         percentile = int(analysis_config[2])
-                        results[
-                            filter_key
-                        ] = AnalysisTypes.calculate_percentile(
-                            filtered_logs[filter_key],
-                            extract_key,
-                            percentile,
+                        results[filter_key] = (
+                            AnalysisTypes.calculate_percentile(
+                                filtered_logs[filter_key],
+                                extract_key,
+                                percentile,
+                            )
                         )
                     else:
                         logger.warning(
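Note: the `aanalytics` hunk is formatting-only; newer Black wraps an over-long assignment value in parentheses instead of splitting the subscript, with no behavioral change. For context, a minimal sketch of the kind of computation `AnalysisTypes.calculate_percentile` performs; the real implementation is not in this diff, and the nearest-rank method and signature here are assumptions:

```python
import math


def calculate_percentile(values: list[float], percentile: int) -> float:
    """Nearest-rank percentile over the sorted observations (assumed method)."""
    if not values:
        raise ValueError("no values to summarize")
    ordered = sorted(values)
    rank = max(1, math.ceil(percentile / 100 * len(ordered)))
    return ordered[rank - 1]


assert calculate_percentile([1.0, 2.0, 3.0, 4.0], 50) == 2.0
```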
@@ -1265,6 +1369,7 @@ async def aanalytics(
 
         return {"results": results}
 
+    @telemetry_event("Analytics")
     async def analytics_app(
         self,
         filter_criteria: FilterCriteria = Body(...),
@@ -1292,6 +1397,7 @@ async def aapp_settings(self, *args: Any, **kwargs: Any):
             }
         }
 
+    @telemetry_event("AppSettings")
     async def app_settings_app(self):
         """Return the config.json and all prompts."""
         try:
@@ -1306,6 +1412,7 @@ async def ausers_stats(self, user_ids: Optional[list[uuid.UUID]] = None):
                 [str(ele) for ele in user_ids]
             )
 
+    @telemetry_event("UsersStats")
     async def users_stats_app(
         self, user_ids: Optional[list[uuid.UUID]] = Query(None)
     ):
@@ -1335,6 +1442,7 @@ async def adocuments_info(
             ),
         )
 
+    @telemetry_event("DocumentsInfo")
     async def documents_info_app(
         self,
         document_ids: Optional[list[str]] = Query(None),
@@ -1355,6 +1463,7 @@ async def documents_info_app(
     async def adocument_chunks(self, document_id: str) -> list[str]:
         return self.providers.vector_db.get_document_chunks(document_id)
 
+    @telemetry_event("DocumentChunks")
     async def document_chunks_app(self, document_id: str):
         try:
             chunks = await self.adocument_chunks(document_id)
@@ -1365,6 +1474,7 @@ async def document_chunks_app(self, document_id: str):
             )
             raise HTTPException(status_code=500, detail=str(e)) from e
 
+    @telemetry_event("OpenAPI")
     def openapi_spec_app(self):
         from fastapi.openapi.utils import get_openapi