@@ -64,6 +64,7 @@ def file(
6464 interpretTables : Optional [bool ] = False ,
6565 blanklineSplitter : Optional [bool ] = False ,
6666 mimetype : Optional [str ] = None ,
67+ extract_strategy : Optional [str ] = None ,
6768 ** kwargs ,
6869 ) -> Optional [str ]:
6970 """Upload a file from filesystem to a Nuclia KnowledgeBox"""
@@ -98,6 +99,7 @@ def file(
9899 filename = filename ,
99100 content_type = mimetype ,
100101 md5 = md5_hash .hexdigest (),
102+ extract_strategy = extract_strategy ,
101103 )
102104
103105 offset = 0
@@ -197,6 +199,9 @@ def text(
197199 "format" : format ,
198200 }
199201 }
202+ extract_strategy = kwargs .get ("extract_strategy" )
203+ if extract_strategy is not None :
204+ texts [field ]["extract_strategy" ] = extract_strategy
200205 rid , is_new_resource = self ._get_or_create_resource (
201206 texts = texts ,
202207 icon = icon ,
@@ -226,6 +231,9 @@ def link(
226231 "css_selector" : css_selector ,
227232 }
228233 }
234+ extract_strategy = kwargs .get ("extract_strategy" )
235+ if extract_strategy is not None :
236+ links [field ]["extract_strategy" ] = extract_strategy
229237 kwargs ["icon" ] = "application/stf-link"
230238 rid , is_new_resource = self ._get_or_create_resource (
231239 links = links ,
@@ -248,6 +256,7 @@ def remote(
248256 field : Optional [str ] = "file" ,
249257 interpretTables : Optional [bool ] = False ,
250258 blanklineSplitter : Optional [bool ] = False ,
259+ extract_strategy : Optional [str ] = None ,
251260 ** kwargs ,
252261 ) -> str :
253262 """Upload a remote url to a Nuclia KnowledgeBox"""
@@ -279,6 +288,7 @@ def remote(
279288 size = size ,
280289 filename = filename ,
281290 content_type = mimetype ,
291+ extract_strategy = extract_strategy ,
282292 )
283293 offset = 0
284294 for _ in tqdm (range ((size // CHUNK_SIZE ) + 1 )):
@@ -375,6 +385,7 @@ async def file(
375385 mimetype : Optional [str ] = None ,
376386 interpretTables : Optional [bool ] = False ,
377387 blanklineSplitter : Optional [bool ] = False ,
388+ extract_strategy : Optional [str ] = None ,
378389 ** kwargs ,
379390 ) -> str :
380391 """Upload a file from filesystem to a Nuclia KnowledgeBox"""
@@ -407,6 +418,7 @@ async def file(
407418 filename = filename ,
408419 content_type = mimetype ,
409420 md5 = md5_hash .hexdigest (),
421+ extract_strategy = extract_strategy ,
410422 )
411423 offset = 0
412424 for _ in tqdm (range ((size // CHUNK_SIZE ) + 1 )):
@@ -503,6 +515,9 @@ async def text(
503515 "format" : format ,
504516 }
505517 }
518+ extract_strategy = kwargs .get ("extract_strategy" )
519+ if extract_strategy is not None :
520+ texts [field ]["extract_strategy" ] = extract_strategy
506521 rid , is_new_resource = await self ._get_or_create_resource (
507522 texts = texts ,
508523 icon = icon ,
@@ -530,6 +545,9 @@ async def link(
530545 "uri" : uri ,
531546 }
532547 }
548+ extract_strategy = kwargs .get ("extract_strategy" )
549+ if extract_strategy is not None :
550+ links [field ]["extract_strategy" ] = extract_strategy
533551 kwargs ["icon" ] = "application/stf-link"
534552 rid , is_new_resource = await self ._get_or_create_resource (
535553 links = links ,
@@ -552,6 +570,7 @@ async def remote(
552570 field : Optional [str ] = "file" ,
553571 interpretTables : Optional [bool ] = False ,
554572 blanklineSplitter : Optional [bool ] = False ,
573+ extract_strategy : Optional [str ] = None ,
555574 ** kwargs ,
556575 ) -> str :
557576 """Upload a remote url to a Nuclia KnowledgeBox"""
@@ -578,6 +597,7 @@ async def remote(
578597 size = size ,
579598 filename = filename ,
580599 content_type = mimetype ,
600+ extract_strategy = extract_strategy ,
581601 )
582602 offset = 0
583603 with tqdm (total = (size // CHUNK_SIZE ) + 1 ) as p_bar :
0 commit comments