@@ -179,6 +179,7 @@ def _response_handler(
179179 return str_response
180180 if invoke_response_format == InvokeResponseFormat .USAGE :
181181 tokenizer = self .client .tokenizer
182+ # Messages may already be a formatted prompt string
182183 if not isinstance (messages , str ):
183184 try :
184185 messages = tokenizer .apply_chat_template (
@@ -292,23 +293,61 @@ def custom_invoke(
292293 else :
293294 return self .client (** invoke_kwargs )
294295
def _batch_invoke(
    self,
    messages_list: list[list[dict]],
    invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
    **invoke_kwargs,
) -> list[Union[str, dict, list]]:
    """
    Run a batch of chat-style message lists through the pipeline in one call.

    Each element of ``messages_list`` is a full conversation (a list of
    role/content dicts); the raw pipeline outputs are post-processed one by
    one through ``_response_handler`` so the per-item result matches what a
    single ``invoke`` call would return.

    :param messages_list: Conversations to process together as one batch.
    :param invoke_response_format: Desired response shape (STRING, USAGE, or FULL).
    :param invoke_kwargs: Extra pipeline kwargs; ``batch_size`` falls back to
        ``mlrun.mlconf.model_providers.huggingface_default_batch_size`` when
        the caller did not set one.

    :return: One processed response per input conversation, in input order.
    """
    # Only fill in the default batch size when the caller left it unset,
    # so an explicit caller-provided value always wins.
    if "batch_size" not in invoke_kwargs:
        invoke_kwargs["batch_size"] = (
            mlrun.mlconf.model_providers.huggingface_default_batch_size
        )

    raw_outputs = self.custom_invoke(text_inputs=messages_list, **invoke_kwargs)

    # Pair every conversation with its raw output and normalize each one.
    return [
        self._response_handler(
            messages=conversation,
            response=raw_output,
            invoke_response_format=invoke_response_format,
        )
        for conversation, raw_output in zip(messages_list, raw_outputs)
    ]
328+
295329 def invoke (
296330 self ,
297- messages : Union [str , list [ str ], "ChatType" , list ["ChatType" ]],
331+ messages : Union ["ChatType" , list ["ChatType" ]],
298332 invoke_response_format : InvokeResponseFormat = InvokeResponseFormat .FULL ,
299333 ** invoke_kwargs ,
300334 ) -> Union [str , list , dict [str , Any ]]:
301335 """
302336 HuggingFace-specific implementation of model invocation using the synchronous pipeline client.
303337 Invokes a HuggingFace model operation for text generation tasks.
304338
339+ Supports both single and batch invocations:
340+ - Single invocation: Pass a single ChatType (string or chat format messages)
341+ - Batch invocation: Pass a list of ChatType objects for batch processing
342+
305343 Note: Ensure your environment has sufficient computational resources (CPU/GPU and memory) to run the model.
306344
307345 :param messages:
308346 Input for the text generation model. Can be provided in multiple formats:
309347
348+ **Single invocation:**
349+
310350 - A single string: Direct text input for generation
311- - A list of strings: Multiple text inputs for batch processing
312351 - Chat format: A list of dictionaries with "role" and "content" keys:
313352
314353 .. code-block:: json
@@ -318,11 +357,27 @@ def invoke(
318357 {"role": "user", "content": "What is the capital of France?"}
319358 ]
320359
360+ **Batch invocation:**
361+
362+ - List of chat format messages: Multiple chat conversations for batch processing:
363+
364+ .. code-block:: json
365+
366+ [
367+ [
368+ {"role": "user", "content": "What is the capital of France?"}
369+ ],
370+ [
371+ {"role": "user", "content": "What is the capital of Germany?"}
372+ ]
373+ ]
374+
321375 :param invoke_response_format: InvokeResponseFormat
322376 Specifies the format of the returned response. Options:
323377
324- - "string": Returns only the generated text content, extracted from a single response.
325- - "usage": Combines the generated text with metadata (e.g., token usage), returning a dictionary:
378+ - "string": Returns only the generated text content. For batch invocations, returns a list of strings.
379+ - "usage": Combines the generated text with metadata (e.g., token usage). For batch invocations,
380+ returns a list of dictionaries:
326381
327382 .. code-block:: json
328383 {
@@ -342,9 +397,12 @@ def invoke(
342397
343398 :param invoke_kwargs:
344399 Additional keyword arguments passed to the HuggingFace pipeline.
400+ For batch invocations, you can specify 'batch_size' to control the batch processing size.
401+ If not provided, defaults to mlrun.mlconf.model_providers.huggingface_default_batch_size.
345402
346403 :return:
347- A string, dictionary, or list of model outputs, depending on `invoke_response_format`.
404+ - Single invocation: A string, dictionary, or list depending on `invoke_response_format`.
405+ - Batch invocation: A list of strings, dictionaries, or lists depending on `invoke_response_format`.
348406
349407 :raises MLRunInvalidArgumentError:
350408 If the pipeline task is not "text-generation" or if the response contains multiple outputs when extracting
@@ -356,8 +414,19 @@ def invoke(
356414 raise mlrun .errors .MLRunInvalidArgumentError (
357415 "HuggingFaceProvider.invoke supports text-generation task only"
358416 )
417+
359418 if InvokeResponseFormat .is_str_response (invoke_response_format .value ):
360419 invoke_kwargs ["return_full_text" ] = False
420+
421+ is_batch = self ._validate_and_detect_batch_invocation (messages )
422+
423+ if is_batch :
424+ return self ._batch_invoke (
425+ messages_list = messages ,
426+ invoke_response_format = invoke_response_format ,
427+ ** invoke_kwargs ,
428+ )
429+
361430 response = self .custom_invoke (text_inputs = messages , ** invoke_kwargs )
362431 response = self ._response_handler (
363432 messages = messages ,
0 commit comments