Skip to content

Commit 39db7c6

Browse files
committed
fix(api-inference): feature-extraction, flatten array, discard the batch size dim
Signed-off-by: Raphael Glon <[email protected]>
1 parent e94b6fb commit 39db7c6

File tree

1 file changed

+43
-7
lines changed

1 file changed

+43
-7
lines changed

src/huggingface_inference_toolkit/handler.py

+43-7
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from huggingface_inference_toolkit.const import HF_TRUST_REMOTE_CODE
66
from huggingface_inference_toolkit.env_utils import api_inference_compat
7+
from huggingface_inference_toolkit import logging
78
from huggingface_inference_toolkit.sentence_transformers_utils import SENTENCE_TRANSFORMERS_TASKS
89
from huggingface_inference_toolkit.utils import (
910
check_and_register_custom_pipeline_from_directory,
@@ -106,17 +107,52 @@ def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
106107
if self.pipeline.task == "text-classification" and isinstance(inputs, str):
107108
inputs = [inputs]
108109
parameters.setdefault("top_k", os.environ.get("DEFAULT_TOP_K", 5))
109-
resp = self.pipeline(inputs, **parameters)
110-
# # We don't want to return {}
110+
if self.pipeline.task == "token-classification":
111+
parameters.setdefault("aggregation_strategy", os.environ.get("DEFAULT_AGGREGATION_STRATEGY", "simple"))
112+
113+
resp = self.pipeline(**inputs, **parameters) if isinstance(inputs, dict) else \
114+
self.pipeline(inputs, **parameters)
115+
116+
if api_inference_compat():
117+
if self.pipeline.task == "text-classification":
118+
# We don't want to return {} but [{}] in any case
111119
if isinstance(resp, list) and len(resp) > 0:
112120
if not isinstance(resp[0], list):
113121
return [resp]
114122
return resp
115-
if self.pipeline.task == "token-classification":
116-
parameters.setdefault("aggregation_strategy", os.environ.get("DEFAULT_AGGREGATION_STRATEGY", "simple"))
117-
return (
118-
self.pipeline(**inputs, **parameters) if isinstance(inputs, dict) else self.pipeline(inputs, **parameters) # type: ignore
119-
)
123+
if self.pipeline.task == "feature-extraction":
124+
# If the library used is Transformers then the feature-extraction is returning the headless encoder
125+
# outputs as embeddings. The shape is a 3D or 4D array
126+
# [n_inputs, batch_size = 1, n_sentence_tokens, num_hidden_dim].
127+
# Let's just discard the batch size dim that always seems to be 1 and return a 2D/3D array
128+
# https://github.com/huggingface/transformers/blob/5c47d08b0d6835b8d8fc1c06d9a1bc71f6e78ace/src/transformers/pipelines/feature_extraction.py#L27
129+
# for api inference (reason: mainly display)
130+
new_resp = []
131+
if isinstance(inputs, list):
132+
if isinstance(resp, list) and len(resp) == len(inputs):
133+
for it in resp:
134+
# Batch size dim is the first level of it, discard it
135+
if isinstance(it, list) and len(it) == 1:
136+
new_resp.append(it[0])
137+
else:
138+
logging.logger.warning("One of the output batch size differs from 1: %d", len(it))
139+
return resp
140+
return new_resp
141+
else:
142+
logging.logger.warning("Inputs and resp len differ (or resp is not a list, type %s)",
143+
type(resp))
144+
return resp
145+
elif isinstance(inputs, str):
146+
if isinstance(resp, list) and len(resp) == 1:
147+
return resp[0]
148+
else:
149+
logging.logger.warning("The output batch size differs from 1: %d", len(resp))
150+
return resp
151+
else:
152+
logging.logger.warning("Output unexpected type %s", type(resp))
153+
return resp
154+
155+
return resp
120156

121157

122158
class VertexAIHandler(HuggingFaceHandler):

0 commit comments

Comments
 (0)