We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 6962d78 · commit 76b357d · Copy full SHA for 76b357d
slime/utils/processing_utils.py
@@ -22,7 +22,7 @@ def build_processor_kwargs(multimodal_inputs: dict | None = None) -> dict:
22
23
result = dict(multimodal_inputs) if multimodal_inputs else {}
24
25
- # return_tensors=None for text (input_ids as lists), "pt" for vision/audio tensors
+ # return_tensors=None for text (input_ids as lists), "pt" for modality-specific outputs
26
result["text_kwargs"] = {**result.get("text_kwargs", {}), "return_tensors": None}
27
for key in ("audio_kwargs", "images_kwargs", "videos_kwargs"):
28
if key in result:
0 commit comments