diff --git a/torchbenchmark/models/hf_clip/__init__.py b/torchbenchmark/models/hf_clip/__init__.py
index f2920eef68..33e8bfd809 100644
--- a/torchbenchmark/models/hf_clip/__init__.py
+++ b/torchbenchmark/models/hf_clip/__init__.py
@@ -51,9 +51,9 @@ def __init__(self, test, device, batch_size=1, extra_args=[]):
text = "the dog is here"
images = [image] * self.batch_size
texts = [text] * self.batch_size
- self.inputs = processor(
+ self.inputs = dict(processor(
text=texts, images=images, return_tensors="pt", padding=True
- )
+ ))
# dict_keys(['input_ids', 'attention_mask', 'pixel_values'])
for key in self.inputs: