-
Notifications
You must be signed in to change notification settings - Fork 131
Open
Description
Hello
The quickstart example is not working out of the box.
See output below
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[1], line 34
31 min_pixels = 256 * 28 * 28
32 max_pixels = 1344 * 28 * 28
---> 34 processor = AutoProcessor.from_pretrained(
35 "showlab/ShowUI-2B", min_pixels=min_pixels, max_pixels=max_pixels
36 )
File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/auto/processing_auto.py:376, in AutoProcessor.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
372 return processor_class.from_pretrained(
373 pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
374 )
375 elif processor_class is not None:
--> 376 return processor_class.from_pretrained(
377 pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
378 )
379 # Last try: we use the PROCESSOR_MAPPING.
380 elif type(config) in PROCESSOR_MAPPING:
File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/processing_utils.py:1185, in ProcessorMixin.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)
1182 if token is not None:
1183 kwargs["token"] = token
-> 1185 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
1186 processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
1187 return cls.from_args_and_dict(args, processor_dict, **kwargs)
File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/processing_utils.py:1248, in ProcessorMixin._get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
1245 else:
1246 attribute_class = cls.get_possibly_dynamic_module(class_name)
-> 1248 args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
1249 return args
File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py:564, in AutoImageProcessor.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
562 return image_processor_class.from_dict(config_dict, **kwargs)
563 elif image_processor_class is not None:
--> 564 return image_processor_class.from_dict(config_dict, **kwargs)
565 # Last try: we use the IMAGE_PROCESSOR_MAPPING.
566 elif type(config) in IMAGE_PROCESSOR_MAPPING:
File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/image_processing_base.py:422, in ImageProcessingMixin.from_dict(cls, image_processor_dict, **kwargs)
419 if "crop_size" in kwargs and "crop_size" in image_processor_dict:
420 image_processor_dict["crop_size"] = kwargs.pop("crop_size")
--> 422 image_processor = cls(**image_processor_dict)
424 # Update image_processor with kwargs if needed
425 to_remove = []
File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py:143, in Qwen2VLImageProcessor.__init__(self, do_resize, size, resample, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_convert_rgb, min_pixels, max_pixels, patch_size, temporal_patch_size, merge_size, **kwargs)
141 super().__init__(**kwargs)
142 if size is not None and ("shortest_edge" not in size or "longest_edge" not in size):
--> 143 raise ValueError("size must contain 'shortest_edge' and 'longest_edge' keys.")
144 else:
145 size = {"shortest_edge": 56 * 56, "longest_edge": 28 * 28 * 1280}
ValueError: size must contain 'shortest_edge' and 'longest_edge' keys.
Adding size={"shortest_edge": 256, "longest_edge": 1024} to the AutoProcessor.from_pretrained call solves the issue.
But then the following issue arises:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[4], line 28
22 text = processor.apply_chat_template(
23 messages,
24 tokenize=False,
25 add_generation_prompt=True,
26 )
27 image_inputs, video_inputs = process_vision_info(messages)
---> 28 inputs = processor(
29 text=[text],
30 images=image_inputs,
31 videos=video_inputs,
32 padding=True,
33 return_tensors="pt",
34 )
35 inputs = inputs.to("cuda")
37 generated_ids = model.generate(**inputs, max_new_tokens=128)
File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/qwen2_vl/processing_qwen2_vl.py:142, in Qwen2VLProcessor.__call__(self, images, text, videos, **kwargs)
140 image_inputs = videos_inputs = {}
141 if images is not None:
--> 142 image_inputs = self.image_processor(images=images, **output_kwargs["images_kwargs"])
143 image_grid_thw = image_inputs["image_grid_thw"]
145 if videos is not None:
File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/image_processing_utils.py:44, in BaseImageProcessor.__call__(self, images, **kwargs)
42 def __call__(self, images, **kwargs) -> BatchFeature:
43 """Preprocess an image or a batch of images."""
---> 44 return self.preprocess(images, **kwargs)
File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py:432, in Qwen2VLImageProcessor.preprocess(self, images, videos, do_resize, size, min_pixels, max_pixels, resample, do_rescale, rescale_factor, do_normalize, image_mean, image_std, patch_size, temporal_patch_size, merge_size, do_convert_rgb, return_tensors, data_format, input_data_format)
430 pixel_values, vision_grid_thws = [], []
431 for image in images:
--> 432 patches, image_grid_thw = self._preprocess(
433 image,
434 do_resize=do_resize,
435 size=size,
436 resample=resample,
437 do_rescale=do_rescale,
438 rescale_factor=rescale_factor,
439 do_normalize=do_normalize,
440 image_mean=image_mean,
441 image_std=image_std,
442 patch_size=patch_size,
443 temporal_patch_size=temporal_patch_size,
444 merge_size=merge_size,
445 data_format=data_format,
446 do_convert_rgb=do_convert_rgb,
447 input_data_format=input_data_format,
448 )
449 pixel_values.extend(patches)
450 vision_grid_thws.append(image_grid_thw)
File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py:258, in Qwen2VLImageProcessor._preprocess(self, images, do_resize, size, resample, do_rescale, rescale_factor, do_normalize, image_mean, image_std, patch_size, temporal_patch_size, merge_size, do_convert_rgb, data_format, input_data_format)
250 if do_resize:
251 resized_height, resized_width = smart_resize(
252 height,
253 width,
(...)
256 max_pixels=size["longest_edge"],
257 )
--> 258 image = resize(
259 image, size=(resized_height, resized_width), resample=resample, input_data_format=input_data_format
260 )
262 if do_rescale:
263 image = self.rescale(image, scale=rescale_factor, input_data_format=input_data_format)
File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/image_transforms.py:376, in resize(image, size, resample, reducing_gap, data_format, return_numpy, input_data_format)
374 height, width = size
375 # PIL images are in the format (width, height)
--> 376 resized_image = image.resize((width, height), resample=resample, reducing_gap=reducing_gap)
378 if return_numpy:
379 resized_image = np.array(resized_image)
File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/PIL/Image.py:2316, in Image.resize(self, size, resample, box, reducing_gap)
2304 self = (
2305 self.reduce(factor, box=reduce_box)
2306 if callable(self.reduce)
2307 else Image.reduce(self, factor, box=reduce_box)
2308 )
2309 box = (
2310 (box[0] - reduce_box[0]) / factor_x,
2311 (box[1] - reduce_box[1]) / factor_y,
2312 (box[2] - reduce_box[0]) / factor_x,
2313 (box[3] - reduce_box[1]) / factor_y,
2314 )
-> 2316 return self._new(self.im.resize(size, resample, box))
ValueError: height and width must be > 0
Could the QUICK_START example be updated/completed?
Thanks
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels