Skip to content

QUICK_START not working #90

@lesurJ

Description

@lesurJ

Hello

The quickstart example does not work out of the box.
See the output below.

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[1], line 34
     31 min_pixels = 256 * 28 * 28
     32 max_pixels = 1344 * 28 * 28
---> 34 processor = AutoProcessor.from_pretrained(
     35     "showlab/ShowUI-2B", min_pixels=min_pixels, max_pixels=max_pixels
     36 )

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/auto/processing_auto.py:376, in AutoProcessor.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
    372     return processor_class.from_pretrained(
    373         pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
    374     )
    375 elif processor_class is not None:
--> 376     return processor_class.from_pretrained(
    377         pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
    378     )
    379 # Last try: we use the PROCESSOR_MAPPING.
    380 elif type(config) in PROCESSOR_MAPPING:

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/processing_utils.py:1185, in ProcessorMixin.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)
   1182 if token is not None:
   1183     kwargs["token"] = token
-> 1185 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
   1186 processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
   1187 return cls.from_args_and_dict(args, processor_dict, **kwargs)

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/processing_utils.py:1248, in ProcessorMixin._get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
   1245     else:
   1246         attribute_class = cls.get_possibly_dynamic_module(class_name)
-> 1248     args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
   1249 return args

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py:564, in AutoImageProcessor.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
    562     return image_processor_class.from_dict(config_dict, **kwargs)
    563 elif image_processor_class is not None:
--> 564     return image_processor_class.from_dict(config_dict, **kwargs)
    565 # Last try: we use the IMAGE_PROCESSOR_MAPPING.
    566 elif type(config) in IMAGE_PROCESSOR_MAPPING:

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/image_processing_base.py:422, in ImageProcessingMixin.from_dict(cls, image_processor_dict, **kwargs)
    419 if "crop_size" in kwargs and "crop_size" in image_processor_dict:
    420     image_processor_dict["crop_size"] = kwargs.pop("crop_size")
--> 422 image_processor = cls(**image_processor_dict)
    424 # Update image_processor with kwargs if needed
    425 to_remove = []

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py:143, in Qwen2VLImageProcessor.__init__(self, do_resize, size, resample, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_convert_rgb, min_pixels, max_pixels, patch_size, temporal_patch_size, merge_size, **kwargs)
    141 super().__init__(**kwargs)
    142 if size is not None and ("shortest_edge" not in size or "longest_edge" not in size):
--> 143     raise ValueError("size must contain 'shortest_edge' and 'longest_edge' keys.")
    144 else:
    145     size = {"shortest_edge": 56 * 56, "longest_edge": 28 * 28 * 1280}

ValueError: size must contain 'shortest_edge' and 'longest_edge' keys.

Passing size={"shortest_edge": 256, "longest_edge": 1024} to AutoProcessor.from_pretrained solves this first issue.

But then the following issue arises:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[4], line 28
     22 text = processor.apply_chat_template(
     23     messages,
     24     tokenize=False,
     25     add_generation_prompt=True,
     26 )
     27 image_inputs, video_inputs = process_vision_info(messages)
---> 28 inputs = processor(
     29     text=[text],
     30     images=image_inputs,
     31     videos=video_inputs,
     32     padding=True,
     33     return_tensors="pt",
     34 )
     35 inputs = inputs.to("cuda")
     37 generated_ids = model.generate(**inputs, max_new_tokens=128)

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/qwen2_vl/processing_qwen2_vl.py:142, in Qwen2VLProcessor.__call__(self, images, text, videos, **kwargs)
    140 image_inputs = videos_inputs = {}
    141 if images is not None:
--> 142     image_inputs = self.image_processor(images=images, **output_kwargs["images_kwargs"])
    143     image_grid_thw = image_inputs["image_grid_thw"]
    145 if videos is not None:

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/image_processing_utils.py:44, in BaseImageProcessor.__call__(self, images, **kwargs)
     42 def __call__(self, images, **kwargs) -> BatchFeature:
     43     """Preprocess an image or a batch of images."""
---> 44     return self.preprocess(images, **kwargs)

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py:432, in Qwen2VLImageProcessor.preprocess(self, images, videos, do_resize, size, min_pixels, max_pixels, resample, do_rescale, rescale_factor, do_normalize, image_mean, image_std, patch_size, temporal_patch_size, merge_size, do_convert_rgb, return_tensors, data_format, input_data_format)
    430 pixel_values, vision_grid_thws = [], []
    431 for image in images:
--> 432     patches, image_grid_thw = self._preprocess(
    433         image,
    434         do_resize=do_resize,
    435         size=size,
    436         resample=resample,
    437         do_rescale=do_rescale,
    438         rescale_factor=rescale_factor,
    439         do_normalize=do_normalize,
    440         image_mean=image_mean,
    441         image_std=image_std,
    442         patch_size=patch_size,
    443         temporal_patch_size=temporal_patch_size,
    444         merge_size=merge_size,
    445         data_format=data_format,
    446         do_convert_rgb=do_convert_rgb,
    447         input_data_format=input_data_format,
    448     )
    449     pixel_values.extend(patches)
    450     vision_grid_thws.append(image_grid_thw)

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py:258, in Qwen2VLImageProcessor._preprocess(self, images, do_resize, size, resample, do_rescale, rescale_factor, do_normalize, image_mean, image_std, patch_size, temporal_patch_size, merge_size, do_convert_rgb, data_format, input_data_format)
    250 if do_resize:
    251     resized_height, resized_width = smart_resize(
    252         height,
    253         width,
   (...)
    256         max_pixels=size["longest_edge"],
    257     )
--> 258     image = resize(
    259         image, size=(resized_height, resized_width), resample=resample, input_data_format=input_data_format
    260     )
    262 if do_rescale:
    263     image = self.rescale(image, scale=rescale_factor, input_data_format=input_data_format)

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/image_transforms.py:376, in resize(image, size, resample, reducing_gap, data_format, return_numpy, input_data_format)
    374 height, width = size
    375 # PIL images are in the format (width, height)
--> 376 resized_image = image.resize((width, height), resample=resample, reducing_gap=reducing_gap)
    378 if return_numpy:
    379     resized_image = np.array(resized_image)

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/PIL/Image.py:2316, in Image.resize(self, size, resample, box, reducing_gap)
   2304         self = (
   2305             self.reduce(factor, box=reduce_box)
   2306             if callable(self.reduce)
   2307             else Image.reduce(self, factor, box=reduce_box)
   2308         )
   2309         box = (
   2310             (box[0] - reduce_box[0]) / factor_x,
   2311             (box[1] - reduce_box[1]) / factor_y,
   2312             (box[2] - reduce_box[0]) / factor_x,
   2313             (box[3] - reduce_box[1]) / factor_y,
   2314         )
-> 2316 return self._new(self.im.resize(size, resample, box))

ValueError: height and width must be > 0

Could the QUICK_START example be updated and completed?
Thanks

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions