Skip to content

QUICK_START not working #90

@lesurJ

Description

@lesurJ

Hello

The quickstart example does not work out of the box.
See the output below.

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[1], line 34
     31 min_pixels = 256 * 28 * 28
     32 max_pixels = 1344 * 28 * 28
---> 34 processor = AutoProcessor.from_pretrained(
     35     "showlab/ShowUI-2B", min_pixels=min_pixels, max_pixels=max_pixels
     36 )

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/auto/processing_auto.py:376, in AutoProcessor.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
    372     return processor_class.from_pretrained(
    373         pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
    374     )
    375 elif processor_class is not None:
--> 376     return processor_class.from_pretrained(
    377         pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
    378     )
    379 # Last try: we use the PROCESSOR_MAPPING.
    380 elif type(config) in PROCESSOR_MAPPING:

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/processing_utils.py:1185, in ProcessorMixin.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)
   1182 if token is not None:
   1183     kwargs["token"] = token
-> 1185 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
   1186 processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
   1187 return cls.from_args_and_dict(args, processor_dict, **kwargs)

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/processing_utils.py:1248, in ProcessorMixin._get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
   1245     else:
   1246         attribute_class = cls.get_possibly_dynamic_module(class_name)
-> 1248     args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
   1249 return args

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/auto/image_processing_auto.py:564, in AutoImageProcessor.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
    562     return image_processor_class.from_dict(config_dict, **kwargs)
    563 elif image_processor_class is not None:
--> 564     return image_processor_class.from_dict(config_dict, **kwargs)
    565 # Last try: we use the IMAGE_PROCESSOR_MAPPING.
    566 elif type(config) in IMAGE_PROCESSOR_MAPPING:

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/image_processing_base.py:422, in ImageProcessingMixin.from_dict(cls, image_processor_dict, **kwargs)
    419 if "crop_size" in kwargs and "crop_size" in image_processor_dict:
    420     image_processor_dict["crop_size"] = kwargs.pop("crop_size")
--> 422 image_processor = cls(**image_processor_dict)
    424 # Update image_processor with kwargs if needed
    425 to_remove = []

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py:143, in Qwen2VLImageProcessor.__init__(self, do_resize, size, resample, do_rescale, rescale_factor, do_normalize, image_mean, image_std, do_convert_rgb, min_pixels, max_pixels, patch_size, temporal_patch_size, merge_size, **kwargs)
    141 super().__init__(**kwargs)
    142 if size is not None and ("shortest_edge" not in size or "longest_edge" not in size):
--> 143     raise ValueError("size must contain 'shortest_edge' and 'longest_edge' keys.")
    144 else:
    145     size = {"shortest_edge": 56 * 56, "longest_edge": 28 * 28 * 1280}

ValueError: size must contain 'shortest_edge' and 'longest_edge' keys.

Passing size={"shortest_edge": 256, "longest_edge": 1024} to AutoProcessor.from_pretrained solves this first issue.

But then the following issue arises:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[4], line 28
     22 text = processor.apply_chat_template(
     23     messages,
     24     tokenize=False,
     25     add_generation_prompt=True,
     26 )
     27 image_inputs, video_inputs = process_vision_info(messages)
---> 28 inputs = processor(
     29     text=[text],
     30     images=image_inputs,
     31     videos=video_inputs,
     32     padding=True,
     33     return_tensors="pt",
     34 )
     35 inputs = inputs.to("cuda")
     37 generated_ids = model.generate(**inputs, max_new_tokens=128)

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/qwen2_vl/processing_qwen2_vl.py:142, in Qwen2VLProcessor.__call__(self, images, text, videos, **kwargs)
    140 image_inputs = videos_inputs = {}
    141 if images is not None:
--> 142     image_inputs = self.image_processor(images=images, **output_kwargs["images_kwargs"])
    143     image_grid_thw = image_inputs["image_grid_thw"]
    145 if videos is not None:

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/image_processing_utils.py:44, in BaseImageProcessor.__call__(self, images, **kwargs)
     42 def __call__(self, images, **kwargs) -> BatchFeature:
     43     """Preprocess an image or a batch of images."""
---> 44     return self.preprocess(images, **kwargs)

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py:432, in Qwen2VLImageProcessor.preprocess(self, images, videos, do_resize, size, min_pixels, max_pixels, resample, do_rescale, rescale_factor, do_normalize, image_mean, image_std, patch_size, temporal_patch_size, merge_size, do_convert_rgb, return_tensors, data_format, input_data_format)
    430 pixel_values, vision_grid_thws = [], []
    431 for image in images:
--> 432     patches, image_grid_thw = self._preprocess(
    433         image,
    434         do_resize=do_resize,
    435         size=size,
    436         resample=resample,
    437         do_rescale=do_rescale,
    438         rescale_factor=rescale_factor,
    439         do_normalize=do_normalize,
    440         image_mean=image_mean,
    441         image_std=image_std,
    442         patch_size=patch_size,
    443         temporal_patch_size=temporal_patch_size,
    444         merge_size=merge_size,
    445         data_format=data_format,
    446         do_convert_rgb=do_convert_rgb,
    447         input_data_format=input_data_format,
    448     )
    449     pixel_values.extend(patches)
    450     vision_grid_thws.append(image_grid_thw)

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/models/qwen2_vl/image_processing_qwen2_vl.py:258, in Qwen2VLImageProcessor._preprocess(self, images, do_resize, size, resample, do_rescale, rescale_factor, do_normalize, image_mean, image_std, patch_size, temporal_patch_size, merge_size, do_convert_rgb, data_format, input_data_format)
    250 if do_resize:
    251     resized_height, resized_width = smart_resize(
    252         height,
    253         width,
   (...)
    256         max_pixels=size["longest_edge"],
    257     )
--> 258     image = resize(
    259         image, size=(resized_height, resized_width), resample=resample, input_data_format=input_data_format
    260     )
    262 if do_rescale:
    263     image = self.rescale(image, scale=rescale_factor, input_data_format=input_data_format)

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/transformers/image_transforms.py:376, in resize(image, size, resample, reducing_gap, data_format, return_numpy, input_data_format)
    374 height, width = size
    375 # PIL images are in the format (width, height)
--> 376 resized_image = image.resize((width, height), resample=resample, reducing_gap=reducing_gap)
    378 if return_numpy:
    379     resized_image = np.array(resized_image)

File ~/Documents/perso/showUI/workenv/lib/python3.10/site-packages/PIL/Image.py:2316, in Image.resize(self, size, resample, box, reducing_gap)
   2304         self = (
   2305             self.reduce(factor, box=reduce_box)
   2306             if callable(self.reduce)
   2307             else Image.reduce(self, factor, box=reduce_box)
   2308         )
   2309         box = (
   2310             (box[0] - reduce_box[0]) / factor_x,
   2311             (box[1] - reduce_box[1]) / factor_y,
   2312             (box[2] - reduce_box[0]) / factor_x,
   2313             (box[3] - reduce_box[1]) / factor_y,
   2314         )
-> 2316 return self._new(self.im.resize(size, resample, box))

ValueError: height and width must be > 0

Could the QUICK_START example be updated and completed?
Thanks

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions