@@ -777,19 +777,22 @@ def generate(self,
777777 if standin :
778778 from preprocessing .face_preprocessor import FaceProcessor
779779 standin_ref_pos = 1 if "K" in video_prompt_type else 0
780- if len (original_input_ref_images ) < standin_ref_pos + 1 : raise Exception ("Missing Standin ref image" )
781- standin_ref_pos = - 1
782- image_ref = original_input_ref_images [standin_ref_pos ]
783- image_ref .save ("si.png" )
784- # face_processor = FaceProcessor(antelopv2_path="ckpts/antelopev2")
785- face_processor = FaceProcessor ()
786- standin_ref = face_processor .process (image_ref , remove_bg = model_type in ["vace_standin_14B" ])
787- face_processor = None
788- gc .collect ()
789- torch .cuda .empty_cache ()
790- standin_freqs = get_nd_rotary_pos_embed ((- 1 , int (target_shape [- 2 ]/ 2 ), int (target_shape [- 1 ]/ 2 ) ), (- 1 , int (target_shape [- 2 ]/ 2 + standin_ref .height / 16 ), int (target_shape [- 1 ]/ 2 + standin_ref .width / 16 ) ))
791- standin_ref = self .vae .encode ([ convert_image_to_tensor (standin_ref ).unsqueeze (1 ) ], VAE_tile_size )[0 ].unsqueeze (0 )
792- kwargs .update ({ "standin_freqs" : standin_freqs , "standin_ref" : standin_ref , })
780+ if len (original_input_ref_images ) < standin_ref_pos + 1 :
781+ if "I" in video_prompt_type :
782+ print ("Warning: Missing Standin ref image, make sure 'Inject only People / Objets' is selected or if there is 'Landscape and then People or Objects' there are at least two ref images." )
783+ else :
784+ standin_ref_pos = - 1
785+ image_ref = original_input_ref_images [standin_ref_pos ]
786+ image_ref .save ("si.png" )
787+ # face_processor = FaceProcessor(antelopv2_path="ckpts/antelopev2")
788+ face_processor = FaceProcessor ()
789+ standin_ref = face_processor .process (image_ref , remove_bg = model_type in ["vace_standin_14B" ])
790+ face_processor = None
791+ gc .collect ()
792+ torch .cuda .empty_cache ()
793+ standin_freqs = get_nd_rotary_pos_embed ((- 1 , int (target_shape [- 2 ]/ 2 ), int (target_shape [- 1 ]/ 2 ) ), (- 1 , int (target_shape [- 2 ]/ 2 + standin_ref .height / 16 ), int (target_shape [- 1 ]/ 2 + standin_ref .width / 16 ) ))
794+ standin_ref = self .vae .encode ([ convert_image_to_tensor (standin_ref ).unsqueeze (1 ) ], VAE_tile_size )[0 ].unsqueeze (0 )
795+ kwargs .update ({ "standin_freqs" : standin_freqs , "standin_ref" : standin_ref , })
793796
794797
795798 # Steps Skipping
0 commit comments