Nerogar · Nerogar · May 27, 2025 · Apr 23, 2025 · Apr 23, 2025 · Apr 28, 2025
diff --git a/modules/dataLoader/mixin/DataLoaderText2ImageMixin.py b/modules/dataLoader/mixin/DataLoaderText2ImageMixin.py
@@ -31,6 +31,7 @@
 from mgds.pipelineModules.RandomRotate import RandomRotate
 from mgds.pipelineModules.RandomSaturation import RandomSaturation
 from mgds.pipelineModules.ScaleCropImage import ScaleCropImage
+from mgds.pipelineModules.SelectFirstInput import SelectFirstInput
 from mgds.pipelineModules.SelectInput import SelectInput
 from mgds.pipelineModules.SelectRandomText import SelectRandomText
 from mgds.pipelineModules.ShuffleTags import ShuffleTags
@@ -55,16 +56,19 @@ def _enumerate_input_modules(self, config: TrainConfig, allow_videos: bool = Fal
         collect_paths = CollectPaths(
             concept_in_name='concept', path_in_name='path', include_subdirectories_in_name='concept.include_subdirectories', enabled_in_name='enabled',
             path_out_name='image_path', concept_out_name='concept',
-            extensions=supported_extensions, include_postfix=None, exclude_postfix=['-masklabel']
+            extensions=supported_extensions, include_postfix=None, exclude_postfix=['-masklabel','-condlabel']
         )
 
         mask_path = ModifyPath(in_name='image_path', out_name='mask_path', postfix='-masklabel', extension='.png')
+        cond_path = ModifyPath(in_name='image_path', out_name='cond_path', postfix='-condlabel', extension='.png')
         sample_prompt_path = ModifyPath(in_name='image_path', out_name='sample_prompt_path', postfix='', extension='.txt')
 
         modules = [collect_paths, sample_prompt_path]
 
         if config.masked_training:
             modules.append(mask_path)
+        if config.custom_conditioning_image:
+            modules.append(cond_path)
 
         return modules
 
@@ -82,6 +86,8 @@ def _load_input_modules(
         load_mask = LoadImage(path_in_name='mask_path', image_out_name='mask', range_min=0, range_max=1, channels=1, supported_extensions={".png"}, dtype=train_dtype.torch_dtype())
         mask_to_video = ImageToVideo(in_name='mask', out_name='mask')
 
+        load_cond_image = LoadImage(path_in_name='cond_path', image_out_name='custom_conditioning_image', range_min=0, range_max=1, supported_extensions=path_util.supported_image_extensions(), dtype=train_dtype.torch_dtype())
+
         load_sample_prompts = LoadMultipleTexts(path_in_name='sample_prompt_path', texts_out_name='sample_prompts')
         load_concept_prompts = LoadMultipleTexts(path_in_name='concept.text.prompt_path', texts_out_name='concept_prompts')
         filename_prompt = GetFilename(path_in_name='image_path', filename_out_name='filename_prompt', include_extension=False)
@@ -105,6 +111,9 @@ def _load_input_modules(
         elif config.model_type.has_mask_input():
             modules.append(generate_mask)
 
+        if config.custom_conditioning_image:
+            modules.append(load_cond_image)
+
         if allow_video:
             modules.append(mask_to_video)
 
@@ -171,6 +180,9 @@ def _crop_modules(self, config: TrainConfig):
         if config.model_type.has_depth_input():
             inputs.append('depth')
 
+        if config.custom_conditioning_image:
+            inputs.append('custom_conditioning_image')
+
         scale_crop = ScaleCropImage(names=inputs, scale_resolution_in_name='scale_resolution', crop_resolution_in_name='crop_resolution', enable_crop_jitter_in_name='concept.image.enable_crop_jitter', crop_offset_out_name='crop_offset')
 
         modules = [scale_crop]
@@ -179,19 +191,27 @@ def _crop_modules(self, config: TrainConfig):
 
     def _augmentation_modules(self, config: TrainConfig):
         inputs = ['image']
+        image_inputs = ['image']
 
         if config.masked_training or config.model_type.has_mask_input():
             inputs.append('mask')
 
         if config.model_type.has_depth_input():
             inputs.append('depth')
 
+        if config.custom_conditioning_image:
+            inputs.append('custom_conditioning_image')
+            image_inputs.append('custom_conditioning_image')
+
+        # image augmentations
         random_flip = RandomFlip(names=inputs, enabled_in_name='concept.image.enable_random_flip', fixed_enabled_in_name='concept.image.enable_fixed_flip')
         random_rotate = RandomRotate(names=inputs, enabled_in_name='concept.image.enable_random_rotate', fixed_enabled_in_name='concept.image.enable_fixed_rotate', max_angle_in_name='concept.image.random_rotate_max_angle')
-        random_brightness = RandomBrightness(names=['image'], enabled_in_name='concept.image.enable_random_brightness', fixed_enabled_in_name='concept.image.enable_fixed_brightness', max_strength_in_name='concept.image.random_brightness_max_strength')
-        random_contrast = RandomContrast(names=['image'], enabled_in_name='concept.image.enable_random_contrast', fixed_enabled_in_name='concept.image.enable_fixed_contrast', max_strength_in_name='concept.image.random_contrast_max_strength')
-        random_saturation = RandomSaturation(names=['image'], enabled_in_name='concept.image.enable_random_saturation', fixed_enabled_in_name='concept.image.enable_fixed_saturation', max_strength_in_name='concept.image.random_saturation_max_strength')
-        random_hue = RandomHue(names=['image'], enabled_in_name='concept.image.enable_random_hue', fixed_enabled_in_name='concept.image.enable_fixed_hue', max_strength_in_name='concept.image.random_hue_max_strength')
+        random_brightness = RandomBrightness(names=image_inputs, enabled_in_name='concept.image.enable_random_brightness', fixed_enabled_in_name='concept.image.enable_fixed_brightness', max_strength_in_name='concept.image.random_brightness_max_strength')
+        random_contrast = RandomContrast(names=image_inputs, enabled_in_name='concept.image.enable_random_contrast', fixed_enabled_in_name='concept.image.enable_fixed_contrast', max_strength_in_name='concept.image.random_contrast_max_strength')
+        random_saturation = RandomSaturation(names=image_inputs, enabled_in_name='concept.image.enable_random_saturation', fixed_enabled_in_name='concept.image.enable_fixed_saturation', max_strength_in_name='concept.image.random_saturation_max_strength')
+        random_hue = RandomHue(names=image_inputs, enabled_in_name='concept.image.enable_random_hue', fixed_enabled_in_name='concept.image.enable_fixed_hue', max_strength_in_name='concept.image.random_hue_max_strength')
+
+        # text augmentations
         drop_tags = DropTags(text_in_name='prompt', enabled_in_name='concept.text.tag_dropout_enable', probability_in_name='concept.text.tag_dropout_probability', dropout_mode_in_name='concept.text.tag_dropout_mode',
                              special_tags_in_name='concept.text.tag_dropout_special_tags', special_tag_mode_in_name='concept.text.tag_dropout_special_tags_mode', delimiter_in_name='concept.text.tag_delimiter',
                              keep_tags_count_in_name='concept.text.keep_tags_count', text_out_name='prompt', regex_enabled_in_name='concept.text.tag_dropout_special_tags_regex')
@@ -215,11 +235,13 @@ def _augmentation_modules(self, config: TrainConfig):
 
     def _inpainting_modules(self, config: TrainConfig):
         conditioning_image = GenerateMaskedConditioningImage(image_in_name='image', mask_in_name='mask', image_out_name='conditioning_image', image_range_min=0, image_range_max=1)
+        select_conditioning_image = SelectFirstInput(in_names=['custom_conditioning_image', 'conditioning_image'], out_name='conditioning_image')
 
         modules = []
 
         if config.model_type.has_conditioning_image_input():
             modules.append(conditioning_image)
+            modules.append(select_conditioning_image)
 
         return modules
 

diff --git a/modules/ui/CaptionUI.py b/modules/ui/CaptionUI.py
@@ -225,7 +225,7 @@ def load_directory(self, include_subdirectories: bool = False):
     def scan_directory(self, include_subdirectories: bool = False):
         def __is_supported_image_extension(filename):
             name, ext = os.path.splitext(filename)
-            return path_util.is_supported_image_extension(ext) and not name.endswith("-masklabel")
+            return path_util.is_supported_image_extension(ext) and not name.endswith("-masklabel") and not name.endswith("-condlabel")
 
         self.image_rel_paths = []
 

diff --git a/modules/ui/ConceptTab.py b/modules/ui/ConceptTab.py
@@ -128,7 +128,7 @@ def __get_preview_image(self):
             for path in pathlib.Path(self.concept.path).glob(glob_pattern):
                 extension = os.path.splitext(path)[1]
                 if path.is_file() and path_util.is_supported_image_extension(extension) \
-                        and not path.name.endswith("-masklabel.png"):
+                        and not path.name.endswith("-masklabel.png") and not path.name.endswith("-condlabel.png"):
                     preview_path = path_util.canonical_join(self.concept.path, path)
                     break
 

diff --git a/modules/ui/ConceptWindow.py b/modules/ui/ConceptWindow.py
@@ -384,7 +384,7 @@ def __concept_stats_tab(self, master):
         #basic img/vid stats - count of each type in the concept
         #the \n at the start of the label gives it better vertical spacing with other rows
         self.image_count_label = components.label(frame, 3, 0, "\nTotal Images", pad=0,
-                         tooltip="Total number of image files, any of the extensions " + str(path_util.SUPPORTED_IMAGE_EXTENSIONS) + ", excluding '-masklabel.png'")
+                         tooltip="Total number of image files, any of the extensions " + str(path_util.SUPPORTED_IMAGE_EXTENSIONS) + ", excluding '-masklabel.png' and '-condlabel.png'")
         self.image_count_label.configure(font=ctk.CTkFont(underline=True))
         self.image_count_preview = components.label(frame, 4, 0, pad=0, text="-")
         self.video_count_label = components.label(frame, 3, 1, "\nTotal Videos", pad=0,
@@ -552,7 +552,7 @@ def __get_preview_image(self):
             for path in pathlib.Path(self.concept.path).glob(glob_pattern):
                 extension = os.path.splitext(path)[1]
                 if path.is_file() and path_util.is_supported_image_extension(extension) \
-                        and not path.name.endswith("-masklabel.png"):
+                        and not path.name.endswith("-masklabel.png") and not path.name.endswith("-condlabel.png"):
                     preview_image_path = path_util.canonical_join(self.concept.path, path)
                     file_index += 1
                     if file_index == self.image_preview_file_index:

diff --git a/modules/ui/TrainingTab.py b/modules/ui/TrainingTab.py
@@ -614,6 +614,11 @@ def __create_masked_frame(self, master, row):
                          tooltip="Preserves regions outside the mask using the original untrained model output as a target. Only available for LoRA training. If enabled, use a low unmasked weight.")
         components.entry(frame, 4, 1, self.ui_state, "masked_prior_preservation_weight")
 
+        # use custom conditioning image
+        components.label(frame, 5, 0, "Custom Conditioning Image",
+                         tooltip="When custom conditioning image is enabled, the png file with the -condlabel postfix is used as the conditioning image instead of the automatically generated one. This is suitable for special scenarios, such as object removal, allowing the model to learn a certain behavior concept.")
+        components.switch(frame, 5, 1, self.ui_state, "custom_conditioning_image")
+
     def __create_loss_frame(self, master, row, supports_vb_loss: bool = False):
         frame = ctk.CTkFrame(master=master, corner_radius=5)
         frame.grid(row=row, column=0, padx=5, pady=5, sticky="nsew")

diff --git a/modules/util/concept_stats.py b/modules/util/concept_stats.py
@@ -91,7 +91,7 @@ def folder_scan(dir, stats_dict : dict, advanced_checks : bool, conceptconfig :
 
     for path in file_list:
         basename, extension = os.path.splitext(path)
-        if extension.lower() in img_extensions_list and not path.name.endswith("-masklabel.png"):
+        if extension.lower() in img_extensions_list and not path.name.endswith("-masklabel.png") and not path.name.endswith("-condlabel.png"):
             stats_dict["image_count"] += 1
             stats_dict["file_size"] += path.stat().st_size
             if advanced_checks:

diff --git a/modules/util/config/TrainConfig.py b/modules/util/config/TrainConfig.py
@@ -393,6 +393,9 @@ class TrainConfig(BaseConfig):
     normalize_masked_area_loss: bool
     masked_prior_preservation_weight: float
 
+    # custom conditioning image
+    custom_conditioning_image: bool
+
     # embedding
     embedding_learning_rate: float
     preserve_embedding_norm: bool
@@ -933,6 +936,7 @@ def default_values() -> 'TrainConfig':
         data.append(("unmasked_weight", 0.1, float, False))
         data.append(("normalize_masked_area_loss", False, bool, False))
         data.append(("masked_prior_preservation_weight", 0.0, float, False))
+        data.append(("custom_conditioning_image", False, bool, False))
 
         # embedding
         data.append(("embedding_learning_rate", None, float, True))

diff --git a/requirements-global.txt b/requirements-global.txt
@@ -33,7 +33,7 @@ pooch==1.8.2
 open-clip-torch==2.30.0
 
 # data loader
--e git+https://github.com/Nerogar/mgds.git@ecc1c76#egg=mgds
+-e git+https://github.com/Nerogar/mgds.git@11ff4aa#egg=mgds
 
 # optimizers
 dadaptation==3.2 # dadaptation optimizers