# Source: SECourses_Musubi_Trainer/image_captioning_defaults.toml (FurkanGozukara/SECourses_Musubi_Trainer, branch main)

# Image Captioning Default Configuration
# This file contains default settings for the Image Captioning tab

[image_captioning]
# Section: model configuration

# Path to the Qwen2.5-VL model file (qwen_2.5_vl_7b.safetensors).
model_path = ""
# FP8 precision saves memory; recommended when VRAM is below 24GB.
fp8_vl = true
# Upper bound on the image size used for processing.
max_size = 1280

# Section: caption generation

# Upper bound on the number of tokens generated for one caption.
max_new_tokens = 1024
# Text placed in front of every caption.
prefix = ""
# Text placed at the end of every caption.
suffix = ""
# Custom prompt; leave empty to fall back to the default prompt.
custom_prompt = ""

# Section: generation parameters (tuned for diverse, coherent captions)

# Turns sampling on for text generation.
do_sample = true
# Randomness control: 0.1 = focused, 0.7 = balanced, 1.0 = creative.
temperature = 0.7
# How many top tokens are considered; a higher value gives a more diverse vocabulary.
top_k = 50
# Cumulative probability threshold; 0.95 is the value recommended for coherence.
top_p = 0.95
# Penalty applied to repeated tokens: 1.0 = none, 1.05 = slight penalty.
repetition_penalty = 1.05

# Section: word replacement

# Replacement pairs, written as word1:replacement1;word2:replacement2
replace_words = ""
# When true, words are replaced regardless of their case.
replace_case_insensitive = true
# When true, only complete words are replaced.
replace_whole_words_only = true

# Section: batch processing

# Directory holding the images to caption in batch mode.
batch_image_dir = ""
# Optional folder for the caption files; empty means captions are saved alongside the images.
batch_output_folder = ""
# Accepted values: "text" or "jsonl".
output_format = "text"
# Path of the output file for the JSONL format.
jsonl_output_file = ""
# When true, images from all subfolders are included recursively.
scan_subfolders = false
# When true, images are copied to the output folder.
copy_images = false
# When true, existing caption files are replaced.
overwrite_existing_captions = false

# Section: advanced settings

# Shows progress while a batch is being processed.
enable_progress_tracking = true
# Validates image files before they are processed.
validate_images = true
# Automatically resizes images to the optimal size.
auto_resize_images = true