3333 vllm serve Qwen/Qwen-Image --omni --port 8091
3434
3535Search by using: `--help=<ConfigGroup>` to explore options by section (e.g.,
36- --help=ModelConfig, --help=Frontend )
36+ --help=OmniConfig )
3737 Use `--help=all` to show all available flags at once.
3838"""
3939
@@ -70,74 +70,81 @@ def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgu
7070
7171 serve_parser = make_arg_parser (serve_parser )
7272 serve_parser .epilog = VLLM_SUBCMD_PARSER_EPILOG .format (subcmd = self .name )
73- serve_parser .add_argument (
73+
74+ # Create OmniConfig argument group for omni-related parameters
75+ # This ensures the parameters appear in --help output
76+ omni_config_group = serve_parser .add_argument_group (
77+ title = "OmniConfig" , description = "Configuration for vLLM-Omni multi-stage and diffusion models."
78+ )
79+
80+ omni_config_group .add_argument (
7481 "--omni" ,
7582 action = "store_true" ,
7683 help = "Enable vLLM-Omni mode for multi-modal and diffusion models" ,
7784 )
78- serve_parser .add_argument (
85+ omni_config_group .add_argument (
7986 "--stage-configs-path" ,
8087 type = str ,
8188 default = None ,
8289 help = "Path to the stage configs file. If not specified, the stage configs will be loaded from the model." ,
8390 )
84- serve_parser .add_argument (
91+ omni_config_group .add_argument (
8592 "--stage-init-timeout" ,
8693 type = int ,
8794 default = 300 ,
8895 help = "The timeout for initializing a single stage in seconds (default: 300)" ,
8996 )
90- serve_parser .add_argument (
97+ omni_config_group .add_argument (
9198 "--init-timeout" ,
9299 type = int ,
93100 default = 60000 ,
94101 help = "The timeout for initializing the stages." ,
95102 )
96- serve_parser .add_argument (
103+ omni_config_group .add_argument (
97104 "--shm-threshold-bytes" ,
98105 type = int ,
99106 default = 65536 ,
100107 help = "The threshold for the shared memory size." ,
101108 )
102- serve_parser .add_argument (
109+ omni_config_group .add_argument (
103110 "--log-stats" ,
104111 action = "store_true" ,
105112 help = "Enable logging the stats." ,
106113 )
107- serve_parser .add_argument (
114+ omni_config_group .add_argument (
108115 "--log-file" ,
109116 type = str ,
110117 default = None ,
111118 help = "The path to the log file." ,
112119 )
113- serve_parser .add_argument (
120+ omni_config_group .add_argument (
114121 "--batch-timeout" ,
115122 type = int ,
116123 default = 10 ,
117124 help = "The timeout for the batch." ,
118125 )
119- serve_parser .add_argument (
126+ omni_config_group .add_argument (
120127 "--worker-backend" ,
121128 type = str ,
122129 default = "multi_process" ,
123130 choices = ["multi_process" , "ray" ],
124131 help = "The backend to use for stage workers." ,
125132 )
126- serve_parser .add_argument (
133+ omni_config_group .add_argument (
127134 "--ray-address" ,
128135 type = str ,
129136 default = None ,
130137 help = "The address of the Ray cluster to connect to." ,
131138 )
132139
133140 # Diffusion model specific arguments
134- serve_parser .add_argument (
141+ omni_config_group .add_argument (
135142 "--num-gpus" ,
136143 type = int ,
137144 default = None ,
138145 help = "Number of GPUs to use for diffusion model inference." ,
139146 )
140- serve_parser .add_argument (
147+ omni_config_group .add_argument (
141148 "--usp" ,
142149 "--ulysses-degree" ,
143150 dest = "ulysses_degree" ,
@@ -146,7 +153,7 @@ def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgu
146153 help = "Ulysses Sequence Parallelism degree for diffusion models. "
147154 "Equivalent to setting DiffusionParallelConfig.ulysses_degree." ,
148155 )
149- serve_parser .add_argument (
156+ omni_config_group .add_argument (
150157 "--ring" ,
151158 dest = "ring_degree" ,
152159 type = int ,
@@ -156,26 +163,26 @@ def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgu
156163 )
157164
158165 # Cache optimization parameters
159- serve_parser .add_argument (
166+ omni_config_group .add_argument (
160167 "--cache-backend" ,
161168 type = str ,
162169 default = "none" ,
163170 help = "Cache backend for diffusion models, options: 'tea_cache', 'cache_dit'" ,
164171 )
165- serve_parser .add_argument (
172+ omni_config_group .add_argument (
166173 "--cache-config" ,
167174 type = str ,
168175 default = None ,
169176 help = "JSON string of cache configuration (e.g., '{\" rel_l1_thresh\" : 0.2}')." ,
170177 )
171178
172179 # VAE memory optimization parameters
173- serve_parser .add_argument (
180+ omni_config_group .add_argument (
174181 "--vae-use-slicing" ,
175182 action = "store_true" ,
176183 help = "Enable VAE slicing for memory optimization (useful for mitigating OOM issues)." ,
177184 )
178- serve_parser .add_argument (
185+ omni_config_group .add_argument (
179186 "--vae-use-tiling" ,
180187 action = "store_true" ,
181188 help = "Enable VAE tiling for memory optimization (useful for mitigating OOM issues)." ,
@@ -189,13 +196,13 @@ def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgu
189196 )
190197
191198 # Video model parameters (e.g., Wan2.2) - engine-level
192- serve_parser .add_argument (
199+ omni_config_group .add_argument (
193200 "--boundary-ratio" ,
194201 type = float ,
195202 default = None ,
196203 help = "Boundary split ratio for low/high DiT in video models (e.g., 0.875 for Wan2.2)." ,
197204 )
198- serve_parser .add_argument (
205+ omni_config_group .add_argument (
199206 "--flow-shift" ,
200207 type = float ,
201208 default = None ,
0 commit comments