Skip to content

Commit 7e14c94

Browse files
authored
[Misc] Group omni arguments into OmniConfig section (#744)
Signed-off-by: Chenguang ZHENG <645327136@qq.com>
Signed-off-by: ZEHNG Chenguang <645327136@qq.com>
Signed-off-by: ZHENG Chenguang <645327136@qq.com>
1 parent 779f598 commit 7e14c94

File tree

1 file changed

+27
-20
lines changed

1 file changed

+27
-20
lines changed

vllm_omni/entrypoints/cli/serve.py

Lines changed: 27 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@
3333
vllm serve Qwen/Qwen-Image --omni --port 8091
3434
3535
Search by using: `--help=<ConfigGroup>` to explore options by section (e.g.,
36-
--help=ModelConfig, --help=Frontend)
36+
--help=OmniConfig)
3737
Use `--help=all` to show all available flags at once.
3838
"""
3939

@@ -70,74 +70,81 @@ def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgu
7070

7171
serve_parser = make_arg_parser(serve_parser)
7272
serve_parser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format(subcmd=self.name)
73-
serve_parser.add_argument(
73+
74+
# Create OmniConfig argument group for omni-related parameters
75+
# This ensures the parameters appear in --help output
76+
omni_config_group = serve_parser.add_argument_group(
77+
title="OmniConfig", description="Configuration for vLLM-Omni multi-stage and diffusion models."
78+
)
79+
80+
omni_config_group.add_argument(
7481
"--omni",
7582
action="store_true",
7683
help="Enable vLLM-Omni mode for multi-modal and diffusion models",
7784
)
78-
serve_parser.add_argument(
85+
omni_config_group.add_argument(
7986
"--stage-configs-path",
8087
type=str,
8188
default=None,
8289
help="Path to the stage configs file. If not specified, the stage configs will be loaded from the model.",
8390
)
84-
serve_parser.add_argument(
91+
omni_config_group.add_argument(
8592
"--stage-init-timeout",
8693
type=int,
8794
default=300,
8895
help="The timeout for initializing a single stage in seconds (default: 300)",
8996
)
90-
serve_parser.add_argument(
97+
omni_config_group.add_argument(
9198
"--init-timeout",
9299
type=int,
93100
default=60000,
94101
help="The timeout for initializing the stages.",
95102
)
96-
serve_parser.add_argument(
103+
omni_config_group.add_argument(
97104
"--shm-threshold-bytes",
98105
type=int,
99106
default=65536,
100107
help="The threshold for the shared memory size.",
101108
)
102-
serve_parser.add_argument(
109+
omni_config_group.add_argument(
103110
"--log-stats",
104111
action="store_true",
105112
help="Enable logging the stats.",
106113
)
107-
serve_parser.add_argument(
114+
omni_config_group.add_argument(
108115
"--log-file",
109116
type=str,
110117
default=None,
111118
help="The path to the log file.",
112119
)
113-
serve_parser.add_argument(
120+
omni_config_group.add_argument(
114121
"--batch-timeout",
115122
type=int,
116123
default=10,
117124
help="The timeout for the batch.",
118125
)
119-
serve_parser.add_argument(
126+
omni_config_group.add_argument(
120127
"--worker-backend",
121128
type=str,
122129
default="multi_process",
123130
choices=["multi_process", "ray"],
124131
help="The backend to use for stage workers.",
125132
)
126-
serve_parser.add_argument(
133+
omni_config_group.add_argument(
127134
"--ray-address",
128135
type=str,
129136
default=None,
130137
help="The address of the Ray cluster to connect to.",
131138
)
132139

133140
# Diffusion model specific arguments
134-
serve_parser.add_argument(
141+
omni_config_group.add_argument(
135142
"--num-gpus",
136143
type=int,
137144
default=None,
138145
help="Number of GPUs to use for diffusion model inference.",
139146
)
140-
serve_parser.add_argument(
147+
omni_config_group.add_argument(
141148
"--usp",
142149
"--ulysses-degree",
143150
dest="ulysses_degree",
@@ -146,7 +153,7 @@ def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgu
146153
help="Ulysses Sequence Parallelism degree for diffusion models. "
147154
"Equivalent to setting DiffusionParallelConfig.ulysses_degree.",
148155
)
149-
serve_parser.add_argument(
156+
omni_config_group.add_argument(
150157
"--ring",
151158
dest="ring_degree",
152159
type=int,
@@ -156,26 +163,26 @@ def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgu
156163
)
157164

158165
# Cache optimization parameters
159-
serve_parser.add_argument(
166+
omni_config_group.add_argument(
160167
"--cache-backend",
161168
type=str,
162169
default="none",
163170
help="Cache backend for diffusion models, options: 'tea_cache', 'cache_dit'",
164171
)
165-
serve_parser.add_argument(
172+
omni_config_group.add_argument(
166173
"--cache-config",
167174
type=str,
168175
default=None,
169176
help="JSON string of cache configuration (e.g., '{\"rel_l1_thresh\": 0.2}').",
170177
)
171178

172179
# VAE memory optimization parameters
173-
serve_parser.add_argument(
180+
omni_config_group.add_argument(
174181
"--vae-use-slicing",
175182
action="store_true",
176183
help="Enable VAE slicing for memory optimization (useful for mitigating OOM issues).",
177184
)
178-
serve_parser.add_argument(
185+
omni_config_group.add_argument(
179186
"--vae-use-tiling",
180187
action="store_true",
181188
help="Enable VAE tiling for memory optimization (useful for mitigating OOM issues).",
@@ -189,13 +196,13 @@ def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgu
189196
)
190197

191198
# Video model parameters (e.g., Wan2.2) - engine-level
192-
serve_parser.add_argument(
199+
omni_config_group.add_argument(
193200
"--boundary-ratio",
194201
type=float,
195202
default=None,
196203
help="Boundary split ratio for low/high DiT in video models (e.g., 0.875 for Wan2.2).",
197204
)
198-
serve_parser.add_argument(
205+
omni_config_group.add_argument(
199206
"--flow-shift",
200207
type=float,
201208
default=None,

0 commit comments

Comments
 (0)