Skip to content

Commit 8c0602d

Browse files
committed
fix(cli): remove join defaults and set 32k sequence length
1 parent ce6ef50 commit 8c0602d

4 files changed

Lines changed: 41 additions & 9 deletions

File tree

src/parallax/cli.py

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -232,14 +232,6 @@ def join_command(args, passthrough_args: list[str] | None = None):
232232
passthrough_args = passthrough_args or []
233233

234234
cmd = [sys.executable, str(launch_script)]
235-
if not _flag_present(passthrough_args, ["--max-num-tokens-per-batch"]):
236-
cmd.extend(["--max-num-tokens-per-batch", "4096"])
237-
if not _flag_present(passthrough_args, ["--max-sequence-length"]):
238-
cmd.extend(["--max-sequence-length", "7168"])
239-
if not _flag_present(passthrough_args, ["--max-batch-size"]):
240-
cmd.extend(["--max-batch-size", "8"])
241-
if not _flag_present(passthrough_args, ["--kv-block-size"]):
242-
cmd.extend(["--kv-block-size", "32"])
243235

244236
# The scheduler address is now taken directly from the parsed arguments.
245237
cmd.extend(["--scheduler-addr", args.scheduler_addr])

src/parallax/server/server_args.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def parse_args() -> argparse.Namespace:
5656
parser.add_argument(
5757
"--max-sequence-length",
5858
type=int,
59-
default=None,
59+
default=32768,
6060
help="Maximum sequence length for the model",
6161
)
6262

tests/test_cli.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,39 @@ def test_serve_command_launches_local_server_without_scheduler(tmp_path):
3737
assert env["SGLANG_ENABLE_JIT_DEEPGEMM"] == "0"
3838

3939

40+
def test_join_command_does_not_inject_runtime_defaults(tmp_path):
41+
launch_script = tmp_path / "src" / "parallax" / "launch.py"
42+
launch_script.parent.mkdir(parents=True)
43+
launch_script.touch()
44+
45+
args = Namespace(scheduler_addr="auto", skip_upload=True, use_relay=False)
46+
47+
with (
48+
patch.object(cli, "check_python_version"),
49+
patch.object(cli, "get_project_root", return_value=Path(tmp_path)),
50+
patch.object(cli.sys, "executable", "/repo/.venv/bin/python"),
51+
patch.object(cli, "_execute_with_graceful_shutdown") as execute,
52+
):
53+
cli.join_command(args, ["--log-level", "DEBUG"])
54+
55+
cmd = execute.call_args.args[0]
56+
env = execute.call_args.kwargs["env"]
57+
58+
assert cmd == [
59+
"/repo/.venv/bin/python",
60+
str(launch_script),
61+
"--scheduler-addr",
62+
"auto",
63+
"--log-level",
64+
"DEBUG",
65+
]
66+
assert "--max-num-tokens-per-batch" not in cmd
67+
assert "--max-sequence-length" not in cmd
68+
assert "--max-batch-size" not in cmd
69+
assert "--kv-block-size" not in cmd
70+
assert env["SGLANG_ENABLE_JIT_DEEPGEMM"] == "0"
71+
72+
4073
def test_main_dispatches_serve_command_with_passthrough_args():
4174
with (
4275
patch.object(

tests/test_server_args.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,13 @@ def test_chunked_prefill_zero_disables(self):
198198
class TestParseArgs:
199199
"""Test argument parsing with mocked sys.argv."""
200200

201+
@patch("sys.argv", ["test_server_args.py", "--model-path", "test"])
202+
def test_parse_default_max_sequence_length(self):
203+
"""Test max sequence length defaults to 32k."""
204+
args = parse_args()
205+
206+
assert args.max_sequence_length == 32768
207+
201208
@patch(
202209
"sys.argv",
203210
[

0 commit comments

Comments
 (0)