Skip to content

Commit 3fe7794

Browse files
authored
Reverts changing the MLX_IBV_DEVICES to MLX_JACCL_DEVICES (ml-explore#2999)
1 parent 4743015 commit 3fe7794

File tree

3 files changed

+5
-5
lines changed

3 files changed

+5
-5
lines changed

docs/src/usage/distributed.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -584,7 +584,7 @@ the process.
584584
**MLX_JACCL_COORDINATOR** should contain the IP and port that rank 0 listens on
585585
and that all the other ranks connect to in order to establish the RDMA connections.
586586

587-
**MLX_JACCL_DEVICES** should contain the path to a json file that contains the
587+
**MLX_IBV_DEVICES** should contain the path to a json file that contains the
588588
ibverbs device names that connect each node to each other node, something like
589589
the following:
590590

mlx/distributed/jaccl/jaccl.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,17 +1159,17 @@ bool is_available() {
11591159
}
11601160

11611161
std::shared_ptr<GroupImpl> init(bool strict /* = false */) {
1162-
const char* dev_file = std::getenv("MLX_JACCL_DEVICES");
1162+
const char* dev_file = std::getenv("MLX_IBV_DEVICES");
11631163
const char* coordinator = std::getenv("MLX_JACCL_COORDINATOR");
11641164
const char* rank_str = std::getenv("MLX_RANK");
11651165

11661166
if (!is_available() || !dev_file || !coordinator || !rank_str) {
11671167
if (strict) {
11681168
std::ostringstream msg;
11691169
msg << "[jaccl] You need to provide via environment variables a rank (MLX_RANK), "
1170-
<< "a device file (MLX_JACCL_DEVICES) and a coordinator ip/port (MLX_JACCL_COORDINATOR) "
1170+
<< "a device file (MLX_IBV_DEVICES) and a coordinator ip/port (MLX_JACCL_COORDINATOR) "
11711171
<< "but provided MLX_RANK=\"" << ((rank_str) ? rank_str : "")
1172-
<< "\", MLX_JACCL_DEVICES=\"" << ((dev_file) ? dev_file : "")
1172+
<< "\", MLX_IBV_DEVICES=\"" << ((dev_file) ? dev_file : "")
11731173
<< "\" and MLX_JACCL_COORDINATOR=\""
11741174
<< ((coordinator) ? coordinator : "");
11751175
throw std::runtime_error(msg.str());

python/mlx/_distributed_utils/launch.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ def launch_jaccl(parser, hosts, args, command):
376376
env = args.env
377377
cwd = args.cwd
378378
env.append(f"MLX_JACCL_COORDINATOR={coordinator}:{args.starting_port}")
379-
files = {"MLX_JACCL_DEVICES": json.dumps([h.rdma for h in hosts])}
379+
files = {"MLX_IBV_DEVICES": json.dumps([h.rdma for h in hosts])}
380380

381381
log(args.verbose, "Running", shlex.join(command))
382382

0 commit comments

Comments
 (0)