Skip to content

Commit 44aa59f

Browse files
COMPUTE-1764_no_selector_extra_args
(feat) dx.py - relax the instanceTypeSelector/clusterSpec exclusivity check so that a cluster spec is allowed when an instance type is provided at runtime.
1 parent a8ec839 commit 44aa59f

File tree

2 files changed

+19
-9
lines changed

2 files changed

+19
-9
lines changed

src/python/dxpy/scripts/dx.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3219,10 +3219,11 @@ def run_body(args, executable, dest_proj, dest_path, preset_inputs=None, input_n
32193219
if args.instance_count:
32203220
# Validate that we're not mixing clusterSpec with instanceTypeSelector
32213221
# clusterSpec and instanceTypeSelector are mutually exclusive at build time
3222-
# If runtime provides instance-count, we should not have instanceTypeSelector from cloned job
3223-
if cloned_instance_type_selector.as_dict():
3224-
raise DXCLIError("Cannot specify --instance-count when cloning a job that uses instanceTypeSelector. "
3225-
"instanceTypeSelector and clusterSpec are mutually exclusive.")
3222+
# However, if instanceType is provided (either from runtime or cloned job), it overrides instanceTypeSelector
3223+
# So we only raise an error if instanceTypeSelector exists AND no instanceType override is present
3224+
if cloned_instance_type_selector.as_dict() and not requested_instance_type.as_dict():
3225+
raise DXCLIError("Cannot specify --instance-count when cloning a job that uses instanceTypeSelector "
3226+
"without providing --instance-type. instanceTypeSelector and clusterSpec are mutually exclusive.")
32263227
# retrieve the full cluster spec defined in executable's runSpec.systemRequirements
32273228
# and overwrite the field initialInstanceCount with the runtime mapping
32283229
requested_instance_count = SystemRequirementsDict.from_instance_count(args.instance_count)
@@ -3244,10 +3245,10 @@ def run_body(args, executable, dest_proj, dest_path, preset_inputs=None, input_n
32443245
requested_nvidia_driver = cloned_nvidia_driver
32453246

32463247
# Validate mutual exclusivity: instanceTypeSelector and clusterSpec cannot coexist
3247-
# Note: instanceType can override instanceTypeSelector, so we only check clusterSpec here
3248+
# Note: instanceType can override instanceTypeSelector, so we only raise error if no instanceType is present
32483249
# instanceTypeSelector is build-time only and should never be in runtime systemRequirements
3249-
if cloned_instance_type_selector.as_dict() and requested_cluster_spec.as_dict():
3250-
raise DXCLIError("Cannot combine clusterSpec with instanceTypeSelector. "
3250+
if cloned_instance_type_selector.as_dict() and requested_cluster_spec.as_dict() and not requested_instance_type.as_dict():
3251+
raise DXCLIError("Cannot combine clusterSpec with instanceTypeSelector without providing instanceType. "
32513252
"instanceTypeSelector and clusterSpec are mutually exclusive.")
32523253

32533254
# combine the requested instance type, full cluster spec, fpga spec, nvidia spec

src/python/test/test_dxclient.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3472,10 +3472,19 @@ def test_dx_run_clone_with_instance_type_selector(self):
34723472
# TODO uncomment later - for now this is not included in the backend behaviour
34733473
# assert "instanceTypeSelector" not in cloned_job_desc_2["systemRequirements"]["*"]
34743474

3475-
# Clone with --instance-count - should fail (clusterSpec and instanceTypeSelector are mutually exclusive)
3476-
with self.assertSubprocessFailure(stderr_regexp='Cannot specify --instance-count.*instanceTypeSelector', exit_code=3):
3475+
# Clone with --instance-count alone - should fail (clusterSpec and instanceTypeSelector are mutually exclusive)
3476+
with self.assertSubprocessFailure(stderr_regexp='Cannot specify --instance-count.*instanceTypeSelector.*without providing --instance-type', exit_code=3):
34773477
run(f"dx run --clone {origin_job_id} --instance-count 3 --brief -y")
34783478

3479+
# Clone with both --instance-count and --instance-type - should work (instanceType overrides instanceTypeSelector)
3480+
cloned_job_id_3 = run(f"dx run --clone {origin_job_id} --instance-count 3 --instance-type mem2_hdd2_x4 --brief -y").strip()
3481+
assert cloned_job_id_3.startswith("job-")
3482+
cloned_job_desc_3 = dxpy.api.job_describe(cloned_job_id_3)
3483+
# Verify both instanceType and clusterSpec are present
3484+
assert "instanceType" in cloned_job_desc_3["systemRequirements"]["*"]
3485+
assert cloned_job_desc_3["systemRequirements"]["*"]["instanceType"] == "mem2_hdd2_x4"
3486+
assert "clusterSpec" in cloned_job_desc_3["systemRequirements"]["*"]
3487+
34793488
def test_dx_run_clone(self):
34803489
applet_id = dxpy.api.applet_new({"project": self.project,
34813490
"dxapi": "1.0.0",

0 commit comments

Comments
 (0)