Skip to content

Commit ff02963

Browse files
COMPUTE-1764_no_selector_extra_args
(feat) all - account for the cloned jobs and the runtime specs there
1 parent d716bea commit ff02963

File tree

4 files changed

+117
-7
lines changed

4 files changed

+117
-7
lines changed

src/python/dxpy/cli/parsers.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -235,14 +235,31 @@ def check_system_requirements(sys_reqs, context="systemRequirements"):
235235
if 'systemRequirements' in args.extra_args:
236236
check_system_requirements(args.extra_args['systemRequirements'])
237237

238+
# Check systemRequirementsByExecutable (double-nested structure: executable -> entrypoint -> requirements)
239+
if 'systemRequirementsByExecutable' in args.extra_args:
240+
sys_reqs_by_exec = args.extra_args['systemRequirementsByExecutable']
241+
if isinstance(sys_reqs_by_exec, dict):
242+
for executable_id, entrypoints in sys_reqs_by_exec.items():
243+
if isinstance(entrypoints, dict):
244+
check_system_requirements(entrypoints, f"systemRequirementsByExecutable.{executable_id}")
245+
238246
# Also check regionalOptions
239247
if 'regionalOptions' in args.extra_args:
240248
regional_opts = args.extra_args['regionalOptions']
241249
if isinstance(regional_opts, dict):
242250
for region, region_spec in regional_opts.items():
243-
if isinstance(region_spec, dict) and 'systemRequirements' in region_spec:
244-
check_system_requirements(region_spec['systemRequirements'],
245-
f"regionalOptions.{region}.systemRequirements")
251+
if isinstance(region_spec, dict):
252+
if 'systemRequirements' in region_spec:
253+
check_system_requirements(region_spec['systemRequirements'],
254+
f"regionalOptions.{region}.systemRequirements")
255+
# Also check systemRequirementsByExecutable within regionalOptions
256+
if 'systemRequirementsByExecutable' in region_spec:
257+
sys_reqs_by_exec = region_spec['systemRequirementsByExecutable']
258+
if isinstance(sys_reqs_by_exec, dict):
259+
for executable_id, entrypoints in sys_reqs_by_exec.items():
260+
if isinstance(entrypoints, dict):
261+
check_system_requirements(entrypoints,
262+
f"regionalOptions.{region}.systemRequirementsByExecutable.{executable_id}")
246263

247264
exec_input_args = argparse.ArgumentParser(add_help=False)
248265
exec_input_args.add_argument('-i', '--input', help=fill('An input to be added using "<input name>[:<class>]=<input value>" (provide "class" if there is no input spec; it can be any job IO class, e.g. "string", "array:string", or "array"; if "class" is "array" or not specified, the value will be attempted to be parsed as JSON and is otherwise treated as a string)', width_adjustment=-24), action='append')

src/python/dxpy/scripts/dx.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3191,27 +3191,38 @@ def run_body(args, executable, dest_proj, dest_path, preset_inputs=None, input_n
31913191
# however when cloning from an analysis, the temporary workflow already has the cloned spec as its default, so no need to merge 1) and 2) here
31923192
cloned_system_requirements = copy.deepcopy(args.cloned_job_desc).get("systemRequirements", {})
31933193
cloned_instance_type = SystemRequirementsDict.from_sys_requirements(cloned_system_requirements, _type='instanceType')
3194+
cloned_instance_type_selector = SystemRequirementsDict.from_sys_requirements(cloned_system_requirements, _type='instanceTypeSelector')
31943195
cloned_cluster_spec = SystemRequirementsDict.from_sys_requirements(cloned_system_requirements, _type='clusterSpec')
31953196
cloned_fpga_driver = SystemRequirementsDict.from_sys_requirements(cloned_system_requirements, _type='fpgaDriver')
31963197
cloned_nvidia_driver = SystemRequirementsDict.from_sys_requirements(cloned_system_requirements, _type='nvidiaDriver')
31973198
cloned_system_requirements_by_executable = args.cloned_job_desc.get("mergedSystemRequirementsByExecutable", {}) or {}
31983199
else:
31993200
cloned_system_requirements = {}
3200-
cloned_instance_type, cloned_cluster_spec, cloned_fpga_driver, cloned_nvidia_driver = (
3201-
SystemRequirementsDict({}), SystemRequirementsDict({}), SystemRequirementsDict({}), SystemRequirementsDict({}))
3201+
cloned_instance_type, cloned_instance_type_selector, cloned_cluster_spec, cloned_fpga_driver, cloned_nvidia_driver = (
3202+
SystemRequirementsDict({}), SystemRequirementsDict({}), SystemRequirementsDict({}), SystemRequirementsDict({}), SystemRequirementsDict({}))
32023203
cloned_system_requirements_by_executable = {}
32033204

32043205
# convert runtime --instance-type into mapping {entrypoint:{'instanceType':xxx}}
32053206
# here the args.instance_type no longer contains specifications for stage sys reqs
32063207
if args.instance_type:
3208+
# Runtime --instance-type overrides instanceTypeSelector from the executable's runSpec
3209+
# When instanceType is specified at runtime, instanceTypeSelector should not be passed to the API
32073210
requested_instance_type = SystemRequirementsDict.from_instance_type(args.instance_type)
32083211
if not isinstance(args.instance_type, basestring):
32093212
requested_instance_type = SystemRequirementsDict(merge(cloned_instance_type.as_dict(), requested_instance_type.as_dict()))
32103213
else:
3214+
# If no runtime instanceType, use cloned instanceType (not instanceTypeSelector)
3215+
# instanceTypeSelector from the executable's runSpec will be resolved by the API
32113216
requested_instance_type = cloned_instance_type
32123217

32133218
# convert runtime --instance-count into mapping {entrypoint:{'clusterSpec':{'initialInstanceCount': N}}})
32143219
if args.instance_count:
3220+
# Validate that we're not mixing clusterSpec with instanceTypeSelector
3221+
# clusterSpec and instanceTypeSelector are mutually exclusive at build time
3222+
# If runtime provides instance-count, we should not have instanceTypeSelector from cloned job
3223+
if cloned_instance_type_selector.as_dict():
3224+
raise err_exit("Cannot specify --instance-count when cloning a job that uses instanceTypeSelector. "
3225+
"instanceTypeSelector and clusterSpec are mutually exclusive.")
32153226
# retrieve the full cluster spec defined in executable's runSpec.systemRequirements
32163227
# and overwrite the field initialInstanceCount with the runtime mapping
32173228
requested_instance_count = SystemRequirementsDict.from_instance_count(args.instance_count)
@@ -3232,8 +3243,17 @@ def run_body(args, executable, dest_proj, dest_path, preset_inputs=None, input_n
32323243
requested_fpga_driver = cloned_fpga_driver
32333244
requested_nvidia_driver = cloned_nvidia_driver
32343245

3246+
# Validate mutual exclusivity: instanceTypeSelector and clusterSpec cannot coexist
3247+
# Note: instanceType can override instanceTypeSelector, so we only check clusterSpec here
3248+
# instanceTypeSelector is build-time only and should never be in runtime systemRequirements
3249+
if cloned_instance_type_selector.as_dict() and requested_cluster_spec.as_dict():
3250+
raise err_exit("Cannot combine clusterSpec with instanceTypeSelector. "
3251+
"instanceTypeSelector and clusterSpec are mutually exclusive.")
3252+
32353253
# combine the requested instance type, full cluster spec, fpga spec, nvidia spec
32363254
# into the runtime systemRequirements
3255+
# Note: instanceTypeSelector is NOT included here as it's build-time only
3256+
# If runtime instanceType was provided, it overrides instanceTypeSelector
32373257
requested_system_requirements = (requested_instance_type + requested_cluster_spec + requested_fpga_driver +
32383258
requested_nvidia_driver).as_dict()
32393259

src/python/dxpy/system_requirements.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,9 @@ def from_sys_requirements(cls, system_requirements, _type='all'):
8383
"""
8484
Returns SystemRequirementsDict encapsulating system requirements.
8585
It can extract only entrypoints with specific fields ('clusterSpec',
86-
'instanceType', etc), depending on the value of _type.
86+
'instanceType', 'instanceTypeSelector', etc), depending on the value of _type.
8787
"""
88-
allowed_types = ['all', 'clusterSpec', 'instanceType', 'fpgaDriver', 'nvidiaDriver']
88+
allowed_types = ['all', 'clusterSpec', 'instanceType', 'instanceTypeSelector', 'fpgaDriver', 'nvidiaDriver']
8989
if _type not in (allowed_types):
9090
raise DXError("Expected '_type' to be one of the following: {}".format(allowed_types))
9191

src/python/test/test_dxclient.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3305,6 +3305,21 @@ def test_dx_run_extra_args_instanceTypeSelector_rejected(self):
33053305
run('dx run ' + applet_id + ' --extra-args ' +
33063306
'\'{"regionalOptions": {"aws:us-east-1": {"systemRequirements": {"*": {"instanceTypeSelector": {"allowedInstanceTypes": ["mem2_ssd1_v2_x2"]}}}}}}\'')
33073307

3308+
# Test with systemRequirementsByExecutable
3309+
with self.assertSubprocessFailure(stderr_regexp='instanceTypeSelector cannot be specified in runtime', exit_code=3):
3310+
run('dx run ' + applet_id + ' --extra-args ' +
3311+
'\'{"systemRequirementsByExecutable": {"' + applet_id + '": {"main": {"instanceTypeSelector": {"allowedInstanceTypes": ["mem2_ssd1_v2_x2"]}}}}}\'')
3312+
3313+
# Test with systemRequirementsByExecutable with wildcard entry point
3314+
with self.assertSubprocessFailure(stderr_regexp='instanceTypeSelector cannot be specified in runtime', exit_code=3):
3315+
run('dx run ' + applet_id + ' --extra-args ' +
3316+
'\'{"systemRequirementsByExecutable": {"' + applet_id + '": {"*": {"instanceTypeSelector": {"allowedInstanceTypes": ["mem2_ssd1_v2_x2"]}}}}}\'')
3317+
3318+
# Test with regionalOptions.systemRequirementsByExecutable
3319+
with self.assertSubprocessFailure(stderr_regexp='instanceTypeSelector cannot be specified in runtime', exit_code=3):
3320+
run('dx run ' + applet_id + ' --extra-args ' +
3321+
'\'{"regionalOptions": {"aws:us-east-1": {"systemRequirementsByExecutable": {"' + applet_id + '": {"main": {"instanceTypeSelector": {"allowedInstanceTypes": ["mem2_ssd1_v2_x2"]}}}}}}}\'')
3322+
33083323
def test_dx_run_sys_reqs(self):
33093324
app_spec = {"project": self.project,
33103325
"dxapi": "1.0.0",
@@ -3403,6 +3418,64 @@ def test_dx_run_clone_nvidia_driver(self):
34033418
cloned_job_desc = dxpy.api.job_describe(cloned_job_id_nvidia_override)
34043419
assert cloned_job_desc["systemRequirements"]["*"]["nvidiaDriver"] == build_nvidia_version
34053420

3421+
def test_dx_run_clone_with_instance_type_selector(self):
3422+
"""
3423+
Test that when cloning a job from an applet that uses instanceTypeSelector:
3424+
1. Cloning without runtime args works - instanceTypeSelector is preserved
3425+
2. Cloning with --instance-type works - runtime instanceType overrides instanceTypeSelector
3426+
3. Cloning with --instance-count fails - clusterSpec and instanceTypeSelector are mutually exclusive
3427+
4. Job description contains instanceTypeSelector in systemRequirements
3428+
"""
3429+
# Create an applet with instanceTypeSelector
3430+
applet_id = dxpy.api.applet_new({"project": self.project,
3431+
"dxapi": "1.0.0",
3432+
"runSpec": {"interpreter": "bash",
3433+
"distribution": "Ubuntu",
3434+
"release": "20.04",
3435+
"version": "0",
3436+
"code": "echo 'hello'",
3437+
"systemRequirements": {
3438+
"*": {
3439+
"instanceTypeSelector": {
3440+
"allowedInstanceTypes": ["mem1_ssd1_x2", "mem2_ssd1_x2"]
3441+
}
3442+
}
3443+
}}
3444+
})['id']
3445+
3446+
# Run the applet to create the origin job
3447+
# The API will resolve instanceTypeSelector to an actual instanceType
3448+
origin_job_id = run(f"dx run {applet_id} --brief -y").strip().split('\n')[-1]
3449+
3450+
# Verify the origin job description contains instanceTypeSelector
3451+
origin_job_desc = dxpy.api.job_describe(origin_job_id)
3452+
assert "systemRequirements" in origin_job_desc
3453+
assert "*" in origin_job_desc["systemRequirements"]
3454+
assert "instanceTypeSelector" in origin_job_desc["systemRequirements"]["*"]
3455+
assert "allowedInstanceTypes" in origin_job_desc["systemRequirements"]["*"]["instanceTypeSelector"]
3456+
3457+
# Clone without runtime args - should work, instanceTypeSelector is preserved
3458+
cloned_job_id_1 = run(f"dx run --clone {origin_job_id} --brief -y").strip()
3459+
assert cloned_job_id_1.startswith("job-")
3460+
cloned_job_desc_1 = dxpy.api.job_describe(cloned_job_id_1)
3461+
# Verify instanceTypeSelector is in the cloned job's systemRequirements
3462+
assert "instanceTypeSelector" in cloned_job_desc_1["systemRequirements"]["*"]
3463+
3464+
# Clone with --instance-type - should work, runtime instanceType overrides instanceTypeSelector
3465+
cloned_job_id_2 = run(f"dx run --clone {origin_job_id} --instance-type mem2_hdd2_x4 --brief -y").strip()
3466+
assert cloned_job_id_2.startswith("job-")
3467+
cloned_job_desc_2 = dxpy.api.job_describe(cloned_job_id_2)
3468+
# Verify instanceType is used instead of instanceTypeSelector
3469+
assert "instanceType" in cloned_job_desc_2["systemRequirements"]["*"]
3470+
assert cloned_job_desc_2["systemRequirements"]["*"]["instanceType"] == "mem2_hdd2_x4"
3471+
# instanceTypeSelector should not be in the runtime systemRequirements when overridden
3472+
# TODO uncomment later - for now this is not included in the backend behaviour
3473+
# assert "instanceTypeSelector" not in cloned_job_desc_2["systemRequirements"]["*"]
3474+
3475+
# Clone with --instance-count - should fail (clusterSpec and instanceTypeSelector are mutually exclusive)
3476+
with self.assertSubprocessFailure(stderr_regexp='Cannot specify --instance-count.*instanceTypeSelector', exit_code=3):
3477+
run(f"dx run --clone {origin_job_id} --instance-count 3 --brief -y")
3478+
34063479
def test_dx_run_clone(self):
34073480
applet_id = dxpy.api.applet_new({"project": self.project,
34083481
"dxapi": "1.0.0",

0 commit comments

Comments
 (0)