Skip to content

Commit 97bfb2a

Browse files
COMPUTE-1764_no_selector_extra_args (#1512)
* COMPUTE-1764_no_selector_extra_args (feat) parsers.py, tests - disallowed instanceTypeSelector to be passed in extra-args. * Update src/python/dxpy/cli/parsers.py Co-authored-by: Yuxin Shi <[email protected]> * COMPUTE-1764_no_selector_extra_args (feat) all - account for the cloned jobs and the runtime specs there * COMPUTE-1764_no_selector_extra_args (feat) dx.py - changed error type * COMPUTE-1764_no_selector_extra_args (feat) dx.py - relaxed exclusivity to allow instance type in runtime to allow cluster spec. * COMPUTE-1764_no_selector_extra_args (feat) dx.py - relaxed exclusivity to allow instance type in runtime to allow cluster spec. * COMPUTE-1764_no_selector_extra_args (feat) dx.py - relaxed exclusivity to allow instance type in runtime to allow cluster spec. --------- Co-authored-by: Yuxin Shi <[email protected]>
1 parent 276aba0 commit 97bfb2a

File tree

4 files changed

+166
-5
lines changed

4 files changed

+166
-5
lines changed

src/python/dxpy/cli/parsers.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,45 @@ def process_extra_args(args):
222222
except:
223223
raise DXParserError('Value given for --extra-args could not be parsed as JSON')
224224

225+
# Validate that instanceTypeSelector is not specified in systemRequirements
226+
def check_system_requirements(sys_reqs, context="systemRequirements"):
227+
if isinstance(sys_reqs, dict):
228+
for entry_point, reqs in sys_reqs.items():
229+
if isinstance(reqs, dict) and 'instanceTypeSelector' in reqs:
230+
raise DXParserError(
231+
'instanceTypeSelector cannot be specified in runtime --extra-args. '
232+
'It can only be defined in the dxapp.json at build time.'
233+
)
234+
235+
if 'systemRequirements' in args.extra_args:
236+
check_system_requirements(args.extra_args['systemRequirements'])
237+
238+
# Check systemRequirementsByExecutable (double-nested structure: executable -> entrypoint -> requirements)
239+
if 'systemRequirementsByExecutable' in args.extra_args:
240+
sys_reqs_by_exec = args.extra_args['systemRequirementsByExecutable']
241+
if isinstance(sys_reqs_by_exec, dict):
242+
for executable_id, entrypoints in sys_reqs_by_exec.items():
243+
if isinstance(entrypoints, dict):
244+
check_system_requirements(entrypoints, f"systemRequirementsByExecutable.{executable_id}")
245+
246+
# Also check regionalOptions
247+
if 'regionalOptions' in args.extra_args:
248+
regional_opts = args.extra_args['regionalOptions']
249+
if isinstance(regional_opts, dict):
250+
for region, region_spec in regional_opts.items():
251+
if isinstance(region_spec, dict):
252+
if 'systemRequirements' in region_spec:
253+
check_system_requirements(region_spec['systemRequirements'],
254+
f"regionalOptions.{region}.systemRequirements")
255+
# Also check systemRequirementsByExecutable within regionalOptions
256+
if 'systemRequirementsByExecutable' in region_spec:
257+
sys_reqs_by_exec = region_spec['systemRequirementsByExecutable']
258+
if isinstance(sys_reqs_by_exec, dict):
259+
for executable_id, entrypoints in sys_reqs_by_exec.items():
260+
if isinstance(entrypoints, dict):
261+
check_system_requirements(entrypoints,
262+
f"regionalOptions.{region}.systemRequirementsByExecutable.{executable_id}")
263+
225264
exec_input_args = argparse.ArgumentParser(add_help=False)
226265
exec_input_args.add_argument('-i', '--input', help=fill('An input to be added using "<input name>[:<class>]=<input value>" (provide "class" if there is no input spec; it can be any job IO class, e.g. "string", "array:string", or "array"; if "class" is "array" or not specified, the value will be attempted to be parsed as JSON and is otherwise treated as a string)', width_adjustment=-24), action='append')
227266
exec_input_args.add_argument('-j', '--input-json', help=fill('The full input JSON (keys=input field names, values=input field values)', width_adjustment=-24))

src/python/dxpy/scripts/dx.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
import dxpy
3131
from dxpy.scripts import dx_build_app
3232
from dxpy import workflow_builder
33-
from dxpy.exceptions import PermissionDenied, InvalidState, ResourceNotFound
33+
from dxpy.exceptions import PermissionDenied, InvalidState, ResourceNotFound, DXCLIError
3434

3535
from ..cli import try_call, prompt_for_yn, INTERACTIVE_CLI
3636
from ..cli import workflow as workflow_cli
@@ -3191,27 +3191,39 @@ def run_body(args, executable, dest_proj, dest_path, preset_inputs=None, input_n
31913191
# however when cloning from an analysis, the temporary workflow already has the cloned spec as its default, so no need to merge 1) and 2) here
31923192
cloned_system_requirements = copy.deepcopy(args.cloned_job_desc).get("systemRequirements", {})
31933193
cloned_instance_type = SystemRequirementsDict.from_sys_requirements(cloned_system_requirements, _type='instanceType')
3194+
cloned_instance_type_selector = SystemRequirementsDict.from_sys_requirements(cloned_system_requirements, _type='instanceTypeSelector')
31943195
cloned_cluster_spec = SystemRequirementsDict.from_sys_requirements(cloned_system_requirements, _type='clusterSpec')
31953196
cloned_fpga_driver = SystemRequirementsDict.from_sys_requirements(cloned_system_requirements, _type='fpgaDriver')
31963197
cloned_nvidia_driver = SystemRequirementsDict.from_sys_requirements(cloned_system_requirements, _type='nvidiaDriver')
31973198
cloned_system_requirements_by_executable = args.cloned_job_desc.get("mergedSystemRequirementsByExecutable", {}) or {}
31983199
else:
31993200
cloned_system_requirements = {}
3200-
cloned_instance_type, cloned_cluster_spec, cloned_fpga_driver, cloned_nvidia_driver = (
3201-
SystemRequirementsDict({}), SystemRequirementsDict({}), SystemRequirementsDict({}), SystemRequirementsDict({}))
3201+
cloned_instance_type, cloned_instance_type_selector, cloned_cluster_spec, cloned_fpga_driver, cloned_nvidia_driver = (
3202+
SystemRequirementsDict({}), SystemRequirementsDict({}), SystemRequirementsDict({}), SystemRequirementsDict({}), SystemRequirementsDict({}))
32023203
cloned_system_requirements_by_executable = {}
32033204

32043205
# convert runtime --instance-type into mapping {entrypoint:{'instanceType':xxx}}
32053206
# here the args.instance_type no longer contains specifications for stage sys reqs
32063207
if args.instance_type:
3208+
# Runtime --instance-type overrides instanceTypeSelector from the executable's runSpec
3209+
# When instanceType is specified at runtime, instanceTypeSelector should not be passed to the API
32073210
requested_instance_type = SystemRequirementsDict.from_instance_type(args.instance_type)
32083211
if not isinstance(args.instance_type, basestring):
32093212
requested_instance_type = SystemRequirementsDict(merge(cloned_instance_type.as_dict(), requested_instance_type.as_dict()))
32103213
else:
3214+
# If no runtime instanceType, use cloned instanceType (not instanceTypeSelector)
3215+
# instanceTypeSelector from the executable's runSpec will be resolved by the API
32113216
requested_instance_type = cloned_instance_type
32123217

32133218
# convert runtime --instance-count into mapping {entrypoint:{'clusterSpec':{'initialInstanceCount': N}}})
32143219
if args.instance_count:
3220+
# Validate that we're not mixing clusterSpec with instanceTypeSelector
3221+
# clusterSpec and instanceTypeSelector are mutually exclusive at build time
3222+
# However, if instanceType is provided (either from runtime or cloned job), it overrides instanceTypeSelector
3223+
# So we only raise an error if instanceTypeSelector exists AND no instanceType override is present
3224+
if cloned_instance_type_selector.as_dict() and not requested_instance_type.as_dict():
3225+
raise DXCLIError("Cannot specify --instance-count when cloning a job that uses instanceTypeSelector "
3226+
"without providing --instance-type. instanceTypeSelector and clusterSpec are mutually exclusive.")
32153227
# retrieve the full cluster spec defined in executable's runSpec.systemRequirements
32163228
# and overwrite the field initialInstanceCount with the runtime mapping
32173229
requested_instance_count = SystemRequirementsDict.from_instance_count(args.instance_count)
@@ -3232,8 +3244,11 @@ def run_body(args, executable, dest_proj, dest_path, preset_inputs=None, input_n
32323244
requested_fpga_driver = cloned_fpga_driver
32333245
requested_nvidia_driver = cloned_nvidia_driver
32343246

3247+
32353248
# combine the requested instance type, full cluster spec, fpga spec, nvidia spec
32363249
# into the runtime systemRequirements
3250+
# Note: instanceTypeSelector is NOT included here as it's build-time only
3251+
# If runtime instanceType was provided, it overrides instanceTypeSelector
32373252
requested_system_requirements = (requested_instance_type + requested_cluster_spec + requested_fpga_driver +
32383253
requested_nvidia_driver).as_dict()
32393254

src/python/dxpy/system_requirements.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,9 @@ def from_sys_requirements(cls, system_requirements, _type='all'):
8383
"""
8484
Returns SystemRequirementsDict encapsulating system requirements.
8585
It can extract only entrypoints with specific fields ('clusterSpec',
86-
'instanceType', etc), depending on the value of _type.
86+
'instanceType', 'instanceTypeSelector', etc), depending on the value of _type.
8787
"""
88-
allowed_types = ['all', 'clusterSpec', 'instanceType', 'fpgaDriver', 'nvidiaDriver']
88+
allowed_types = ['all', 'clusterSpec', 'instanceType', 'instanceTypeSelector', 'fpgaDriver', 'nvidiaDriver']
8989
if _type not in (allowed_types):
9090
raise DXError("Expected '_type' to be one of the following: {}".format(allowed_types))
9191

src/python/test/test_dxclient.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3280,6 +3280,46 @@ def test_dx_run_extra_args(self):
32803280
with self.assertSubprocessFailure(stderr_regexp='JSON', exit_code=3):
32813281
run("dx run " + applet_id + " --extra-args not-a-JSON-string")
32823282

3283+
def test_dx_run_extra_args_instanceTypeSelector_rejected(self):
3284+
# Validate that instanceTypeSelector cannot be specified in runtime extra-args
3285+
applet_id = dxpy.api.applet_new({"project": self.project,
3286+
"dxapi": "1.0.0",
3287+
"runSpec": {"interpreter": "bash",
3288+
"distribution": "Ubuntu",
3289+
"release": "14.04",
3290+
"code": "echo 'hello'"}
3291+
})['id']
3292+
3293+
# Test with wildcard entry point
3294+
with self.assertSubprocessFailure(stderr_regexp='instanceTypeSelector cannot be specified in runtime', exit_code=3):
3295+
run('dx run ' + applet_id + ' --extra-args ' +
3296+
'\'{"systemRequirements": {"*": {"instanceTypeSelector": {"allowedInstanceTypes": ["mem2_ssd1_v2_x2"]}}}}\'')
3297+
3298+
# Test with specific entry point
3299+
with self.assertSubprocessFailure(stderr_regexp='instanceTypeSelector cannot be specified in runtime', exit_code=3):
3300+
run('dx run ' + applet_id + ' --extra-args ' +
3301+
'\'{"systemRequirements": {"main": {"instanceTypeSelector": {"allowedInstanceTypes": ["mem2_ssd1_v2_x2"]}}}}\'')
3302+
3303+
# Test with regionalOptions
3304+
with self.assertSubprocessFailure(stderr_regexp='instanceTypeSelector cannot be specified in runtime', exit_code=3):
3305+
run('dx run ' + applet_id + ' --extra-args ' +
3306+
'\'{"regionalOptions": {"aws:us-east-1": {"systemRequirements": {"*": {"instanceTypeSelector": {"allowedInstanceTypes": ["mem2_ssd1_v2_x2"]}}}}}}\'')
3307+
3308+
# Test with systemRequirementsByExecutable
3309+
with self.assertSubprocessFailure(stderr_regexp='instanceTypeSelector cannot be specified in runtime', exit_code=3):
3310+
run('dx run ' + applet_id + ' --extra-args ' +
3311+
'\'{"systemRequirementsByExecutable": {"' + applet_id + '": {"main": {"instanceTypeSelector": {"allowedInstanceTypes": ["mem2_ssd1_v2_x2"]}}}}}\'')
3312+
3313+
# Test with systemRequirementsByExecutable with wildcard entry point
3314+
with self.assertSubprocessFailure(stderr_regexp='instanceTypeSelector cannot be specified in runtime', exit_code=3):
3315+
run('dx run ' + applet_id + ' --extra-args ' +
3316+
'\'{"systemRequirementsByExecutable": {"' + applet_id + '": {"*": {"instanceTypeSelector": {"allowedInstanceTypes": ["mem2_ssd1_v2_x2"]}}}}}\'')
3317+
3318+
# Test with regionalOptions.systemRequirementsByExecutable
3319+
with self.assertSubprocessFailure(stderr_regexp='instanceTypeSelector cannot be specified in runtime', exit_code=3):
3320+
run('dx run ' + applet_id + ' --extra-args ' +
3321+
'\'{"regionalOptions": {"aws:us-east-1": {"systemRequirementsByExecutable": {"' + applet_id + '": {"main": {"instanceTypeSelector": {"allowedInstanceTypes": ["mem2_ssd1_v2_x2"]}}}}}}}\'')
3322+
32833323
def test_dx_run_sys_reqs(self):
32843324
app_spec = {"project": self.project,
32853325
"dxapi": "1.0.0",
@@ -3378,6 +3418,73 @@ def test_dx_run_clone_nvidia_driver(self):
33783418
cloned_job_desc = dxpy.api.job_describe(cloned_job_id_nvidia_override)
33793419
assert cloned_job_desc["systemRequirements"]["*"]["nvidiaDriver"] == build_nvidia_version
33803420

3421+
def test_dx_run_clone_with_instance_type_selector(self):
3422+
"""
3423+
Test that when cloning a job from an applet that uses instanceTypeSelector:
3424+
1. Cloning without runtime args works - instanceTypeSelector is preserved
3425+
2. Cloning with --instance-type works - runtime instanceType overrides instanceTypeSelector
3426+
3. Cloning with --instance-count fails - clusterSpec and instanceTypeSelector are mutually exclusive
3427+
4. Job description contains instanceTypeSelector in systemRequirements
3428+
"""
3429+
# Create an applet with instanceTypeSelector
3430+
applet_id = dxpy.api.applet_new({"project": self.project,
3431+
"dxapi": "1.0.0",
3432+
"runSpec": {"interpreter": "bash",
3433+
"distribution": "Ubuntu",
3434+
"release": "20.04",
3435+
"version": "0",
3436+
"code": "echo 'hello'",
3437+
"systemRequirements": {
3438+
"*": {
3439+
"instanceTypeSelector": {
3440+
"allowedInstanceTypes": ["mem1_ssd1_x2", "mem2_ssd1_x2"]
3441+
}
3442+
}
3443+
}}
3444+
})['id']
3445+
3446+
# Run the applet to create the origin job
3447+
# The API will resolve instanceTypeSelector to an actual instanceType
3448+
origin_job_id = run(f"dx run {applet_id} --brief -y").strip().split('\n')[-1]
3449+
3450+
# Verify the origin job description contains instanceTypeSelector
3451+
origin_job_desc = dxpy.api.job_describe(origin_job_id)
3452+
assert "systemRequirements" in origin_job_desc
3453+
assert "*" in origin_job_desc["systemRequirements"]
3454+
assert "instanceTypeSelector" in origin_job_desc["systemRequirements"]["*"]
3455+
assert "allowedInstanceTypes" in origin_job_desc["systemRequirements"]["*"]["instanceTypeSelector"]
3456+
3457+
# Clone without runtime args - should work, instanceTypeSelector is preserved
3458+
cloned_job_id_1 = run(f"dx run --clone {origin_job_id} --brief -y").strip()
3459+
assert cloned_job_id_1.startswith("job-")
3460+
cloned_job_desc_1 = dxpy.api.job_describe(cloned_job_id_1)
3461+
# Verify instanceTypeSelector is in the cloned job's systemRequirements
3462+
assert "instanceTypeSelector" in cloned_job_desc_1["systemRequirements"]["*"]
3463+
3464+
# Clone with --instance-type - should work, runtime instanceType overrides instanceTypeSelector
3465+
cloned_job_id_2 = run(f"dx run --clone {origin_job_id} --instance-type mem2_hdd2_x4 --brief -y").strip()
3466+
assert cloned_job_id_2.startswith("job-")
3467+
cloned_job_desc_2 = dxpy.api.job_describe(cloned_job_id_2)
3468+
# Verify instanceType is used instead of instanceTypeSelector
3469+
assert "instanceType" in cloned_job_desc_2["systemRequirements"]["*"]
3470+
assert cloned_job_desc_2["systemRequirements"]["*"]["instanceType"] == "mem2_hdd2_x4"
3471+
# instanceTypeSelector should not be in the runtime systemRequirements when overridden
3472+
# TODO uncomment later - for now this is not included in the backend behaviour
3473+
# assert "instanceTypeSelector" not in cloned_job_desc_2["systemRequirements"]["*"]
3474+
3475+
# Clone with --instance-count alone - should fail (clusterSpec and instanceTypeSelector are mutually exclusive)
3476+
with self.assertSubprocessFailure(stderr_regexp='Cannot specify --instance-count.*instanceTypeSelector.*without providing --instance-type', exit_code=3):
3477+
run(f"dx run --clone {origin_job_id} --instance-count 3 --brief -y")
3478+
3479+
# Clone with both --instance-count and --instance-type - should work (instanceType overrides instanceTypeSelector)
3480+
cloned_job_id_3 = run(f"dx run --clone {origin_job_id} --instance-count 3 --instance-type mem2_hdd2_x4 --brief -y").strip()
3481+
assert cloned_job_id_3.startswith("job-")
3482+
cloned_job_desc_3 = dxpy.api.job_describe(cloned_job_id_3)
3483+
# Verify instanceType override is applied
3484+
assert "instanceType" in cloned_job_desc_3["systemRequirements"]["*"]
3485+
assert cloned_job_desc_3["systemRequirements"]["*"]["instanceType"] == "mem2_hdd2_x4"
3486+
# Note: When the applet's runSpec doesn't have a clusterSpec defined (only instanceTypeSelector)
3487+
33813488
def test_dx_run_clone(self):
33823489
applet_id = dxpy.api.applet_new({"project": self.project,
33833490
"dxapi": "1.0.0",

0 commit comments

Comments
 (0)