Skip to content

Commit b047a42

Browse files
committed
draft for flexynesis gpu rule and condor_container_gpu model rule
1 parent 6ac60a5 commit b047a42

2 files changed

Lines changed: 47 additions & 14 deletions

File tree

files/galaxy/tpv/destinations.yml.j2

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -555,12 +555,43 @@ destinations:
555555
max_accepted_mem: 500
556556
min_accepted_gpus: 1
557557
max_accepted_gpus: 4
558+
env:
559+
GPU_AVAILABLE: 1
560+
context:
561+
galaxy_group: 'GalaxyGroup == "pxe-gpu"'
558562
params:
559-
requirements: 'GalaxyGroup == "pxe-gpu"'
563+
requirements: "{galaxy_group}"
564+
request_gpus: "{gpus or 0}"
560565
docker_run_extra_arguments: "{entity.params.get('docker_run_extra_arguments') or ''} --gpus all --env CUDA_VISIBLE_DEVICES=$_CONDOR_AssignedGPUs --env NVIDIA_VISIBLE_DEVICES=$_CONDOR_AssignedGPUs"
561566
singularity_run_extra_arguments: "{entity.params.get('singularity_run_extra_arguments') or ''} --nv --env CUDA_VISIBLE_DEVICES=$_CONDOR_AssignedGPUs"
562-
env:
563-
GPU_AVAILABLE: 1
567+
rules:
568+
- id: GPU model preferences
569+
if: |
570+
"exclude_gpu_models" in entity.context or "include_gpu_models" in entity.context
571+
execute: |
572+
exclude_gpu_models = entity.context.get('exclude_gpu_models', [])
573+
include_gpu_models = entity.context.get('include_gpu_models', [])
574+
existing_requirements = galaxy_group
575+
gpu_conditions_list = []
576+
if exclude_gpu_models:
577+
exclude_conditions = ' && '.join(
578+
f'(GPUs_DeviceName != "{model}")' for model in exclude_gpu_models
579+
)
580+
gpu_conditions_list.append(exclude_conditions)
581+
if include_gpu_models:
582+
include_conditions = ' || '.join(
583+
f'(GPUs_DeviceName == "{model}")' for model in include_gpu_models
584+
)
585+
gpu_conditions_list.append(f'({include_conditions})')
586+
# Combine all GPU conditions with AND
587+
if gpu_conditions_list:
588+
gpu_conditions = ' && '.join(gpu_conditions_list)
589+
# Combine with existing requirements with AND
590+
if existing_requirements:
591+
entity.params['requirements'] = f'{gpu_conditions} && ({existing_requirements})'
592+
else:
593+
entity.params['requirements'] = gpu_conditions
594+
564595

565596
# This means a GPU can be shared by max 4 jobs at the same time
566597
condor_container_gpu_divide4:

files/galaxy/tpv/tools.yml

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -808,23 +808,25 @@ tools:
808808

809809
toolshed.g2.bx.psu.edu/repos/bgruening/flexynesis/flexynesis/.*:
810810
rules:
811-
- if: helpers.tool_version_gte(tool, '1.1.11+galaxy0')
812-
gpus: 1
813-
params:
814-
docker_run_extra_arguments: --user 999
815811
- id: flexynesis_gnn_high_mem
816812
if: |
817813
retval = False
818-
options = job.get_param_values(app)
819-
if options:
820-
training_type = options.get('training_type', {})
821-
if training_type and isinstance(training_type, dict):
822-
model_select = training_type.get('model_class', {})
823-
if model_select and isinstance(model_select, dict):
824-
retval = model_select.get('model_class') == 'GNN'
814+
if helpers.tool_version_gte(tool, '1.1.11+galaxy0'):
815+
options = job.get_param_values(app)
816+
if options:
817+
training_type = options.get('training_type', {})
818+
if training_type and isinstance(training_type, dict):
819+
model_select = training_type.get('model_class', {})
820+
if model_select and isinstance(model_select, dict):
821+
retval = model_select.get('model_class') == 'GNN'
825822
retval
823+
gpus: 1  # TPV's resource field is `gpus` (plural); `gpu` is not a recognised key, so no GPU would be requested
826824
cores: 20
827825
mem: 100
826+
params:
827+
docker_run_extra_arguments: --user 999
828+
context:
829+
exclude_gpu_models: ["Tesla T4"] # T4 GPUs have only 16 GB of memory, which is not enough for the GNN model
828830

829831
toolshed.g2.bx.psu.edu/repos/genouest/helixer/helixer/.*:
830832
params:

0 commit comments

Comments
 (0)