Skip to content

Commit 5fb5ac1

Browse files
committed
Add gpus-per-node to job scripts and resources
1 parent 2c61650 commit 5fb5ac1

File tree

5 files changed

+24
-5
lines changed

5 files changed

+24
-5
lines changed

polaris/job/__init__.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ def write_job_script(config, machine, target_cores, min_cores, work_dir,
4242
cores = np.sqrt(target_cores * min_cores)
4343
nodes = int(np.ceil(cores / cores_per_node))
4444

45-
partition, qos, constraint, wall_time = get_slurm_options(
45+
partition, qos, constraint, gpus_per_node, wall_time = get_slurm_options(
4646
config, machine, nodes)
4747

4848
job_name = config.get('job', 'job_name')
@@ -58,7 +58,7 @@ def write_job_script(config, machine, target_cores, min_cores, work_dir,
5858
text = template.render(job_name=job_name, account=account,
5959
nodes=f'{nodes}', wall_time=wall_time, qos=qos,
6060
partition=partition, constraint=constraint,
61-
suite=suite)
61+
gpus_per_node=gpus_per_node, suite=suite)
6262
text = clean_up_whitespace(text)
6363
if suite == '':
6464
script_filename = 'job_script.sh'
@@ -95,6 +95,9 @@ def get_slurm_options(config, machine, nodes):
9595
constraint : str
9696
Slurm constraint
9797
98+
gpus_per_node : str
99+
The number of GPUs per node (if any)
100+
98101
wall_time : str
99102
Slurm wall time
100103
"""
@@ -131,9 +134,14 @@ def get_slurm_options(config, machine, nodes):
131134
else:
132135
constraint = ''
133136

137+
if config.has_option('parallel', 'gpus_per_node'):
138+
gpus_per_node = config.get('parallel', 'gpus_per_node')
139+
else:
140+
gpus_per_node = ''
141+
134142
wall_time = config.get('job', 'wall_time')
135143

136-
return partition, qos, constraint, wall_time
144+
return partition, qos, constraint, gpus_per_node, wall_time
137145

138146

139147
def clean_up_whitespace(text):

polaris/job/job_script.template

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
{% if constraint != '' -%}
1717
#SBATCH --constraint={{ constraint }}
1818
{%- endif %}
19+
{% if gpus_per_node != '' -%}
20+
#SBATCH --gpus-per-node={{ gpus_per_node }}
21+
{%- endif %}
1922

2023
source load_polaris_env.sh
2124
polaris serial {{suite}}

polaris/parallel.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ def get_available_parallel_resources(config):
7777
cores_per_node=cores_per_node,
7878
mpi_allowed=mpi_allowed
7979
)
80+
81+
if config.has_option('parallel', 'gpus_per_node'):
82+
available_resources['gpus_per_node'] = \
83+
config.getint('parallel', 'gpus_per_node')
84+
8085
return available_resources
8186

8287

utils/omega/ctest/job_script.template

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
{% if constraint != '' -%}
1717
#SBATCH --constraint={{ constraint }}
1818
{%- endif %}
19+
{% if gpus_per_node != '' -%}
20+
#SBATCH --gpus-per-node={{ gpus_per_node }}
21+
{%- endif %}
1922

2023
cd {{ build_dir }}
2124
./omega_ctest.sh

utils/omega/ctest/omega_ctest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def write_job_script(config, machine, compiler, submit):
120120

121121
nodes = 1
122122

123-
partition, qos, constraint, _ = get_slurm_options(
123+
partition, qos, constraint, gpus_per_node, _ = get_slurm_options(
124124
config, machine, nodes)
125125

126126
wall_time = '0:15:00'
@@ -156,7 +156,7 @@ def write_job_script(config, machine, compiler, submit):
156156
script = template.render(job_name=job_name, account=account,
157157
nodes=f'{nodes}', wall_time=wall_time, qos=qos,
158158
partition=partition, constraint=constraint,
159-
build_dir=build_dir)
159+
gpus_per_node=gpus_per_node, build_dir=build_dir)
160160
script = clean_up_whitespace(script)
161161

162162
build_omega_dir = os.path.abspath('build_omega')

0 commit comments

Comments
 (0)