
Commit c8cdfc4

Authored by cmeesters, fgvieira, and coderabbitai[bot]
feat: added new 'qos' resource (#241)
Despite the branch name, this PR introduces a new resource: `qos`. As `qos` seems to be frequently used and is similar to `constraint`, adding it as a proper resource, rather than requiring the somewhat convoluted use of `slurm_extra`, seems the more service-oriented approach.

## Summary by CodeRabbit

- **New Features**
  - Job submissions now support a Quality of Service (QoS) option, providing enhanced flexibility for job scheduling and resource allocation.
  - Introduced a new command construction method for SLURM job submissions, improving clarity and maintainability.
- **Bug Fixes**
  - Improved handling of SLURM job resources, with additional validation tests for the `constraint` and `qos` parameters.
- **Tests**
  - Added comprehensive test coverage for SLURM resource handling in job submissions, including various scenarios for `constraint` and `qos`.
- **Chores**
  - Updated the `pytest` dependency to a newer version for potential improvements and new features.
  - Minor syntax update in the GitHub Actions workflow for improved clarity.
  - Enhanced test output in the CI workflow by modifying pytest command options.

---

Co-authored-by: Filipe G. Vieira <[email protected]>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
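With this change, a workflow can request a QoS the same way it requests a `constraint`. A minimal sketch of rule-level usage (the rule name, the `long` QoS, and the other resource values are made up for illustration; valid QoS names are cluster-specific and can be listed with `sacctmgr show qos`):

```snakemake
rule heavy_analysis:
    output:
        "results/analysis.txt",
    resources:
        qos="long",     # hypothetical QoS name; ask your cluster admins
        runtime=120,    # walltime in minutes
        mem_mb=4000,
    shell:
        "run_analysis > {output}"
```

The same effect previously required routing the flag through `slurm_extra`.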
1 parent: dc9a4fd · commit: c8cdfc4

6 files changed (+392 lines, -59 lines)

.github/workflows/announce-release.yml (+1 -1)

```diff
@@ -12,7 +12,7 @@ permissions:
 
 jobs:
   post_to_mastodon:
-    if: ${{ contains(github.event.head_commit.message, 'chore(main): release') }}
+    if: "${{ contains(github.event.head_commit.message, 'chore(main): release') }}"
     runs-on: ubuntu-latest
     steps:
       - name: Post to Mastodon
```

.github/workflows/ci.yml (+1 -1)

```diff
@@ -98,7 +98,7 @@ jobs:
           poetry install
 
       - name: Run pytest
-        run: poetry run coverage run -m pytest tests/tests.py -sv
+        run: poetry run coverage run -m pytest tests/tests.py -sv --tb=short --disable-warnings
 
       - name: Run Coverage
         run: poetry run coverage report -m
```

pyproject.toml (+1 -1)

```diff
@@ -24,7 +24,7 @@ throttler = "^1.2.2"
 black = "^23.7.0"
 flake8 = "^6.1.0"
 coverage = "^7.3.1"
-pytest = "^7.4.2"
+pytest = "^8.3.5"
 snakemake = "^8.20.0"
 
 [tool.coverage.run]
```

snakemake_executor_plugin_slurm/__init__.py (+18 -47)

```diff
@@ -26,9 +26,9 @@
     JobExecutorInterface,
 )
 from snakemake_interface_common.exceptions import WorkflowError
-from snakemake_executor_plugin_slurm_jobstep import get_cpu_setting
 
 from .utils import delete_slurm_environment, delete_empty_dirs, set_gres_string
+from .submit_string import get_submit_command
 
 
 @dataclass
@@ -135,9 +135,10 @@ class ExecutorSettings(ExecutorSettingsBase):
 # Required:
 # Implementation of your executor
 class Executor(RemoteExecutor):
-    def __post_init__(self):
+    def __post_init__(self, test_mode: bool = False):
         # run check whether we are running in a SLURM job context
         self.warn_on_jobcontext()
+        self.test_mode = test_mode
         self.run_uuid = str(uuid.uuid4())
         self.logger.info(f"SLURM run ID: {self.run_uuid}")
         self._fallback_account_arg = None
@@ -225,60 +226,41 @@ def run_job(self, job: JobExecutorInterface):
             comment_str = f"rule_{job.name}"
         else:
             comment_str = f"rule_{job.name}_wildcards_{wildcard_str}"
-        call = (
-            f"sbatch "
-            f"--parsable "
-            f"--job-name {self.run_uuid} "
-            f"--output '{slurm_logfile}' "
-            f"--export=ALL "
-            f"--comment '{comment_str}'"
-        )
+        # check whether the 'slurm_extra' parameter is used correctly
+        # prior to putatively setting in the sbatch call
+        if job.resources.get("slurm_extra"):
+            self.check_slurm_extra(job)
 
-        if not self.workflow.executor_settings.no_account:
-            call += self.get_account_arg(job)
+        job_params = {
+            "run_uuid": self.run_uuid,
+            "slurm_logfile": slurm_logfile,
+            "comment_str": comment_str,
+            "account": self.get_account_arg(job),
+            "partition": self.get_partition_arg(job),
+            "workdir": self.workflow.workdir_init,
+        }
 
-        call += self.get_partition_arg(job)
+        call = get_submit_command(job, job_params)
 
         if self.workflow.executor_settings.requeue:
             call += " --requeue"
 
         call += set_gres_string(job)
 
-        if job.resources.get("clusters"):
-            call += f" --clusters {job.resources.clusters}"
-
-        if job.resources.get("runtime"):
-            call += f" -t {job.resources.runtime}"
-        else:
+        if not job.resources.get("runtime"):
             self.logger.warning(
                 "No wall time information given. This might or might not "
                 "work on your cluster. "
                 "If not, specify the resource runtime in your rule or as a reasonable "
                 "default via --default-resources."
             )
 
-        if job.resources.get("constraint"):
-            call += f" -C '{job.resources.constraint}'"
-        if job.resources.get("mem_mb_per_cpu"):
-            call += f" --mem-per-cpu {job.resources.mem_mb_per_cpu}"
-        elif job.resources.get("mem_mb"):
-            call += f" --mem {job.resources.mem_mb}"
-        else:
+        if not job.resources.get("mem_mb_per_cpu") and not job.resources.get("mem_mb"):
             self.logger.warning(
                 "No job memory information ('mem_mb' or 'mem_mb_per_cpu') is given "
                 "- submitting without. This might or might not work on your cluster."
             )
 
-        if job.resources.get("nodes", False):
-            call += f" --nodes={job.resources.get('nodes', 1)}"
-
-        # fixes #40 - set ntasks regardless of mpi, because
-        # SLURM v22.05 will require it for all jobs
-        gpu_job = job.resources.get("gpu") or "gpu" in job.resources.get("gres", "")
-        if gpu_job:
-            call += f" --ntasks-per-gpu={job.resources.get('tasks', 1)}"
-        else:
-            call += f" --ntasks={job.resources.get('tasks', 1)}"
         # MPI job
         if job.resources.get("mpi", False):
             if not job.resources.get("tasks_per_node") and not job.resources.get(
@@ -290,19 +272,8 @@ def run_job(self, job: JobExecutorInterface):
                 "Probably not what you want."
             )
 
-        # we need to set cpus-per-task OR cpus-per-gpu, the function
-        # will return a string with the corresponding value
-        call += f" {get_cpu_setting(job, gpu_job)}"
-        if job.resources.get("slurm_extra"):
-            self.check_slurm_extra(job)
-            call += f" {job.resources.slurm_extra}"
-
         exec_job = self.format_job_exec(job)
 
-        # ensure that workdir is set correctly
-        # use short argument as this is the same in all slurm versions
-        # (see https://github.com/snakemake/snakemake/issues/2014)
-        call += f" -D {self.workflow.workdir_init}"
         # and finally the job to execute with all the snakemake parameters
         call += f' --wrap="{exec_job}"'
```

snakemake_executor_plugin_slurm/submit_string.py (new file, +69)

```diff
@@ -0,0 +1,69 @@
+from snakemake_executor_plugin_slurm_jobstep import get_cpu_setting
+from types import SimpleNamespace
+
+
+def get_submit_command(job, params):
+    """
+    Return the submit command for the job.
+    """
+    # Convert params dict to a SimpleNamespace for attribute-style access
+    params = SimpleNamespace(**params)
+
+    call = (
+        f"sbatch "
+        f"--parsable "
+        f"--job-name {params.run_uuid} "
+        f'--output "{params.slurm_logfile}" '
+        f"--export=ALL "
+        f'--comment "{params.comment_str}"'
+    )
+
+    # check whether an account is set
+    if hasattr(params, "account"):
+        call += f" --account={params.account}"
+    # check whether a partition is set
+    if hasattr(params, "partition"):
+        call += f" --partition={params.partition}"
+
+    if job.resources.get("clusters"):
+        call += f" --clusters {job.resources.clusters}"
+
+    if job.resources.get("runtime"):
+        call += f" -t {job.resources.runtime}"
+
+    if job.resources.get("constraint") or isinstance(
+        job.resources.get("constraint"), str
+    ):
+        call += f" -C '{job.resources.get('constraint')}'"
+
+    if job.resources.get("qos") or isinstance(job.resources.get("qos"), str):
+        call += f" --qos='{job.resources.qos}'"
+
+    if job.resources.get("mem_mb_per_cpu"):
+        call += f" --mem-per-cpu {job.resources.mem_mb_per_cpu}"
+    elif job.resources.get("mem_mb"):
+        call += f" --mem {job.resources.mem_mb}"
+
+    if job.resources.get("nodes", False):
+        call += f" --nodes={job.resources.get('nodes', 1)}"
+
+    # fixes #40 - set ntasks regardless of mpi, because
+    # SLURM v22.05 will require it for all jobs
+    gpu_job = job.resources.get("gpu") or "gpu" in job.resources.get("gres", "")
+    if gpu_job:
+        call += f" --ntasks-per-gpu={job.resources.get('tasks', 1)}"
+    else:
+        call += f" --ntasks={job.resources.get('tasks', 1)}"
+
+    # we need to set cpus-per-task OR cpus-per-gpu, the function
+    # will return a string with the corresponding value
+    call += f" {get_cpu_setting(job, gpu_job)}"
+    if job.resources.get("slurm_extra"):
+        call += f" {job.resources.slurm_extra}"
+
+    # ensure that workdir is set correctly
+    # use short argument as this is the same in all slurm versions
+    # (see https://github.com/snakemake/snakemake/issues/2014)
+    call += f" -D '{params.workdir}'"
+
+    return call
```
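Extracting the command construction into this function means it can be exercised without a live SLURM cluster. For illustration, a rough sketch of how `get_submit_command` could be called in isolation, e.g. from a unit test. Everything below (the `FakeResources` helper, the job attributes, and all parameter values) is hypothetical and not from this PR; it only mimics the parts of Snakemake's job interface the function touches, and it assumes `snakemake-executor-plugin-slurm-jobstep` is installed so the `get_cpu_setting` import resolves:

```python
from types import SimpleNamespace

from snakemake_executor_plugin_slurm.submit_string import get_submit_command


class FakeResources(dict):
    """Mimic Snakemake's resources object: dict-style .get() plus attribute access."""

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError as err:
            raise AttributeError(name) from err


# hypothetical job exposing just enough surface for get_submit_command
job = SimpleNamespace(
    resources=FakeResources(runtime=60, mem_mb=4000, qos="normal", tasks=1, gres=""),
    threads=2,  # consumed indirectly via get_cpu_setting
)

# the same keys the refactored run_job() assembles into job_params
params = {
    "run_uuid": "0000-test-uuid",
    "slurm_logfile": "/tmp/rule_test.log",
    "comment_str": "rule_test",
    "account": "my_account",  # placeholder account name
    "partition": "batch",  # placeholder partition name
    "workdir": ".",
}

# prints a single sbatch command line containing, among other flags,
# -t 60, --qos='normal', --mem 4000, --ntasks=1, and -D '.'
print(get_submit_command(job, params))
```

Because the function only returns a string, assertions on the presence or absence of individual flags (such as `--qos` for the new resource) are straightforward.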
