Skip to content

Commit 233e1e8

Browse files
authored
Adds a new passthrough flag for slurm launchers and fix arg parsing bug (#28380)
2 parents 8481648 + 73c4be1 commit 233e1e8

File tree

4 files changed

+139
-22
lines changed

4 files changed

+139
-22
lines changed

doc/rst/usingchapel/launcher.rst

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,33 @@ Common Slurm Settings
293293
294294
export CHPL_LAUNCHER_WALLTIME=00:10:00
295295
296+
* If you need to pass more arguments to slurm, you can use the slurm environment
297+
variable for the option. Alternatively, you can pass extra arguments through
298+
the ``--system-launcher-flags`` flag. For example, to pass ``--account=acct`` to
299+
slurm, you can use:
300+
301+
.. code-block:: bash
302+
303+
./myprogram --system-launcher-flags="--account=acct"
304+
305+
Multiple arguments can be passed as a single string, or
306+
by specifying ``--system-launcher-flags`` multiple times. For example:
307+
308+
.. code-block:: bash
309+
310+
./myprogram --system-launcher-flags="--account=acct --partition=debug"
311+
312+
# or equivalently
313+
314+
./myprogram --system-launcher-flags="--account=acct" --system-launcher-flags="--partition=debug"
315+
316+
.. note::
317+
318+
If you plan to submit a batch script via ``CHPL_LAUNCHER_USE_SBATCH``
319+
or ``--generate-sbatch-script``, you should prefer passing only a single
320+
argument per ``--system-launcher-flags`` flag, as multiple flags passed as
321+
a single string will not be parsed correctly in the generated batch script.
322+
296323
.. _ssh-launchers-with-slurm:
297324

298325
Using any SSH-based launcher with Slurm

runtime/include/chpl-mem.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,7 @@ void chpl_mem_free(void* memAlloc, int32_t lineno, int32_t filename) {
159159

160160
// Provide handles to instrument Chapel calls to memcpy and memmove
161161
static inline
162-
void* chpl_memcpy(void* dest, const void* src, size_t num)
163-
{
162+
void* chpl_memcpy(void* dest, const void* src, size_t num) {
164163
assert(dest != src || num == 0);
165164
assert(dest != NULL && src != NULL);
166165
return memcpy(dest, src, num);

runtime/src/launch/slurm-gasnetrun_common/slurm-gasnetrun_common.h

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#define CHPL_PARTITION_FLAG "--partition"
4848
#define CHPL_EXCLUDE_FLAG "--exclude"
4949
#define CHPL_GPUS_PER_NODE_FLAG "--gpus-per-node"
50+
#define CHPL_LAUNCHER_PASSTHROUGH_FLAG "--system-launcher-flags"
5051

5152
#define CHPL_LPN_VAR "LOCALES_PER_NODE"
5253

@@ -56,6 +57,8 @@ static char* nodelist = NULL;
5657
static char* partition = NULL;
5758
static char* exclude = NULL;
5859
static char* gpusPerNode = NULL;
60+
static char** launcherPassthroughFlags = NULL;
61+
static int numLauncherPassthroughFlags = 0;
5962
char* slurmFilename = NULL;
6063

6164
/* copies of binary to run per node */
@@ -70,6 +73,13 @@ typedef enum {
7073

7174
static const char* nodeAccessStr = NULL;
7275

76+
// Append a copy of `flag` to the end of a growable array of C strings.
//
//   array - address of the array pointer; it is overwritten with the result
//           of chpl_mem_realloc(), sized for (*size + 1) char* entries
//   size  - address of the current element count; incremented by one
//   flag  - string to store; it is duplicated with string_copy(), so the
//           array holds its own copy rather than the caller's pointer
//
// NOTE(review): the callers visible here start with a NULL array and a size
// of 0, so this presumably relies on chpl_mem_realloc() accepting a NULL
// pointer for the first allocation -- confirm.
// NOTE(review): `flag` is passed to string_copy() unchecked; presumably it
// must be non-NULL -- verify against the callers.
static void appendPassthroughFlag(char*** array, int* size, const char* flag) {
77+
*array = (char**)chpl_mem_realloc(*array, (*size + 1) * sizeof(char*),
78+
CHPL_RT_MD_COMMAND_BUFFER, -1, 0);
79+
(*array)[*size] = (char*)string_copy((char*)flag, -1, 0);
80+
(*size)++;
81+
}
82+
7383
// Check what version of slurm is on the system
7484
static sbatchVersion determineSlurmVersion(void) {
7585
const int buflen = 256;
@@ -257,6 +267,12 @@ static char* chpl_launch_create_command(int argc, char* argv[],
257267
gpusPerNode = getenv("CHPL_LAUNCHER_GPUS_PER_NODE");
258268
}
259269

270+
// append any user specified passthrough flags to the list of flags to pass through
271+
char* passthroughFlagsEnv = getenv("CHPL_LAUNCHER_PASSTHROUGH_FLAGS");
272+
if (passthroughFlagsEnv) {
273+
appendPassthroughFlag(&launcherPassthroughFlags, &numLauncherPassthroughFlags, passthroughFlagsEnv);
274+
}
275+
260276
// request exclusive node access by default, but allow user to override
261277
nodeAccessEnv = getenv("CHPL_LAUNCHER_NODE_ACCESS");
262278
if (nodeAccessEnv == NULL || strcmp(nodeAccessEnv, "exclusive") == 0) {
@@ -295,6 +311,13 @@ static char* chpl_launch_create_command(int argc, char* argv[],
295311
if (projectString && strlen(projectString) > 0)
296312
fprintf(slurmFile, "#SBATCH -A %s\n", projectString);
297313

314+
// add any additional flags
315+
if (launcherPassthroughFlags != NULL) {
316+
for (int i = 0; i < numLauncherPassthroughFlags; i++) {
317+
fprintf(slurmFile, "#SBATCH %s\n", launcherPassthroughFlags[i]);
318+
}
319+
}
320+
298321
if (outputfn != NULL)
299322
fprintf(slurmFile, "#SBATCH -o %s\n", outputfn);
300323
else
@@ -345,6 +368,12 @@ static char* chpl_launch_create_command(int argc, char* argv[],
345368
if (projectString && strlen(projectString) > 0)
346369
chpl_append_to_cmd(&iCom, &len, "--account=%s ", projectString);
347370
if (constraint) chpl_append_to_cmd(&iCom, &len, "-C %s", constraint);
371+
// add any additional flags
372+
if (launcherPassthroughFlags != NULL) {
373+
for (int i = 0; i < numLauncherPassthroughFlags; i++) {
374+
chpl_append_to_cmd(&iCom, &len, " %s", launcherPassthroughFlags[i]);
375+
}
376+
}
348377
chpl_append_to_cmd(&iCom, &len, " %s/%s/%s -n %d -N %d -c 0",
349378
CHPL_THIRD_PARTY, WRAP_TO_STR(LAUNCH_PATH),
350379
GASNETRUN_LAUNCHER, numLocales, numNodes);
@@ -407,44 +436,53 @@ int chpl_launch_handle_arg(int argc, char* argv[], int argNum,
407436
if (!strcmp(argv[argNum], CHPL_WALLTIME_FLAG)) {
408437
walltime = argv[argNum+1];
409438
return 2;
410-
} else if (!strncmp(argv[argNum], CHPL_WALLTIME_FLAG"=", strlen(CHPL_WALLTIME_FLAG))) {
411-
walltime = &(argv[argNum][strlen(CHPL_WALLTIME_FLAG)+1]);
439+
} else if (!strncmp(argv[argNum], CHPL_WALLTIME_FLAG"=", strlen(CHPL_WALLTIME_FLAG"="))) {
440+
walltime = &(argv[argNum][strlen(CHPL_WALLTIME_FLAG"=")]);
412441
return 1;
413442
}
414443

415444
// handle --nodelist <nodelist> or --nodelist=<nodelist>
416445
if (!strcmp(argv[argNum], CHPL_NODELIST_FLAG)) {
417446
nodelist = argv[argNum+1];
418447
return 2;
419-
} else if (!strncmp(argv[argNum], CHPL_NODELIST_FLAG"=", strlen(CHPL_NODELIST_FLAG))) {
420-
nodelist = &(argv[argNum][strlen(CHPL_NODELIST_FLAG)+1]);
448+
} else if (!strncmp(argv[argNum], CHPL_NODELIST_FLAG"=", strlen(CHPL_NODELIST_FLAG"="))) {
449+
nodelist = &(argv[argNum][strlen(CHPL_NODELIST_FLAG"=")]);
421450
return 1;
422451
}
423452

424453
// handle --partition <partition> or --partition=<partition>
425454
if (!strcmp(argv[argNum], CHPL_PARTITION_FLAG)) {
426455
partition = argv[argNum+1];
427456
return 2;
428-
} else if (!strncmp(argv[argNum], CHPL_PARTITION_FLAG"=", strlen(CHPL_PARTITION_FLAG))) {
429-
partition = &(argv[argNum][strlen(CHPL_PARTITION_FLAG)+1]);
457+
} else if (!strncmp(argv[argNum], CHPL_PARTITION_FLAG"=", strlen(CHPL_PARTITION_FLAG"="))) {
458+
partition = &(argv[argNum][strlen(CHPL_PARTITION_FLAG"=")]);
430459
return 1;
431460
}
432461

433462
// handle --exclude <nodes> or --exclude=<nodes>
434463
if (!strcmp(argv[argNum], CHPL_EXCLUDE_FLAG)) {
435464
exclude = argv[argNum+1];
436465
return 2;
437-
} else if (!strncmp(argv[argNum], CHPL_EXCLUDE_FLAG"=", strlen(CHPL_EXCLUDE_FLAG))) {
438-
exclude = &(argv[argNum][strlen(CHPL_EXCLUDE_FLAG)+1]);
466+
} else if (!strncmp(argv[argNum], CHPL_EXCLUDE_FLAG"=", strlen(CHPL_EXCLUDE_FLAG"="))) {
467+
exclude = &(argv[argNum][strlen(CHPL_EXCLUDE_FLAG"=")]);
439468
return 1;
440469
}
441470

442471
// handle --gpus-per-node <gpus> or --gpus-per-node=<gpus>
443472
if (!strcmp(argv[argNum], CHPL_GPUS_PER_NODE_FLAG)) {
444473
gpusPerNode = argv[argNum+1];
445474
return 2;
446-
} else if (!strncmp(argv[argNum], CHPL_GPUS_PER_NODE_FLAG"=", strlen(CHPL_GPUS_PER_NODE_FLAG))) {
447-
gpusPerNode = &(argv[argNum][strlen(CHPL_GPUS_PER_NODE_FLAG)+1]);
475+
} else if (!strncmp(argv[argNum], CHPL_GPUS_PER_NODE_FLAG"=", strlen(CHPL_GPUS_PER_NODE_FLAG"="))) {
476+
gpusPerNode = &(argv[argNum][strlen(CHPL_GPUS_PER_NODE_FLAG"=")]);
477+
return 1;
478+
}
479+
480+
// handle --system-launcher-flags <flags> or --system-launcher-flags=<flags>
481+
if (!strcmp(argv[argNum], CHPL_LAUNCHER_PASSTHROUGH_FLAG)) {
482+
appendPassthroughFlag(&launcherPassthroughFlags, &numLauncherPassthroughFlags, argv[argNum+1]);
483+
return 2;
484+
} else if (!strncmp(argv[argNum], CHPL_LAUNCHER_PASSTHROUGH_FLAG"=", strlen(CHPL_LAUNCHER_PASSTHROUGH_FLAG"="))) {
485+
appendPassthroughFlag(&launcherPassthroughFlags, &numLauncherPassthroughFlags, &(argv[argNum][strlen(CHPL_LAUNCHER_PASSTHROUGH_FLAG"=")]));
448486
return 1;
449487
}
450488

@@ -486,6 +524,13 @@ const argDescTuple_t* chpl_launch_get_help(void) {
486524
{ "",
487525
"(or use $CHPL_LAUNCHER_GPUS_PER_NODE)"
488526
},
527+
{
528+
CHPL_LAUNCHER_PASSTHROUGH_FLAG " <flags>",
529+
"specify additional flags to pass through to the launcher"
530+
},
531+
{ "",
532+
"(or use $CHPL_LAUNCHER_PASSTHROUGH_FLAGS)"
533+
},
489534
{ NULL, NULL },
490535
};
491536
return args;

runtime/src/launch/slurm-srun/launch-slurm-srun.c

Lines changed: 56 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#define CHPL_PARTITION_FLAG "--partition"
4040
#define CHPL_EXCLUDE_FLAG "--exclude"
4141
#define CHPL_GPUS_PER_NODE_FLAG "--gpus-per-node"
42+
#define CHPL_LAUNCHER_PASSTHROUGH_FLAG "--system-launcher-flags"
4243

4344

4445
static char* debug = NULL;
@@ -49,6 +50,8 @@ static char* partition = NULL;
4950
static char* reservation = NULL;
5051
static char* exclude = NULL;
5152
static char* gpusPerNode = NULL;
53+
static char** launcherPassthroughFlags = NULL;
54+
static int numLauncherPassthroughFlags = 0;
5255

5356
char* slurmFilename = NULL;
5457

@@ -94,6 +97,13 @@ static int nomultithread(int batch) {
9497
return 0;
9598
}
9699

100+
// Append a copy of `flag` to the end of a growable array of C strings.
//
//   array - address of the array pointer; it is overwritten with the result
//           of chpl_mem_realloc(), sized for (*size + 1) char* entries
//   size  - address of the current element count; incremented by one
//   flag  - string to store; it is duplicated with string_copy(), so the
//           array holds its own copy rather than the caller's pointer
//
// NOTE(review): the callers visible here start with a NULL array and a size
// of 0, so this presumably relies on chpl_mem_realloc() accepting a NULL
// pointer for the first allocation -- confirm.
// NOTE(review): `flag` is passed to string_copy() unchecked; presumably it
// must be non-NULL -- verify against the callers.
static void appendPassthroughFlag(char*** array, int* size, const char* flag) {
101+
*array = (char**)chpl_mem_realloc(*array, (*size + 1) * sizeof(char*),
102+
CHPL_RT_MD_COMMAND_BUFFER, -1, 0);
103+
(*array)[*size] = (char*)string_copy((char*)flag, -1, 0);
104+
(*size)++;
105+
}
106+
97107

98108
// Get the number of locales from the environment variable or if that is not
99109
// set just use sinfo to get the number of cpus and divide by the number
@@ -310,6 +320,12 @@ static char* chpl_launch_create_command(int argc, char* argv[],
310320
gpusPerNode = getenv("CHPL_LAUNCHER_GPUS_PER_NODE");
311321
}
312322

323+
// append any user specified passthrough flags to the list of flags to pass through
324+
char* passthroughFlagsEnv = getenv("CHPL_LAUNCHER_PASSTHROUGH_FLAGS");
325+
if (passthroughFlagsEnv) {
326+
appendPassthroughFlag(&launcherPassthroughFlags, &numLauncherPassthroughFlags, passthroughFlagsEnv);
327+
}
328+
313329
reservation = getenv("SLURM_RESERVATION");
314330

315331
// request exclusive node access by default, but allow user to override
@@ -446,6 +462,13 @@ static char* chpl_launch_create_command(int argc, char* argv[],
446462
fprintf(slurmFile, "#SBATCH --account=%s\n", account);
447463
}
448464

465+
// add any additional flags
466+
if (launcherPassthroughFlags != NULL) {
467+
for (i = 0; i < numLauncherPassthroughFlags; i++) {
468+
fprintf(slurmFile, "#SBATCH %s\n", launcherPassthroughFlags[i]);
469+
}
470+
}
471+
449472
// set the output file name to either the user specified
450473
// name or to the binaryName.<jobID>.out if none specified
451474
if (outputfn != NULL) {
@@ -616,6 +639,13 @@ static char* chpl_launch_create_command(int argc, char* argv[],
616639
chpl_append_to_cmd(&iCom, &len, "--account=%s ", account);
617640
}
618641

642+
// add any additional flags
643+
if (launcherPassthroughFlags != NULL) {
644+
for (i = 0; i < numLauncherPassthroughFlags; i++) {
645+
chpl_append_to_cmd(&iCom, &len, "%s ", launcherPassthroughFlags[i]);
646+
}
647+
}
648+
619649
// add the (possibly wrapped) binary name
620650
chpl_append_to_cmd(&iCom, &len, "%s %s ",
621651
chpl_get_real_binary_wrapper(), chpl_get_real_binary_name());
@@ -712,44 +742,53 @@ int chpl_launch_handle_arg(int argc, char* argv[], int argNum,
712742
if (!strcmp(argv[argNum], CHPL_WALLTIME_FLAG)) {
713743
walltime = argv[argNum+1];
714744
return 2;
715-
} else if (!strncmp(argv[argNum], CHPL_WALLTIME_FLAG"=", strlen(CHPL_WALLTIME_FLAG))) {
716-
walltime = &(argv[argNum][strlen(CHPL_WALLTIME_FLAG)+1]);
745+
} else if (!strncmp(argv[argNum], CHPL_WALLTIME_FLAG"=", strlen(CHPL_WALLTIME_FLAG"="))) {
746+
walltime = &(argv[argNum][strlen(CHPL_WALLTIME_FLAG"=")]);
717747
return 1;
718748
}
719749

720750
// handle --nodelist <nodelist> or --nodelist=<nodelist>
721751
if (!strcmp(argv[argNum], CHPL_NODELIST_FLAG)) {
722752
nodelist = argv[argNum+1];
723753
return 2;
724-
} else if (!strncmp(argv[argNum], CHPL_NODELIST_FLAG"=", strlen(CHPL_NODELIST_FLAG))) {
725-
nodelist = &(argv[argNum][strlen(CHPL_NODELIST_FLAG)+1]);
754+
} else if (!strncmp(argv[argNum], CHPL_NODELIST_FLAG"=", strlen(CHPL_NODELIST_FLAG"="))) {
755+
nodelist = &(argv[argNum][strlen(CHPL_NODELIST_FLAG"=")]);
726756
return 1;
727757
}
728758

729759
// handle --partition <partition> or --partition=<partition>
730760
if (!strcmp(argv[argNum], CHPL_PARTITION_FLAG)) {
731761
partition = argv[argNum+1];
732762
return 2;
733-
} else if (!strncmp(argv[argNum], CHPL_PARTITION_FLAG"=", strlen(CHPL_PARTITION_FLAG))) {
734-
partition = &(argv[argNum][strlen(CHPL_PARTITION_FLAG)+1]);
763+
} else if (!strncmp(argv[argNum], CHPL_PARTITION_FLAG"=", strlen(CHPL_PARTITION_FLAG"="))) {
764+
partition = &(argv[argNum][strlen(CHPL_PARTITION_FLAG"=")]);
735765
return 1;
736766
}
737767

738768
// handle --exclude <nodes> or --exclude=<nodes>
739769
if (!strcmp(argv[argNum], CHPL_EXCLUDE_FLAG)) {
740770
exclude = argv[argNum+1];
741771
return 2;
742-
} else if (!strncmp(argv[argNum], CHPL_EXCLUDE_FLAG"=", strlen(CHPL_EXCLUDE_FLAG))) {
743-
exclude = &(argv[argNum][strlen(CHPL_EXCLUDE_FLAG)+1]);
772+
} else if (!strncmp(argv[argNum], CHPL_EXCLUDE_FLAG"=", strlen(CHPL_EXCLUDE_FLAG"="))) {
773+
exclude = &(argv[argNum][strlen(CHPL_EXCLUDE_FLAG"=")]);
744774
return 1;
745775
}
746776

747777
// handle --gpus-per-node <gpus> or --gpus-per-node=<gpus>
748778
if (!strcmp(argv[argNum], CHPL_GPUS_PER_NODE_FLAG)) {
749779
gpusPerNode = argv[argNum+1];
750780
return 2;
751-
} else if (!strncmp(argv[argNum], CHPL_GPUS_PER_NODE_FLAG"=", strlen(CHPL_GPUS_PER_NODE_FLAG))) {
752-
gpusPerNode = &(argv[argNum][strlen(CHPL_GPUS_PER_NODE_FLAG)+1]);
781+
} else if (!strncmp(argv[argNum], CHPL_GPUS_PER_NODE_FLAG"=", strlen(CHPL_GPUS_PER_NODE_FLAG"="))) {
782+
gpusPerNode = &(argv[argNum][strlen(CHPL_GPUS_PER_NODE_FLAG"=")]);
783+
return 1;
784+
}
785+
786+
// handle --system-launcher-flags <flags> or --system-launcher-flags=<flags>
787+
if (!strcmp(argv[argNum], CHPL_LAUNCHER_PASSTHROUGH_FLAG)) {
788+
appendPassthroughFlag(&launcherPassthroughFlags, &numLauncherPassthroughFlags, argv[argNum+1]);
789+
return 2;
790+
} else if (!strncmp(argv[argNum], CHPL_LAUNCHER_PASSTHROUGH_FLAG"=", strlen(CHPL_LAUNCHER_PASSTHROUGH_FLAG"="))) {
791+
appendPassthroughFlag(&launcherPassthroughFlags, &numLauncherPassthroughFlags, &(argv[argNum][strlen(CHPL_LAUNCHER_PASSTHROUGH_FLAG"=")]));
753792
return 1;
754793
}
755794

@@ -803,6 +842,13 @@ const argDescTuple_t* chpl_launch_get_help(void) {
803842
{ "",
804843
"(or use $CHPL_LAUNCHER_GPUS_PER_NODE)"
805844
},
845+
{
846+
CHPL_LAUNCHER_PASSTHROUGH_FLAG " <flags>",
847+
"specify additional flags to pass through to the launcher"
848+
},
849+
{ "",
850+
"(or use $CHPL_LAUNCHER_PASSTHROUGH_FLAGS)"
851+
},
806852
{ NULL, NULL },
807853
};
808854
return args;

0 commit comments

Comments
 (0)