2525#include <unistd.h>
2626
2727#include <slurm/spank.h>
28+ #include <slurm/slurm_version.h>
2829
2930#include "pyxis_slurmstepd.h"
3031#include "common.h"
@@ -1458,15 +1459,45 @@ static int enroot_export(void)
14581459 return (0 );
14591460}
14601461
1462+ static int enroot_cleanup (void )
1463+ {
1464+ int ret ;
1465+ int rv = 0 ;
1466+
1467+ /* Need to remove the temporary squashfs if the task was interrupted before cleanup. */
1468+ if (context .container .use_enroot_import && context .container .squashfs_path != NULL )
1469+ unlink (context .container .squashfs_path );
1470+
1471+ if (context .container .use_importer ) {
1472+ ret = importer_exec_release (context .config .importer_path , context .job .uid , context .job .gid ,
1473+ enroot_set_env );
1474+ if (ret < 0 ) {
1475+ slurm_info ("pyxis: failed to call importer release" );
1476+ rv = -1 ;
1477+ }
1478+ }
1479+
1480+ if (context .container .temporary_rootfs ) {
1481+ slurm_info ("pyxis: removing container filesystem: %s" , context .container .name );
1482+
1483+ ret = enroot_exec_wait_ctx ((char * const []){ "enroot" , "remove" , "-f" , context .container .name , NULL });
1484+ if (ret < 0 ) {
1485+ slurm_info ("pyxis: failed to remove container filesystem: %s" , context .container .name );
1486+ rv = -1 ;
1487+ }
1488+ }
1489+
1490+ return (rv );
1491+ }
1492+
14611493int slurm_spank_task_exit (spank_t sp , int ac , char * * av )
14621494{
14631495 int ret ;
1464- int rv = -1 ;
1496+ int rv = 0 ;
14651497
14661498 if (!context .enabled )
14671499 return (0 );
14681500
1469- rv = 0 ;
14701501 /* Last task to exit does the container export and/or container cleanup, if needed. */
14711502 if (atomic_fetch_add (& context .shm -> completed_tasks , 1 ) == context .job .local_task_count - 1 ) {
14721503 ret = enroot_export ();
@@ -1475,25 +1506,9 @@ int slurm_spank_task_exit(spank_t sp, int ac, char **av)
14751506 rv = -1 ;
14761507 }
14771508
1478- /* Need to remove the temporary squashfs if the task was interrupted before cleanup. */
1479- if (context .container .use_enroot_import && context .container .squashfs_path != NULL )
1480- unlink (context .container .squashfs_path );
1481-
1482- if (context .container .use_importer ) {
1483- ret = importer_exec_release (context .config .importer_path , context .job .uid , context .job .gid ,
1484- enroot_set_env );
1485- if (ret < 0 )
1486- slurm_info ("pyxis: failed to call importer release" );
1487- }
1488-
1489- if (context .container .temporary_rootfs ) {
1490- slurm_info ("pyxis: removing container filesystem: %s" , context .container .name );
1491-
1492- ret = enroot_exec_wait_ctx ((char * const []){ "enroot" , "remove" , "-f" , context .container .name , NULL });
1493- if (ret < 0 )
1494- slurm_info ("pyxis: failed to remove container filesystem: %s" , context .container .name );
1495- }
1496-
1509+ /* Slurm < 25.05: do cleanup here, before pam_finish: https://support.schedmd.com/show_bug.cgi?id=19362 */
1510+ if (SLURM_VERSION_NUMBER < SLURM_VERSION_NUM (25 , 5 , 0 ))
1511+ enroot_cleanup ();
14971512 }
14981513
14991514 return (rv );
@@ -1504,6 +1519,11 @@ int pyxis_slurmstepd_exit(spank_t sp, int ac, char **av)
15041519 int ret ;
15051520 int rv = 0 ;
15061521
1522+ if (context .enabled ) {
1523+ if (SLURM_VERSION_NUMBER >= SLURM_VERSION_NUM (25 , 5 , 0 ))
1524+ enroot_cleanup ();
1525+ }
1526+
15071527 free (context .container .name );
15081528 free (context .container .squashfs_path );
15091529 free (context .container .save_path );
0 commit comments