From 8e5c2031d5511bb8b1a18cefc62c767d1d1ab970 Mon Sep 17 00:00:00 2001 From: Allan Pinto Date: Mon, 20 Oct 2025 06:49:38 -0300 Subject: [PATCH 1/5] feat(copy): add exclude_from parameter for rsync-based copy in Singularity builds --- hpccm/primitives/copy.py | 34 ++++++++++++++++++++++++++++++++++ test/test_copy.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/hpccm/primitives/copy.py b/hpccm/primitives/copy.py index 11b6bfc9..cdb261d9 100644 --- a/hpccm/primitives/copy.py +++ b/hpccm/primitives/copy.py @@ -66,6 +66,17 @@ class copy(object): src: A file, or a list of files, to copy + exclude_from: String or list of strings. One or more filenames + containing rsync-style exclude patterns (e.g., `.apptainerignore`). + Only used when building for Singularity or Apptainer. If specified, + the copy operation is emitted in the `%setup` section using + `rsync --exclude-from=` rather than the standard `%files` + copy directive. This enables selective exclusion of files and + directories during the image build, for example to omit large data + files, caches, or temporary artifacts. Multiple exclusion files may + be provided as a list or tuple. The default is an empty list + (Singularity specific). 
+ # Examples ```python @@ -80,6 +91,10 @@ class copy(object): copy(files={'a': '/tmp/a', 'b': '/opt/b'}) ``` + ```python + copy(src='.', dest='/opt/app', exclude_from='.apptainerignore') + ``` + """ def __init__(self, **kwargs): @@ -96,6 +111,14 @@ def __init__(self, **kwargs): self._post = kwargs.get('_post', '') # Singularity specific self.__src = kwargs.get('src', '') + ef = kwargs.get('exclude_from', None) + if ef is None: + self.__exclude_from = [] + elif isinstance(ef, (list, tuple)): + self.__exclude_from = list(ef) + else: + self.__exclude_from = [ef] + if self._mkdir and self._post: logging.error('_mkdir and _post are mutually exclusive!') self._post = False # prefer _mkdir @@ -179,6 +202,10 @@ def __str__(self): else: logging.warning(msg) + # If exclusion list is defined, switch to rsync copy method + if self.__exclude_from: + logging.info('copy: using rsync with exclude-from %s', self.__exclude_from) + # Format: # %files # src1 dest @@ -211,6 +238,13 @@ def __str__(self): dest = pair['dest'] src = pair['src'] + # Use rsync if exclusion file provided and not multi-stage copy + if self.__exclude_from and not self.__from: + excl_opts = ' '.join('--exclude-from={}'.format(x) for x in self.__exclude_from) + pre.append(' mkdir -p ${{SINGULARITY_ROOTFS}}{0}'.format(dest)) + pre.append(' rsync -av {0} {1}/ ${{SINGULARITY_ROOTFS}}{2}/'.format(excl_opts, src, dest)) + continue + if self._post: dest = '/' diff --git a/test/test_copy.py b/test/test_copy.py index 73bb99b7..2abbb982 100644 --- a/test/test_copy.py +++ b/test/test_copy.py @@ -255,3 +255,38 @@ def test_from_temp_staging(self): """Singularity files from previous stage in tmp""" c = copy(_from='base', src='foo', dest='/var/tmp/foo') self.assertEqual(str(c), '%files from base\n foo /var/tmp/foo') + + @singularity + def test_exclude_from_single_singularity(self): + """rsync-based copy with exclude_from (single source)""" + c = copy(src='.', dest='/opt/app', exclude_from='.apptainerignore') + recipe = str(c) 
+ self.assertIn('%setup', recipe) + self.assertIn('rsync -av', recipe) + self.assertIn('--exclude-from=.apptainerignore', recipe) + # Allow trailing %files section but ensure rsync setup comes first + self.assertTrue(recipe.strip().startswith('%setup'), + "Expected rsync setup section to appear first") + + @singularity + def test_exclude_from_multiple_singularity(self): + """rsync-based copy with multiple exclude_from files""" + c = copy(src='data', dest='/opt/data', + exclude_from=['.ignore1', '.ignore2']) + recipe = str(c) + self.assertIn('%setup', recipe) + self.assertIn('rsync -av', recipe) + self.assertIn('--exclude-from=.ignore1', recipe) + self.assertIn('--exclude-from=.ignore2', recipe) + # Ensure setup section appears before %files + self.assertTrue(recipe.strip().startswith('%setup'), + "Expected rsync setup section to appear first") + + @docker + def test_exclude_from_docker_ignored(self): + """exclude_from ignored in Docker context""" + c = copy(src='.', dest='/opt/app', exclude_from='.apptainerignore') + recipe = str(c) + self.assertIn('COPY', recipe) + self.assertNotIn('rsync', recipe) + self.assertNotIn('%setup', recipe) From 29ddc697586995824cb322f144cccc6d9d46528b Mon Sep 17 00:00:00 2001 From: Allan Pinto Date: Tue, 21 Oct 2025 09:44:25 -0300 Subject: [PATCH 2/5] refactor(copy): rename exclude_from to _exclude_from and reorder docstring alphabetically Renamed the parameter `exclude_from` to `_exclude_from` to follow HPCCM convention that container framework-specific options begin with an underscore (e.g., `_chown`, `_mkdir`, `_post`). Also reordered the parameter documentation block in `copy.py` to maintain alphabetical order within the class docstring for consistency. 
--- hpccm/primitives/copy.py | 26 +++++++++++++------------- test/test_copy.py | 12 ++++++------ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/hpccm/primitives/copy.py b/hpccm/primitives/copy.py index cdb261d9..adad9291 100644 --- a/hpccm/primitives/copy.py +++ b/hpccm/primitives/copy.py @@ -44,6 +44,17 @@ class copy(object): dest: Path in the container image to copy the file(s) + _exclude_from: String or list of strings. One or more filenames + containing rsync-style exclude patterns (e.g., `.apptainerignore`). + Only used when building for Singularity or Apptainer. If specified, + the copy operation is emitted in the `%setup` section using + `rsync --exclude-from=` rather than the standard `%files` + copy directive. This enables selective exclusion of files and + directories during the image build, for example to omit large data + files, caches, or temporary artifacts. Multiple exclusion files may + be provided as a list or tuple. The default is an empty list + (Singularity specific). + files: A dictionary of file pairs, source and destination, to copy into the container image. If specified, has precedence over `dest` and `src`. @@ -66,17 +77,6 @@ class copy(object): src: A file, or a list of files, to copy - exclude_from: String or list of strings. One or more filenames - containing rsync-style exclude patterns (e.g., `.apptainerignore`). - Only used when building for Singularity or Apptainer. If specified, - the copy operation is emitted in the `%setup` section using - `rsync --exclude-from=` rather than the standard `%files` - copy directive. This enables selective exclusion of files and - directories during the image build, for example to omit large data - files, caches, or temporary artifacts. Multiple exclusion files may - be provided as a list or tuple. The default is an empty list - (Singularity specific). 
- # Examples ```python @@ -92,7 +92,7 @@ class copy(object): ``` ```python - copy(src='.', dest='/opt/app', exclude_from='.apptainerignore') + copy(src='.', dest='/opt/app', _exclude_from='.apptainerignore') ``` """ @@ -111,7 +111,7 @@ def __init__(self, **kwargs): self._post = kwargs.get('_post', '') # Singularity specific self.__src = kwargs.get('src', '') - ef = kwargs.get('exclude_from', None) + ef = kwargs.get('_exclude_from', None) if ef is None: self.__exclude_from = [] elif isinstance(ef, (list, tuple)): diff --git a/test/test_copy.py b/test/test_copy.py index 2abbb982..f590e6ea 100644 --- a/test/test_copy.py +++ b/test/test_copy.py @@ -258,8 +258,8 @@ def test_from_temp_staging(self): @singularity def test_exclude_from_single_singularity(self): - """rsync-based copy with exclude_from (single source)""" - c = copy(src='.', dest='/opt/app', exclude_from='.apptainerignore') + """rsync-based copy with _exclude_from (single source)""" + c = copy(src='.', dest='/opt/app', _exclude_from='.apptainerignore') recipe = str(c) self.assertIn('%setup', recipe) self.assertIn('rsync -av', recipe) @@ -270,9 +270,9 @@ def test_exclude_from_single_singularity(self): @singularity def test_exclude_from_multiple_singularity(self): - """rsync-based copy with multiple exclude_from files""" + """rsync-based copy with multiple _exclude_from files""" c = copy(src='data', dest='/opt/data', - exclude_from=['.ignore1', '.ignore2']) + _exclude_from=['.ignore1', '.ignore2']) recipe = str(c) self.assertIn('%setup', recipe) self.assertIn('rsync -av', recipe) @@ -284,8 +284,8 @@ def test_exclude_from_multiple_singularity(self): @docker def test_exclude_from_docker_ignored(self): - """exclude_from ignored in Docker context""" - c = copy(src='.', dest='/opt/app', exclude_from='.apptainerignore') + """_exclude_from ignored in Docker context""" + c = copy(src='.', dest='/opt/app', _exclude_from='.apptainerignore') recipe = str(c) self.assertIn('COPY', recipe) self.assertNotIn('rsync', recipe) 
From 26231b76509e37d553b9bda3fa0fd65eb2222f67 Mon Sep 17 00:00:00 2001 From: Allan Pinto Date: Tue, 21 Oct 2025 13:02:12 -0300 Subject: [PATCH 3/5] chore(copy): remove unnecessary info log for rsync exclude_from Removes the redundant `logging.info()` statement in the rsync exclusion branch of the copy primitive to keep logging output minimal and consistent with other primitives. --- hpccm/primitives/copy.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hpccm/primitives/copy.py b/hpccm/primitives/copy.py index adad9291..208f92cd 100644 --- a/hpccm/primitives/copy.py +++ b/hpccm/primitives/copy.py @@ -202,10 +202,6 @@ def __str__(self): else: logging.warning(msg) - # If exclusion list is defined, switch to rsync copy method - if self.__exclude_from: - logging.info('copy: using rsync with exclude-from %s', self.__exclude_from) - # Format: # %files # src1 dest From aa7bbf43c9e0025effb20be906f29d522ae3b60f Mon Sep 17 00:00:00 2001 From: Allan Pinto Date: Tue, 21 Oct 2025 13:05:33 -0300 Subject: [PATCH 4/5] test(copy): improve _exclude_from tests Refactors the _exclude_from tests to use assertEqual() with full expected recipe strings instead of multiple substring checks. This aligns the test style with other HPCCM copy primitive tests. Note: When `_exclude_from` is used, an empty %files section is still emitted after the rsync-based %setup block. This is intentional to preserve compatibility with the existing copy control flow. The extra section is harmless and may be removed in a future cleanup. 
--- test/test_copy.py | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/test/test_copy.py b/test/test_copy.py index f590e6ea..a695bcd2 100644 --- a/test/test_copy.py +++ b/test/test_copy.py @@ -260,33 +260,27 @@ def test_from_temp_staging(self): def test_exclude_from_single_singularity(self): """rsync-based copy with _exclude_from (single source)""" c = copy(src='.', dest='/opt/app', _exclude_from='.apptainerignore') - recipe = str(c) - self.assertIn('%setup', recipe) - self.assertIn('rsync -av', recipe) - self.assertIn('--exclude-from=.apptainerignore', recipe) - # Allow trailing %files section but ensure rsync setup comes first - self.assertTrue(recipe.strip().startswith('%setup'), - "Expected rsync setup section to appear first") + self.assertEqual(str(c), +r'''%setup + mkdir -p ${SINGULARITY_ROOTFS}/opt/app + rsync -av --exclude-from=.apptainerignore ./ ${SINGULARITY_ROOTFS}/opt/app/ +%files +''') @singularity def test_exclude_from_multiple_singularity(self): """rsync-based copy with multiple _exclude_from files""" c = copy(src='data', dest='/opt/data', _exclude_from=['.ignore1', '.ignore2']) - recipe = str(c) - self.assertIn('%setup', recipe) - self.assertIn('rsync -av', recipe) - self.assertIn('--exclude-from=.ignore1', recipe) - self.assertIn('--exclude-from=.ignore2', recipe) - # Ensure setup section appears before %files - self.assertTrue(recipe.strip().startswith('%setup'), - "Expected rsync setup section to appear first") + self.assertEqual(str(c), +r'''%setup + mkdir -p ${SINGULARITY_ROOTFS}/opt/data + rsync -av --exclude-from=.ignore1 --exclude-from=.ignore2 data/ ${SINGULARITY_ROOTFS}/opt/data/ +%files +''') @docker def test_exclude_from_docker_ignored(self): """_exclude_from ignored in Docker context""" c = copy(src='.', dest='/opt/app', _exclude_from='.apptainerignore') - recipe = str(c) - self.assertIn('COPY', recipe) - self.assertNotIn('rsync', recipe) - self.assertNotIn('%setup', recipe) + 
self.assertEqual(str(c), 'COPY . /opt/app')

From aa30380a4ab13bb7b963d4d5a8a0c834abc57a7c Mon Sep 17 00:00:00 2001
From: Allan Pinto
Date: Wed, 22 Oct 2025 10:12:58 -0300
Subject: [PATCH 5/5] refactor(copy): simplify _exclude_from initialization with default []

Falsy values that are neither a list nor a tuple (for example `None` or
an empty string) fall back to an empty list, so the attribute is always
defined before `__str__` reads it.
---
 hpccm/primitives/copy.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/hpccm/primitives/copy.py b/hpccm/primitives/copy.py
index 208f92cd..51bbfeab 100644
--- a/hpccm/primitives/copy.py
+++ b/hpccm/primitives/copy.py
@@ -111,12 +111,12 @@ def __init__(self, **kwargs):
         self._post = kwargs.get('_post', '') # Singularity specific
         self.__src = kwargs.get('src', '')
 
-        ef = kwargs.get('_exclude_from', None)
-        if ef is None:
-            self.__exclude_from = []
-        elif isinstance(ef, (list, tuple)):
+        ef = kwargs.get('_exclude_from', [])
+        if isinstance(ef, (list, tuple)):
             self.__exclude_from = list(ef)
-        else:
+        elif ef:
             self.__exclude_from = [ef]
+        else:
+            self.__exclude_from = []
 
         if self._mkdir and self._post: