3535 "tests/snapshot" ,
3636}
3737
# Glob patterns (relative to the workspace root) for generated files that are
# gitignored but still required at runtime. Build hooks produce them (e.g. the
# protobuf generation in hatch_build.py), and shipping them inside task bundles
# lets `uv sync` inside containers skip regeneration.
#
# Expands to <root>/*_pb2.py, <root>/*_pb2.pyi and <root>/*_connect.py for each
# RPC package root, in that order.
GENERATED_ARTIFACT_GLOBS = [
    f"{rpc_root}/{file_glob}"
    for rpc_root in ("src/iris/rpc", "lib/iris/src/iris/rpc")
    for file_glob in ("*_pb2.py", "*_pb2.pyi", "*_connect.py")
]
3850
def _should_exclude(
    relative: Path,
    extra_dirs: set[str] | None = None,
    extra_extensions: set[str] | None = None,
    extra_subpaths: set[str] | None = None,
) -> bool:
    """Check whether a relative path should be excluded from the bundle.

    A path is excluded when any of the following holds:

    * its suffix is in ``EXCLUDE_EXTENSIONS`` or ``extra_extensions``;
    * any path component ends with ``.egg-info``;
    * any path component is in ``EXCLUDE_DIRS`` or ``extra_dirs``;
    * any entry of ``EXCLUDE_SUBPATHS`` or ``extra_subpaths`` occurs as a
      substring of the path.

    Args:
        relative: Path relative to the workspace root.
        extra_dirs: Additional directory names, merged with ``EXCLUDE_DIRS``.
        extra_extensions: Additional suffixes, merged with
            ``EXCLUDE_EXTENSIONS``.
        extra_subpaths: Additional subpath strings, merged with
            ``EXCLUDE_SUBPATHS``.

    Returns:
        True if the path must be left out of the bundle.
    """
    all_extensions = EXCLUDE_EXTENSIONS | (extra_extensions or set())
    all_dirs = EXCLUDE_DIRS | (extra_dirs or set())
    all_subpaths = EXCLUDE_SUBPATHS | (extra_subpaths or set())

    if relative.suffix in all_extensions:
        return True
    # e.g. foo.egg-info
    if any(part.endswith(".egg-info") for part in relative.parts):
        return True
    if any(part in all_dirs for part in relative.parts):
        return True
    # Match against the forward-slash form: str(Path) uses backslashes on
    # Windows, which would never contain a "/"-separated subpath entry.
    rel_str = relative.as_posix()
    return any(subpath in rel_str for subpath in all_subpaths)
5072
5173
52- def _get_git_non_ignored_files (workspace : Path ) -> set [Path ] | None :
74+ def get_git_non_ignored_files (
75+ workspace : Path ,
76+ * ,
77+ exclude_dirs : set [str ] | None = None ,
78+ exclude_extensions : set [str ] | None = None ,
79+ exclude_subpaths : set [str ] | None = None ,
80+ ) -> set [Path ] | None :
5381 """Get files that are not ignored by git.
5482
5583 Returns None if git is not available or this isn't a git repo.
@@ -63,41 +91,109 @@ def _get_git_non_ignored_files(workspace: Path) -> set[Path] | None:
6391 check = True ,
6492 )
6593 files = [Path (f ) for f in result .stdout .splitlines () if f ]
66- files = [f for f in files if not _should_exclude (f )]
94+ files = [f for f in files if not _should_exclude (f , exclude_dirs , exclude_extensions , exclude_subpaths )]
6795 return {workspace / f for f in files }
6896 except (subprocess .CalledProcessError , FileNotFoundError ) as e :
6997 logger .debug ("Git not available, using pattern-based exclusion: %s" , e )
7098 return None
7199
72100
73- # Glob patterns for generated files that are gitignored but required at runtime.
74- # These are produced by build hooks (e.g. hatch_build.py protobuf generation)
75- # and must be included in task bundles so that `uv sync` inside containers can
76- # skip regeneration.
77- _GENERATED_ARTIFACT_GLOBS = [
78- "src/iris/rpc/*_pb2.py" ,
79- "src/iris/rpc/*_pb2.pyi" ,
80- "src/iris/rpc/*_connect.py" ,
81- "lib/iris/src/iris/rpc/*_pb2.py" ,
82- "lib/iris/src/iris/rpc/*_pb2.pyi" ,
83- "lib/iris/src/iris/rpc/*_connect.py" ,
84- ]
85-
86-
def include_generated_build_artifacts(
    workspace: Path,
    files: set[Path],
    *,
    exclude_dirs: set[str] | None = None,
    exclude_extensions: set[str] | None = None,
    exclude_subpaths: set[str] | None = None,
) -> None:
    """Add on-disk generated build artifacts to ``files`` in place.

    Every pattern in ``GENERATED_ARTIFACT_GLOBS`` is globbed under
    ``workspace``. A match is added when it is a regular file, is not already
    in ``files``, and is not rejected by ``_should_exclude``.

    Args:
        workspace: Root directory the glob patterns are resolved against.
        files: Set of paths to extend; mutated in place.
        exclude_dirs: Extra directory names forwarded to ``_should_exclude``.
        exclude_extensions: Extra suffixes forwarded to ``_should_exclude``.
        exclude_subpaths: Extra subpaths forwarded to ``_should_exclude``.
    """
    newly_added = 0
    for glob_pattern in GENERATED_ARTIFACT_GLOBS:
        for candidate in workspace.glob(glob_pattern):
            if not candidate.is_file():
                continue
            if candidate in files:
                continue
            rel = candidate.relative_to(workspace)
            if _should_exclude(rel, exclude_dirs, exclude_extensions, exclude_subpaths):
                continue
            files.add(candidate)
            newly_added += 1

    # Stay silent when nothing was picked up.
    if newly_added:
        logger.debug("Included %d generated build artifact(s) in bundle", newly_added)
97122
98123
def create_workspace_zip(
    workspace: str | Path,
    *,
    exclude_dirs: set[str] | None = None,
    exclude_extensions: set[str] | None = None,
    exclude_subpaths: set[str] | None = None,
    max_size_bytes: int | None = MAX_BUNDLE_SIZE_BYTES,
) -> str:
    """Create a zip of the workspace suitable for Ray's working_dir or Iris bundles.

    Uses git ls-files to determine which files to include (respecting .gitignore),
    then adds back generated protobuf artifacts that are gitignored but needed at
    runtime. When git is unavailable, falls back to pattern-based exclusion.

    Files are written in sorted path order so the archive's member order does
    not depend on set or directory iteration order.

    Args:
        workspace: Root directory to bundle.
        exclude_dirs: Additional directory names to exclude (merged with defaults).
        exclude_extensions: Additional file extensions to exclude (merged with defaults).
        exclude_subpaths: Additional subpath strings to exclude (merged with defaults).
        max_size_bytes: Maximum allowed zip size. Pass None to disable the check.

    Returns:
        Path to the created zip file (in a temp directory; caller should not delete
        the parent directory while the zip is in use).

    Raises:
        ValueError: If the zip is larger than ``max_size_bytes``. The temporary
            directory is removed before the error propagates.
    """
    import shutil  # local import: only needed for cleanup on failure

    workspace = Path(workspace)

    git_files = get_git_non_ignored_files(
        workspace,
        exclude_dirs=exclude_dirs,
        exclude_extensions=exclude_extensions,
        exclude_subpaths=exclude_subpaths,
    )
    if git_files is not None:
        include_generated_build_artifacts(
            workspace,
            git_files,
            exclude_dirs=exclude_dirs,
            exclude_extensions=exclude_extensions,
            exclude_subpaths=exclude_subpaths,
        )

    # Use a persistent temp directory (not a context manager) so the caller
    # can use the zip path after this function returns.
    td = tempfile.mkdtemp(prefix="workspace_zip_")
    zip_path = Path(td) / "workspace.zip"

    try:
        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
            if git_files is not None:
                for file in sorted(git_files):
                    # Tracked files may have been deleted from the working tree.
                    if file.is_file():
                        zf.write(file, file.relative_to(workspace))
            else:
                for file in sorted(workspace.rglob("*")):
                    rel = file.relative_to(workspace)
                    if file.is_file() and not _should_exclude(rel, exclude_dirs, exclude_extensions, exclude_subpaths):
                        zf.write(file, rel)

        if max_size_bytes is not None:
            zip_size = zip_path.stat().st_size
            if zip_size > max_size_bytes:
                zip_size_mb = zip_size / (1024 * 1024)
                max_size_mb = max_size_bytes / (1024 * 1024)
                raise ValueError(
                    f"Bundle size {zip_size_mb:.1f} MB exceeds maximum {max_size_mb:.0f} MB. "
                    "Consider excluding large files or using .gitignore."
                )
    except BaseException:
        # The caller never learns the path on failure, so nobody else could
        # clean this directory up; remove it here and re-raise unchanged.
        shutil.rmtree(td, ignore_errors=True)
        raise

    return str(zip_path)
193+
194+
99195class BundleCreator :
100- """Helper for creating workspace bundles.
196+ """Helper for creating workspace bundles for Iris job submission .
101197
102198 Bundles a user's workspace directory (containing pyproject.toml, uv.lock,
103199 and source code) into a zip file for job execution.
@@ -118,31 +214,5 @@ def create_bundle(self) -> bytes:
118214 Raises:
119215 ValueError: If bundle size exceeds MAX_BUNDLE_SIZE_BYTES
120216 """
121- git_files = _get_git_non_ignored_files (self ._workspace )
122- if git_files is not None :
123- _include_generated_build_artifacts (self ._workspace , git_files )
124-
125- with tempfile .TemporaryDirectory (prefix = "bundle_" ) as td :
126- bundle_path = Path (td ) / "bundle.zip"
127- with zipfile .ZipFile (bundle_path , "w" , zipfile .ZIP_DEFLATED ) as zf :
128- if git_files is not None :
129- for file in git_files :
130- if file .is_file ():
131- zf .write (file , file .relative_to (self ._workspace ))
132- else :
133- for file in self ._workspace .rglob ("*" ):
134- rel = file .relative_to (self ._workspace )
135- if file .is_file () and not _should_exclude (rel ):
136- zf .write (file , rel )
137-
138- bundle_bytes = bundle_path .read_bytes ()
139- bundle_size_mb = len (bundle_bytes ) / (1024 * 1024 )
140- max_size_mb = MAX_BUNDLE_SIZE_BYTES / (1024 * 1024 )
141-
142- if len (bundle_bytes ) > MAX_BUNDLE_SIZE_BYTES :
143- raise ValueError (
144- f"Bundle size { bundle_size_mb :.1f} MB exceeds maximum { max_size_mb :.0f} MB. "
145- "Consider excluding large files or using .gitignore."
146- )
147-
148- return bundle_bytes
217+ zip_path = create_workspace_zip (self ._workspace , max_size_bytes = MAX_BUNDLE_SIZE_BYTES )
218+ return Path (zip_path ).read_bytes ()
0 commit comments