Skip to content

Commit 8e88834

Browse files
committed
Use iris bundle for Ray working_dir to include pb2 files
Extract create_workspace_zip() from BundleCreator and use it in fray v1/v2 backends and ray_run.py so that gitignored protobuf artifacts (pb2 files) are included in Ray job bundles.
1 parent 079e8cd commit 8e88834

5 files changed

Lines changed: 187 additions & 68 deletions

File tree

lib/fray/src/fray/v1/cluster/ray/cluster.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from ray.job_submission import JobStatus as RayJobStatus
1818
from ray.job_submission import JobSubmissionClient
1919

20+
from iris.cluster.client.bundle import create_workspace_zip
21+
2022
from fray.v1.cluster.base import (
2123
Cluster,
2224
EnvironmentConfig,
@@ -276,8 +278,12 @@ def _get_runtime_env(self, request: JobRequest) -> dict | None:
276278
pip_packages=list(environment.pip_packages),
277279
env_vars=env_vars,
278280
)
279-
runtime_env["working_dir"] = environment.workspace
280-
runtime_env["excludes"] = [".git", "tests/", "docs/", "**/*.pack"]
281+
runtime_env["working_dir"] = create_workspace_zip(
282+
environment.workspace,
283+
exclude_dirs={"tests", "docs"},
284+
exclude_extensions={".pack"},
285+
max_size_bytes=None,
286+
)
281287
runtime_env["config"] = {"setup_timeout_seconds": 1800}
282288
else:
283289
# No runtime package installation: rely on the existing environment.

lib/fray/src/fray/v2/ray_backend/backend.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from ray.job_submission import JobStatus as RayJobStatus
1818
from ray.job_submission import JobSubmissionClient
1919

20+
from iris.cluster.client.bundle import create_workspace_zip
21+
2022
from fray.v2.actor import (
2123
ActorContext,
2224
ActorFuture,
@@ -209,8 +211,12 @@ def build_runtime_env(request: JobRequest) -> dict:
209211
pip_packages=list(environment.pip_packages),
210212
env_vars=env_vars,
211213
)
212-
runtime_env["working_dir"] = environment.workspace
213-
runtime_env["excludes"] = [".git", "tests/", "docs/", "**/*.pack"]
214+
runtime_env["working_dir"] = create_workspace_zip(
215+
environment.workspace,
216+
exclude_dirs={"tests", "docs"},
217+
exclude_extensions={".pack"},
218+
max_size_bytes=None,
219+
)
214220
runtime_env["config"] = {"setup_timeout_seconds": 1800}
215221
else:
216222
python_path = build_python_path(submodules_dir=os.path.join(environment.workspace, "submodules"))

lib/iris/src/iris/cluster/client/bundle.py

Lines changed: 122 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -35,21 +35,49 @@
3535
"tests/snapshot",
3636
}
3737

38+
# Glob patterns for generated files that are gitignored but required at runtime.
39+
# These are produced by build hooks (e.g. hatch_build.py protobuf generation)
40+
# and must be included in task bundles so that `uv sync` inside containers can
41+
# skip regeneration.
42+
GENERATED_ARTIFACT_GLOBS = [
43+
"src/iris/rpc/*_pb2.py",
44+
"src/iris/rpc/*_pb2.pyi",
45+
"src/iris/rpc/*_connect.py",
46+
"lib/iris/src/iris/rpc/*_pb2.py",
47+
"lib/iris/src/iris/rpc/*_pb2.pyi",
48+
"lib/iris/src/iris/rpc/*_connect.py",
49+
]
3850

39-
def _should_exclude(relative: Path) -> bool:
51+
52+
def _should_exclude(
53+
relative: Path,
54+
extra_dirs: set[str] | None = None,
55+
extra_extensions: set[str] | None = None,
56+
extra_subpaths: set[str] | None = None,
57+
) -> bool:
4058
"""Check whether a relative path should be excluded from the bundle."""
41-
if relative.suffix in EXCLUDE_EXTENSIONS:
59+
all_extensions = EXCLUDE_EXTENSIONS | (extra_extensions or set())
60+
all_dirs = EXCLUDE_DIRS | (extra_dirs or set())
61+
all_subpaths = EXCLUDE_SUBPATHS | (extra_subpaths or set())
62+
63+
if relative.suffix in all_extensions:
4264
return True
4365
# e.g. foo.egg-info
4466
if any(part.endswith(".egg-info") for part in relative.parts):
4567
return True
46-
if any(part in EXCLUDE_DIRS for part in relative.parts):
68+
if any(part in all_dirs for part in relative.parts):
4769
return True
4870
rel_str = str(relative)
49-
return any(subpath in rel_str for subpath in EXCLUDE_SUBPATHS)
71+
return any(subpath in rel_str for subpath in all_subpaths)
5072

5173

52-
def _get_git_non_ignored_files(workspace: Path) -> set[Path] | None:
74+
def get_git_non_ignored_files(
75+
workspace: Path,
76+
*,
77+
exclude_dirs: set[str] | None = None,
78+
exclude_extensions: set[str] | None = None,
79+
exclude_subpaths: set[str] | None = None,
80+
) -> set[Path] | None:
5381
"""Get files that are not ignored by git.
5482
5583
Returns None if git is not available or this isn't a git repo.
@@ -63,41 +91,109 @@ def _get_git_non_ignored_files(workspace: Path) -> set[Path] | None:
6391
check=True,
6492
)
6593
files = [Path(f) for f in result.stdout.splitlines() if f]
66-
files = [f for f in files if not _should_exclude(f)]
94+
files = [f for f in files if not _should_exclude(f, exclude_dirs, exclude_extensions, exclude_subpaths)]
6795
return {workspace / f for f in files}
6896
except (subprocess.CalledProcessError, FileNotFoundError) as e:
6997
logger.debug("Git not available, using pattern-based exclusion: %s", e)
7098
return None
7199

72100

73-
# Glob patterns for generated files that are gitignored but required at runtime.
74-
# These are produced by build hooks (e.g. hatch_build.py protobuf generation)
75-
# and must be included in task bundles so that `uv sync` inside containers can
76-
# skip regeneration.
77-
_GENERATED_ARTIFACT_GLOBS = [
78-
"src/iris/rpc/*_pb2.py",
79-
"src/iris/rpc/*_pb2.pyi",
80-
"src/iris/rpc/*_connect.py",
81-
"lib/iris/src/iris/rpc/*_pb2.py",
82-
"lib/iris/src/iris/rpc/*_pb2.pyi",
83-
"lib/iris/src/iris/rpc/*_connect.py",
84-
]
85-
86-
87-
def _include_generated_build_artifacts(workspace: Path, files: set[Path]) -> None:
101+
def include_generated_build_artifacts(
102+
workspace: Path,
103+
files: set[Path],
104+
*,
105+
exclude_dirs: set[str] | None = None,
106+
exclude_extensions: set[str] | None = None,
107+
exclude_subpaths: set[str] | None = None,
108+
) -> None:
88109
"""Add generated build artifacts that exist on disk but are gitignored."""
89110
added = 0
90-
for pattern in _GENERATED_ARTIFACT_GLOBS:
111+
for pattern in GENERATED_ARTIFACT_GLOBS:
91112
for path in workspace.glob(pattern):
92-
if path.is_file() and path not in files and not _should_exclude(path.relative_to(workspace)):
113+
if (
114+
path.is_file()
115+
and path not in files
116+
and not _should_exclude(path.relative_to(workspace), exclude_dirs, exclude_extensions, exclude_subpaths)
117+
):
93118
files.add(path)
94119
added += 1
95120
if added:
96121
logger.debug("Included %d generated build artifact(s) in bundle", added)
97122

98123

124+
def create_workspace_zip(
125+
workspace: str | Path,
126+
*,
127+
exclude_dirs: set[str] | None = None,
128+
exclude_extensions: set[str] | None = None,
129+
exclude_subpaths: set[str] | None = None,
130+
max_size_bytes: int | None = MAX_BUNDLE_SIZE_BYTES,
131+
) -> str:
132+
"""Create a zip of the workspace suitable for Ray's working_dir or Iris bundles.
133+
134+
Uses git ls-files to determine which files to include (respecting .gitignore),
135+
then adds back generated protobuf artifacts that are gitignored but needed at
136+
runtime. When git is unavailable, falls back to pattern-based exclusion.
137+
138+
Args:
139+
workspace: Root directory to bundle.
140+
exclude_dirs: Additional directory names to exclude (merged with defaults).
141+
exclude_extensions: Additional file extensions to exclude (merged with defaults).
142+
exclude_subpaths: Additional subpath strings to exclude (merged with defaults).
143+
max_size_bytes: Maximum allowed zip size. Pass None to disable the check.
144+
145+
Returns:
146+
Path to the created zip file (in a temp directory; caller should not delete
147+
the parent directory while the zip is in use).
148+
"""
149+
workspace = Path(workspace)
150+
151+
git_files = get_git_non_ignored_files(
152+
workspace,
153+
exclude_dirs=exclude_dirs,
154+
exclude_extensions=exclude_extensions,
155+
exclude_subpaths=exclude_subpaths,
156+
)
157+
if git_files is not None:
158+
include_generated_build_artifacts(
159+
workspace,
160+
git_files,
161+
exclude_dirs=exclude_dirs,
162+
exclude_extensions=exclude_extensions,
163+
exclude_subpaths=exclude_subpaths,
164+
)
165+
166+
# Use a persistent temp directory (not a context manager) so the caller
167+
# can use the zip path after this function returns.
168+
td = tempfile.mkdtemp(prefix="workspace_zip_")
169+
zip_path = Path(td) / "workspace.zip"
170+
171+
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
172+
if git_files is not None:
173+
for file in git_files:
174+
if file.is_file():
175+
zf.write(file, file.relative_to(workspace))
176+
else:
177+
for file in workspace.rglob("*"):
178+
rel = file.relative_to(workspace)
179+
if file.is_file() and not _should_exclude(rel, exclude_dirs, exclude_extensions, exclude_subpaths):
180+
zf.write(file, rel)
181+
182+
if max_size_bytes is not None:
183+
zip_size = zip_path.stat().st_size
184+
if zip_size > max_size_bytes:
185+
zip_size_mb = zip_size / (1024 * 1024)
186+
max_size_mb = max_size_bytes / (1024 * 1024)
187+
raise ValueError(
188+
f"Bundle size {zip_size_mb:.1f}MB exceeds maximum {max_size_mb:.0f}MB. "
189+
"Consider excluding large files or using .gitignore."
190+
)
191+
192+
return str(zip_path)
193+
194+
99195
class BundleCreator:
100-
"""Helper for creating workspace bundles.
196+
"""Helper for creating workspace bundles for Iris job submission.
101197
102198
Bundles a user's workspace directory (containing pyproject.toml, uv.lock,
103199
and source code) into a zip file for job execution.
@@ -118,31 +214,5 @@ def create_bundle(self) -> bytes:
118214
Raises:
119215
ValueError: If bundle size exceeds MAX_BUNDLE_SIZE_BYTES
120216
"""
121-
git_files = _get_git_non_ignored_files(self._workspace)
122-
if git_files is not None:
123-
_include_generated_build_artifacts(self._workspace, git_files)
124-
125-
with tempfile.TemporaryDirectory(prefix="bundle_") as td:
126-
bundle_path = Path(td) / "bundle.zip"
127-
with zipfile.ZipFile(bundle_path, "w", zipfile.ZIP_DEFLATED) as zf:
128-
if git_files is not None:
129-
for file in git_files:
130-
if file.is_file():
131-
zf.write(file, file.relative_to(self._workspace))
132-
else:
133-
for file in self._workspace.rglob("*"):
134-
rel = file.relative_to(self._workspace)
135-
if file.is_file() and not _should_exclude(rel):
136-
zf.write(file, rel)
137-
138-
bundle_bytes = bundle_path.read_bytes()
139-
bundle_size_mb = len(bundle_bytes) / (1024 * 1024)
140-
max_size_mb = MAX_BUNDLE_SIZE_BYTES / (1024 * 1024)
141-
142-
if len(bundle_bytes) > MAX_BUNDLE_SIZE_BYTES:
143-
raise ValueError(
144-
f"Bundle size {bundle_size_mb:.1f}MB exceeds maximum {max_size_mb:.0f}MB. "
145-
"Consider excluding large files or using .gitignore."
146-
)
147-
148-
return bundle_bytes
217+
zip_path = create_workspace_zip(self._workspace, max_size_bytes=MAX_BUNDLE_SIZE_BYTES)
218+
return Path(zip_path).read_bytes()

lib/iris/tests/cluster/client/test_bundle.py

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
# Copyright The Marin Authors
22
# SPDX-License-Identifier: Apache-2.0
33

4-
"""Tests for BundleCreator."""
4+
"""Tests for BundleCreator and create_workspace_zip."""
55

66
import io
7+
import os
78
import zipfile
9+
from pathlib import Path
810
from unittest.mock import patch
911

1012
import pytest
1113

12-
from iris.cluster.client.bundle import MAX_BUNDLE_SIZE_BYTES, BundleCreator
14+
from iris.cluster.client.bundle import MAX_BUNDLE_SIZE_BYTES, BundleCreator, create_workspace_zip
1315

1416

1517
@pytest.fixture
@@ -24,7 +26,7 @@ def workspace(tmp_path):
2426

2527

2628
def test_bundle_creator_uses_fallback_when_git_unavailable(workspace):
27-
with patch("iris.cluster.client.bundle._get_git_non_ignored_files", return_value=None):
29+
with patch("iris.cluster.client.bundle.get_git_non_ignored_files", return_value=None):
2830
creator = BundleCreator(workspace)
2931
bundle_bytes = creator.create_bundle()
3032

@@ -37,7 +39,7 @@ def test_bundle_creator_uses_fallback_when_git_unavailable(workspace):
3739

3840
def test_bundle_creator_uses_git_files_when_available(workspace):
3941
git_files = {workspace / "pyproject.toml", workspace / "src" / "main.py"}
40-
with patch("iris.cluster.client.bundle._get_git_non_ignored_files", return_value=git_files):
42+
with patch("iris.cluster.client.bundle.get_git_non_ignored_files", return_value=git_files):
4143
creator = BundleCreator(workspace)
4244
bundle_bytes = creator.create_bundle()
4345

@@ -58,9 +60,9 @@ def test_bundle_includes_generated_proto_files(workspace):
5860

5961
# Simulate git ls-files returning only tracked files (not the generated ones)
6062
git_files = {workspace / "pyproject.toml", workspace / "src" / "main.py"}
61-
with patch("iris.cluster.client.bundle._get_git_non_ignored_files") as mock_git:
63+
with patch("iris.cluster.client.bundle.get_git_non_ignored_files") as mock_git:
6264
# Call the real function's logic but with controlled git output,
63-
# then verify generated files are added via _include_generated_build_artifacts.
65+
# then verify generated files are added via include_generated_build_artifacts.
6466
mock_git.return_value = git_files
6567
creator = BundleCreator(workspace)
6668
bundle_bytes = creator.create_bundle()
@@ -74,14 +76,43 @@ def test_bundle_includes_generated_proto_files(workspace):
7476

7577
def test_bundle_creator_rejects_oversized_bundles(workspace):
7678
"""Test that bundles exceeding MAX_BUNDLE_SIZE_BYTES are rejected."""
77-
# Create a large file with random data that won't compress well
78-
import os
79-
8079
large_file = workspace / "large_file.bin"
8180
# Use urandom to create incompressible data
8281
large_file.write_bytes(os.urandom(MAX_BUNDLE_SIZE_BYTES + 1024 * 1024))
8382

84-
with patch("iris.cluster.client.bundle._get_git_non_ignored_files", return_value=None):
83+
with patch("iris.cluster.client.bundle.get_git_non_ignored_files", return_value=None):
8584
creator = BundleCreator(workspace)
8685
with pytest.raises(ValueError, match=r"Bundle size .* exceeds maximum"):
8786
creator.create_bundle()
87+
88+
89+
def test_create_workspace_zip_returns_path_with_expected_files(workspace):
90+
"""create_workspace_zip returns a zip path containing workspace files, excluding __pycache__."""
91+
with patch("iris.cluster.client.bundle.get_git_non_ignored_files", return_value=None):
92+
zip_path = create_workspace_zip(workspace)
93+
94+
assert Path(zip_path).exists()
95+
assert zip_path.endswith(".zip")
96+
97+
with zipfile.ZipFile(zip_path) as zf:
98+
names = zf.namelist()
99+
assert "pyproject.toml" in names
100+
assert "src/main.py" in names
101+
assert not any("__pycache__" in n for n in names)
102+
assert not any(n.endswith(".pyc") for n in names)
103+
104+
105+
def test_create_workspace_zip_with_custom_exclude_dirs(workspace):
106+
"""create_workspace_zip respects custom exclude_dirs in addition to the defaults."""
107+
(workspace / "experiments").mkdir()
108+
(workspace / "experiments" / "run.py").write_text("# experiment")
109+
(workspace / "src" / "core.py").write_text("# core")
110+
111+
with patch("iris.cluster.client.bundle.get_git_non_ignored_files", return_value=None):
112+
zip_path = create_workspace_zip(workspace, exclude_dirs={"experiments"})
113+
114+
with zipfile.ZipFile(zip_path) as zf:
115+
names = zf.namelist()
116+
assert "src/core.py" in names
117+
assert "pyproject.toml" in names
118+
assert not any("experiments" in n for n in names)

lib/marin/src/marin/run/ray_run.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from marin.cluster.config import find_config_by_region
3030
from fray.v1.cluster.ray import DashboardConfig, ray_dashboard
3131
from fray.v1.cluster.ray.deps import build_runtime_env_for_packages, accelerator_type_from_extra, AcceleratorType
32+
from iris.cluster.client.bundle import create_workspace_zip
3233
from iris.logging import configure_logging
3334

3435
logger = logging.getLogger(__name__)
@@ -156,9 +157,14 @@ async def submit_and_track_job(
156157
logger.info(f"env_vars: {json.dumps(env_vars, indent=4)}")
157158

158159
runtime_dict = {
159-
"working_dir": current_dir,
160+
"working_dir": create_workspace_zip(
161+
current_dir,
162+
exclude_dirs={"docs"},
163+
exclude_extensions={".pack"},
164+
exclude_subpaths={"lib/levanter/docs"},
165+
max_size_bytes=None,
166+
),
160167
"config": {"setup_timeout_seconds": 1800},
161-
"excludes": [".git", "docs/", "**/*.pack", "lib/levanter/docs"],
162168
}
163169

164170
# add the TPU dependency for cluster jobs.

0 commit comments

Comments
 (0)