Skip to content

Commit 05621b1

Browse files
committed
fix: project-name SIF, quoted paths, cleaner verify, lock hint
- SIF named <project>-<sha>.sif, not train-<sha>.sif
- container_job.sh: quote all variable paths, bind /project on DRAC
- Simplify verify command (no triple-escape)
- Hint 'uv lock' when uv export --locked fails due to stale lock
- Drop unnecessary TYPE_CHECKING import
1 parent 1678ed4 commit 05621b1

2 files changed

Lines changed: 26 additions & 20 deletions

File tree

cluv/cli/build.py

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,12 @@
88

99
import logging
1010
from pathlib import Path, PurePosixPath
11-
from typing import TYPE_CHECKING
1211

1312
from cluv.cli.login import login
1413
from cluv.cli.sync import sync
1514
from cluv.config import ContainerConfig, find_pyproject, get_config
1615
from cluv.utils import console
1716

18-
if TYPE_CHECKING:
19-
from cluv.remote import Remote
20-
2117
logger = logging.getLogger(__name__)
2218

2319
__all__ = ["build"]
@@ -81,15 +77,23 @@ async def build(cluster: str, extra: str | None = None, no_sync: bool = False) -
8177
project_path = PurePosixPath(find_pyproject().parent.relative_to(Path.home()))
8278

8379
console.print("[bold]Exporting pinned requirements from uv.lock...[/bold]")
84-
uv_extra = f"--extra {extra}" if extra else ""
85-
export_cmd = (
86-
f"bash -l -c 'cd ~/{project_path} && "
87-
f"uv export --locked --no-dev --no-hashes --no-annotate --no-header --no-emit-project "
88-
f"{uv_extra} --format requirements-txt'"
89-
)
80+
export_parts = [
81+
"uv export --locked --no-dev --no-hashes --no-annotate --no-header --no-emit-project",
82+
]
83+
if extra:
84+
export_parts.append(f"--extra {extra}")
85+
export_parts.append("--format requirements-txt")
86+
export_cmd = f"bash -l -c 'cd ~/{project_path} && {' '.join(export_parts)}'"
9087
result = await remote.run(export_cmd, display=True, hide="out")
9188
if result.returncode != 0:
92-
console.print(f"[red]uv export failed: {result.stderr}[/red]")
89+
stderr = result.stderr.strip()
90+
if "locked" in stderr.lower() or "lock" in stderr.lower():
91+
console.print(
92+
"[red]uv.lock is out of sync with pyproject.toml. "
93+
"Run 'uv lock' locally, commit, and try again.[/red]"
94+
)
95+
else:
96+
console.print(f"[red]uv export failed: {stderr}[/red]")
9397
return None
9498
requirements = result.stdout
9599

@@ -107,7 +111,8 @@ async def build(cluster: str, extra: str | None = None, no_sync: bool = False) -
107111
git_sha = await remote.get_output(
108112
f"git -C ~/{project_path} rev-parse --short HEAD",
109113
)
110-
sif_name = f"train-{git_sha}.sif"
114+
project_name = find_pyproject().parent.name
115+
sif_name = f"{project_name}-{git_sha}.sif"
111116
deploy_path = container.deploy_path
112117

113118
console.print("[bold]Building container (this may take several minutes)...[/bold]")
@@ -131,11 +136,11 @@ async def build(cluster: str, extra: str | None = None, no_sync: bool = False) -
131136

132137
# Verify the image loads before deploying.
133138
console.print("[bold]Verifying container...[/bold]")
139+
verify_script = "import sys; sys.exit(0)"
134140
verify_cmd = (
135141
f"bash -l -c '"
136142
f"module load apptainer 2>/dev/null || true; "
137-
f"apptainer exec /tmp/cluv-build/{sif_name} "
138-
f"python -c \"import importlib.metadata; print(\\\"verify OK\\\")\"'"
143+
f"apptainer exec /tmp/cluv-build/{sif_name} python -c \"{verify_script}\"'"
139144
)
140145
result = await remote.run(verify_cmd, display=True, hide="out")
141146
if result.returncode != 0:
@@ -166,5 +171,5 @@ async def build(cluster: str, extra: str | None = None, no_sync: bool = False) -
166171
return sif_path
167172

168173

169-
async def _cleanup_build_dir(remote: "Remote") -> None:
174+
async def _cleanup_build_dir(remote) -> None:
170175
await remote.run("rm -rf /tmp/cluv-build", warn=True, hide=True, display=False)

scripts/container_job.sh

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,13 @@ module load apptainer 2>/dev/null || true
2222
echo "Running command: apptainer exec $CONTAINER_PATH $@"
2323
srun apptainer exec --nv \
2424
--env PYTHONUNBUFFERED=1 \
25-
--env PYTHONPATH=$project_root \
25+
--env "PYTHONPATH=$project_root" \
2626
--env MPLCONFIGDIR=/tmp/mpl \
2727
--env TORCHDYNAMO_DISABLE=1 \
28-
--bind $project_root:$project_root \
28+
--bind "$project_root":"$project_root" \
2929
--bind /dev/shm:/dev/shm \
30-
${SLURM_TMPDIR:+--bind $SLURM_TMPDIR:$SLURM_TMPDIR} \
31-
${SCRATCH:+--bind $SCRATCH:$SCRATCH} \
32-
$CONTAINER_PATH \
30+
${SLURM_TMPDIR:+--bind "$SLURM_TMPDIR":"$SLURM_TMPDIR"} \
31+
${SCRATCH:+--bind "$SCRATCH":"$SCRATCH"} \
32+
${PROJECT:+--bind /project:/project} \
33+
"$CONTAINER_PATH" \
3334
"$@"

0 commit comments

Comments
 (0)