Skip to content

Commit 7c37b3d

Browse files
authored
Fix hatch build proto staleness detection (#4078)
Replace mtime-based proto staleness check with SHA-256 content-hash checksums in `lib/iris/hatch_build.py`.

## Problem

Git does not preserve file modification times. After a `git pull`, both proto source files and their generated outputs get the same mtime, which means:

- Real changes to `.proto` files can be missed (no rebuild when one is needed)
- Spurious rebuilds can be triggered when mtimes shift

The previous 60-second tolerance on mtime comparison was a workaround for zip extraction jitter but did not address the fundamental git mtime problem.

## Fix

- Compute a SHA-256 digest over all proto source file contents (sorted by path for determinism)
- Store the digest in `build/.proto_checksum` after successful generation
- On subsequent builds, compare the current digest against the stored one
- Fall back to the old mtime comparison when no checksum file exists yet (first build)
1 parent 0abc41d commit 7c37b3d

1 file changed

Lines changed: 31 additions & 4 deletions

File tree

lib/iris/hatch_build.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
``_ensure_dashboard_dist()`` in the Docker image build pipeline.
1313
"""
1414

15+
import hashlib
1516
import logging
1617
import shutil
1718
import subprocess
@@ -62,13 +63,35 @@ def _has_missing_outputs(root: Path, source_globs: list[str]) -> bool:
6263
return False
6364

6465

66+
# Location, joined onto the build root, of the file that stores the SHA-256
# digest of the proto sources as of the last successful generation.
_CHECKSUM_FILE = "build/.proto_checksum"
67+
68+
69+
def _source_digest(root: Path, source_globs: list[str]) -> str:
70+
"""SHA-256 digest of all proto source contents, sorted by path for stability."""
71+
h = hashlib.sha256()
72+
paths = sorted(p for pattern in source_globs for p in root.glob(pattern) if p.is_file())
73+
for p in paths:
74+
h.update(p.relative_to(root).as_posix().encode())
75+
h.update(p.read_bytes())
76+
return h.hexdigest()
77+
78+
6579
def _needs_rebuild(root: Path, source_globs: list[str], output_globs: list[str]) -> bool:
    """Decide whether the proto outputs must be regenerated.

    The primary signal is a content hash: the digest of the current sources is
    compared with the one recorded in the checksum file. Timestamps are not
    trusted for this because git does not preserve them, so after a pull the
    sources and outputs can carry the same mtime even when the sources changed.

    If no checksum file has been written yet, the decision falls back to
    comparing mtimes, allowing 60 seconds of slack for timestamp jitter
    introduced by zip extraction.

    Returns:
        True when the recorded digest differs from the current one, or (in
        the fallback case) when the newest source is more than 60 seconds
        newer than the oldest output.
    """
    digest_now = _source_digest(root, source_globs)
    stamp = root / _CHECKSUM_FILE

    if not stamp.exists():
        # No recorded digest yet: use the legacy mtime heuristic.
        newest_source = _newest_mtime(root, source_globs)
        oldest_output = _oldest_mtime(root, output_globs)
        return newest_source > oldest_output + 60.0

    recorded = stamp.read_text().strip()
    return recorded != digest_now
@@ -117,4 +140,8 @@ def _maybe_generate_protos(self, root: Path) -> None:
117140
)
118141
if result.returncode != 0:
119142
raise RuntimeError(f"Protobuf generation failed:\n{result.stdout}\n{result.stderr}")
143+
# Write checksum so future builds can detect when sources change
144+
checksum_path = root / _CHECKSUM_FILE
145+
checksum_path.parent.mkdir(parents=True, exist_ok=True)
146+
checksum_path.write_text(_source_digest(root, _PROTO_SOURCE_GLOBS) + "\n")
120147
logger.info("Protobuf generation complete")

0 commit comments

Comments
 (0)