Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions .github/workflows/asm_cache_invalidation.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
name: asm cache invalidation

# Guards that the rom-setup asm/ROM cache key tracks every input that determines
# the generated binaries (the linked C libs, the embedded float ELF, and the
# transpiler / emulator-asm sources). Mutating any of them must change the cache
# filename; otherwise stale binaries get silently reused. See
# tools/test_asm_cache_invalidation.sh and rom-setup/build.rs.

on:
pull_request:
branches:
- develop
- main
- "pre-develop-[0-9]+.[0-9]+.[0-9]*"
paths:
Comment thread
RogerTaule marked this conversation as resolved.
- "rom-setup/**"
- "core/src/**"
- "emulator-asm/**"
- "lib-c/**"
- "lib-float/c/lib/ziskfloat.elf"
Comment thread
RogerTaule marked this conversation as resolved.
- "tools/test_asm_cache_invalidation.sh"
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref || github.run_id }}
cancel-in-progress: true

permissions:
contents: read

jobs:
cache-invalidation:
name: asm cache key tracks its inputs
runs-on: ubuntu-latest
timeout-minutes: 30
env:
CARGO_NET_GIT_FETCH_WITH_CLI: "true"
steps:
- name: Free disk space
uses: jlumbroso/free-disk-space@v1.3.1
with:
tool-cache: true
android: true
dotnet: true
haskell: true
large-packages: true
docker-images: true
swap-storage: false

- name: Checkout sources
uses: actions/checkout@v4

- name: Install dependencies
run: |
cd "$GITHUB_WORKSPACE/tools/test-env"
sudo ./install_deps.sh

- name: Run cache-invalidation test
run: ./tools/test_asm_cache_invalidation.sh --ci
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions prover-backend/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,8 @@ pub fn get_rom_bin_path<F: PrimeField64>(
pub fn get_asm_paths(elf: &GuestProgram, with_hints: bool) -> Result<(String, String)> {
let name = elf.name();
let hash = get_elf_data_hash(elf.elf());
let prefix = if name != hash { format!("{name}-{hash}") } else { hash };
let base = if with_hints { format!("{prefix}-hints") } else { prefix };

let base = rom_setup::compute_asm_basename(name, &hash, with_hints);
Ok((format!("{base}-mt.bin"), format!("{base}-rh.bin")))
}

Expand Down
5 changes: 5 additions & 0 deletions rom-setup/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ license = { workspace = true }
keywords = { workspace = true }
repository = { workspace = true }
categories = { workspace = true }
build = "build.rs"

[build-dependencies]
blake3 = { workspace = true }
lib-float = { workspace = true }
Comment thread
RogerTaule marked this conversation as resolved.

[dependencies]
sm-rom = { workspace = true }
Expand Down
101 changes: 101 additions & 0 deletions rom-setup/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
//! Bakes two fingerprints of the *source* inputs behind the on-disk caches, so
//! a change forces regeneration (the caches are keyed by filename, and none of
//! these inputs has a runtime-hashable compiled artifact).
//!
//! `ZISK_ROM_INPUTS_HASH` covers the transpiler (`zisk-core`) plus the float ELF
//! `elf2rom` embeds, and keys the ROM/verkey cache (`utils.rs`).
//! `ZISK_ASM_INPUTS_HASH` covers the ROM inputs plus the asm build:
//! `emulator-asm/src` and the *source* of the linked libs (`ziskclib/src`,
//! `lib-c/c/src`). It keys the asm-binary cache (`asm_setup.rs`). Hashing lib
//! source rather than the compiled archives keeps the key a build-time constant,
//! so the path a consumer computes always matches what the producer generates.
//!
//! The ROM inputs are a subset of the asm inputs; keeping them separate avoids
//! regenerating the expensive ROM/verkey on an asm-only change.

use std::path::{Path, PathBuf};

fn main() {
let manifest = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
// rom-setup lives at <workspace>/rom-setup.
let ws = manifest.parent().expect("rom-setup has a parent dir");

let core_src = ws.join("core/src");
Comment thread
RogerTaule marked this conversation as resolved.
let emu_src = ws.join("emulator-asm/src");

// We `rerun-if-changed` each existing file below, not the dirs (cargo's
// directory watch doesn't reliably fire on file *adds*). That's sufficient:
// a new source file only affects the output once it's referenced — a `mod`
// in `lib.rs`, an entry in `emulator-asm/Makefile`, or an `.asm`/`.inc`
// include — and editing that referencing file (which IS watched) triggers
// the rerun that picks the new file up.

// Deny-list (fail-safe): `zisk-core` files that don't affect the transpiled
// ROM — the Rust-emulator runtime and CLI binaries. Anything else is hashed
// by default, so a new transpiler module is covered automatically.
let rom_excluded =
[ws.join("core/src/bin"), core_src.join("inst_context.rs"), core_src.join("mem.rs")];
let mut rom_files = collect_files(&core_src, &rom_excluded);
rom_files.push(ws.join("lib-float/c/lib/ziskfloat.elf")); // embedded into the ROM
let rom_hash = hash_files(blake3::Hasher::new(), ws, rom_files);

// The asm binaries additionally link two libs; hashing their *source* (not
// the compiled archives) keeps the key a pure build-time constant, so the
// path a consumer computes always matches what the producer generates. The
// libs' build *config* (ziskclib's manifest, lib-c's Makefile) is folded in
// too, since it changes the produced libs without touching their src/.
let mut asm_files = collect_files(&emu_src, &[]);
asm_files.push(ws.join("emulator-asm/Makefile"));
asm_files.extend(collect_files(&ws.join("ziskclib/src"), &[])); // -> libziskclib.a
asm_files.push(ws.join("ziskclib/Cargo.toml"));
asm_files.extend(collect_files(&ws.join("lib-c/c/src"), &[])); // -> libziskc.a
Comment thread
RogerTaule marked this conversation as resolved.
asm_files.push(ws.join("lib-c/c/Makefile"));
let mut seeded = blake3::Hasher::new();
seeded.update(rom_hash.as_bytes());
seeded.update(&[0xffu8]);
let asm_hash = hash_files(seeded, ws, asm_files);

println!("cargo:rustc-env=ZISK_ROM_INPUTS_HASH={rom_hash}");
println!("cargo:rustc-env=ZISK_ASM_INPUTS_HASH={asm_hash}");
}

/// Fold a list of files (workspace-relative path + content) into `hasher` and
/// return a 12-hex digest, emitting `rerun-if-changed` for each.
fn hash_files(mut hasher: blake3::Hasher, ws: &Path, mut files: Vec<PathBuf>) -> String {
files.sort(); // deterministic, independent of readdir order
for f in &files {
println!("cargo:rerun-if-changed={}", f.display());
let rel = f.strip_prefix(ws).unwrap_or(f);
hasher.update(rel.to_string_lossy().as_bytes());
hasher.update(&[0u8]);
let bytes = std::fs::read(f)
.unwrap_or_else(|e| panic!("rom-setup build.rs: cannot read {}: {e}", f.display()));
hasher.update(&(bytes.len() as u64).to_le_bytes());
hasher.update(&bytes);
hasher.update(&[0xffu8]);
}
hasher.finalize().to_hex().as_str()[..12].to_string()
}

/// Recursively collect files under `dir` (no-op if absent), skipping anything
/// matched by `excluded` (an exact path or an ancestor dir).
fn collect_files(dir: &Path, excluded: &[PathBuf]) -> Vec<PathBuf> {
let is_excluded = |p: &Path| excluded.iter().any(|e| p == e || p.starts_with(e));
let mut out = Vec::new();
let mut stack = vec![dir.to_path_buf()];
while let Some(d) = stack.pop() {
let Ok(entries) = std::fs::read_dir(&d) else { continue };
for entry in entries.flatten() {
let path = entry.path();
if is_excluded(&path) {
continue;
}
if path.is_dir() {
stack.push(path);
} else {
out.push(path);
}
}
}
out
}
18 changes: 18 additions & 0 deletions rom-setup/examples/cache_probe.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
//! Throwaway probe: prints the asm-binary cache base name for a fixed guest ELF
//! identity, so a wrapper script can mutate an input (lib / float / transpiler)
//! and observe whether the cache filename changes.
//!
//! `get_assembly_file_paths_from_id` is `pub` on both the base branch and the
//! fix branch, so the same probe runs on either — the point is that on base the
//! name is invariant to lib/float/transpiler changes (the bug), and on the fix
//! branch it changes (the fix).

fn main() {
let [mt, _rh, _mo] = rom_setup::get_assembly_file_paths_from_id(
"probe",
"deadbeefcafef00d",
std::path::Path::new("/tmp"),
false,
);
println!("{}", mt.file_name().unwrap().to_string_lossy());
}
65 changes: 59 additions & 6 deletions rom-setup/src/asm_setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,15 +143,35 @@ pub fn ensure_ziskclib(emu_dir: &Path, source: EmulatorAsmSource) -> Result<()>
Ok(())
}

fn asm_file_base(name: &str, hash: &str, hints: bool) -> String {
fn asm_file_base(name: &str, hash: &str, hints: bool, deps_hash: &str) -> String {
let prefix = if name != hash { format!("{name}-{hash}") } else { hash.to_string() };
let prefix = if deps_hash.is_empty() { prefix } else { format!("{prefix}-d{deps_hash}") };
if hints {
format!("{prefix}-hints")
} else {
prefix
}
}

/// Build-time hash of everything (besides the guest ELF) that determines the asm
/// binaries: the transpiler, embedded float ELF, `emulator-asm/src`, and the
/// *source* of the linked libs (`ziskclib`, `lib-c`). Baked by `build.rs`.
const ASM_INPUTS_HASH: &str = env!("ZISK_ASM_INPUTS_HASH");

/// Canonical cache base name for the asm binaries (`<base>-{mt,rh,mo}.bin`).
///
/// The cache is keyed by filename alone, so the name must change whenever the
/// binaries would — i.e. whenever the guest ELF or any input in
/// [`ASM_INPUTS_HASH`] changes. Without this, rebuilding a lib or the transpiler
/// left the old name in place and stale binaries were reused.
///
/// `ASM_INPUTS_HASH` is a compile-time constant, so every producer and consumer
/// derives the identical key — a consumer never computes a path the producer
/// won't generate. Every caller MUST route through here.
pub fn compute_asm_basename(elf_name: &str, elf_hash: &str, hints: bool) -> String {
asm_file_base(elf_name, elf_hash, hints, ASM_INPUTS_HASH)
}
Comment thread
RogerTaule marked this conversation as resolved.
Comment thread
RogerTaule marked this conversation as resolved.

/// Get the paths to all assembly binary files for a given ELF and output path
pub fn get_assembly_file_paths(
elf: &Path,
Expand All @@ -175,7 +195,7 @@ pub fn get_assembly_file_paths_from_id(
output_path: &Path,
hints: bool,
) -> [PathBuf; 3] {
let base = asm_file_base(elf_name, elf_hash, hints);
let base = compute_asm_basename(elf_name, elf_hash, hints);
[
output_path.join(format!("{base}-mt.bin")),
output_path.join(format!("{base}-rh.bin")),
Expand Down Expand Up @@ -226,15 +246,16 @@ pub fn generate_assembly(
anyhow::bail!("ROM file is not a valid ELF file");
}

let base = asm_file_base(elf_name, &elf_hash, hints);
let (emulator_asm_path, asm_source) = resolve_emulator_asm()?;
ensure_ziskclib(&emulator_asm_path, asm_source)?;

// Same canonical key the consumers use, so the files we write are exactly
// the ones they look for.
let base = compute_asm_basename(elf_name, &elf_hash, hints);
let bin_mt_file = output_path.join(format!("{base}-mt.bin"));
let bin_rh_file = output_path.join(format!("{base}-rh.bin"));
let bin_mo_file = output_path.join(format!("{base}-mo.bin"));

let (emulator_asm_path, asm_source) = resolve_emulator_asm()?;
ensure_ziskclib(&emulator_asm_path, asm_source)?;

let emulator_asm_path =
emulator_asm_path.to_str().context("Failed to convert emulator-asm path to string")?;

Expand Down Expand Up @@ -284,3 +305,35 @@ pub fn generate_assembly(

Ok(())
}

#[cfg(test)]
mod tests {
use super::asm_file_base;

// The core invariant of the cache-invalidation fix: the base name embeds
// `deps_hash`, so a change in the linked libs / transpiler / float (which
// all flow into `deps_hash`) yields a *different* filename and forces
// regeneration — while identical inputs reproduce the identical name.
#[test]
fn deps_hash_changes_the_name() {
let a = asm_file_base("prog", "elfhash", false, "aaaaaaaaaaaa");
let b = asm_file_base("prog", "elfhash", false, "bbbbbbbbbbbb");
assert_ne!(a, b, "different deps_hash must produce different cache names");
assert!(a.contains("-daaaaaaaaaaaa"), "name must carry the -d<deps_hash> segment: {a}");
}

#[test]
fn same_inputs_same_name() {
let a = asm_file_base("prog", "elfhash", true, "aaaaaaaaaaaa");
let b = asm_file_base("prog", "elfhash", true, "aaaaaaaaaaaa");
assert_eq!(a, b, "identical inputs must be cache-stable");
}

// Empty deps_hash (toolchain unresolved) reproduces the legacy elf-only name
// exactly, so pre-existing caches in that mode stay valid.
#[test]
fn empty_deps_hash_is_legacy_name() {
assert_eq!(asm_file_base("prog", "elfhash", false, ""), "prog-elfhash");
assert_eq!(asm_file_base("prog", "elfhash", true, ""), "prog-elfhash-hints");
}
}
12 changes: 10 additions & 2 deletions rom-setup/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ pub fn get_elf_data_hash(elf: &[u8]) -> String {
blake3::hash(elf).to_hex().to_string()
}

/// Hash of the ROM inputs (transpiler + embedded float ELF), baked by
/// `build.rs`. Folded into the ROM/verkey filenames so a transpiler or float
/// change invalidates them — otherwise a warm cache returns a verkey stale
/// w.r.t. the current `zisk-core`, i.e. the wrong program vk.
const ROM_INPUTS_HASH: &str = env!("ZISK_ROM_INPUTS_HASH");

pub fn get_elf_bin_file_path_with_hash(
hash: &str,
default_cache_path: &Path,
Expand All @@ -86,8 +92,9 @@ pub fn get_elf_bin_file_path_with_hash(

let gpu_suffix = if gpu { "_gpu" } else { "" };
let rom_cache_file_name = format!(
"{}_{}_{}_{}_{}{}.bin",
"{}-d{}_{}_{}_{}_{}{}.bin",
hash,
ROM_INPUTS_HASH,
pilout_hash,
&n.to_string(),
&ROM_BLOWUP_FACTOR.to_string(),
Expand All @@ -107,8 +114,9 @@ pub fn get_elf_bin_verkey_file_path_with_hash(
let n = RomRomTrace::<Goldilocks>::NUM_ROWS;

let rom_cache_file_name = format!(
"{}_{}_{}_{}_{}.verkey.bin",
"{}-d{}_{}_{}_{}_{}.verkey.bin",
hash,
ROM_INPUTS_HASH,
pilout_hash,
&n.to_string(),
&ROM_BLOWUP_FACTOR.to_string(),
Expand Down
Loading
Loading