Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions crates/prover/compress_shape_apc.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"shape": {
"heights": {
"BaseAlu": 592576,
"ExtAlu": 6754400,
"MemoryConst": 1334464,
"MemoryVar": 1392288,
"Poseidon2WideDeg3": 170432,
"PrefixSumChecks": 2641920,
"BaseAlu": 592544,
"ExtAlu": 2159104,
"MemoryConst": 495488,
"MemoryVar": 674048,
"Poseidon2WideDeg3": 161184,
"PrefixSumChecks": 877280,
"PublicValues": 16,
"Select": 1087808
},
Expand Down
12 changes: 6 additions & 6 deletions crates/prover/src/shapes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1413,11 +1413,11 @@ mod tests {
}

/// Regenerate `compress_shape_apc.json`, the universal recursion shape used for any APC
/// machine. We pin the machine to RSP / 100 APCs on the largest checked-in block so the
/// committed shape covers every smaller APC configuration. Block selection is essentially
/// free: changing it perturbs the shape by <0.1% across all chips. APC count is the
/// dominant lever (about 20% from APC=50 to APC=100), so 100 is fixed as the upper bound this shape is
/// sized to cover.
/// machine. We pin the machine to RSP / 12 APCs on the largest checked-in block (21740137).
/// 12 is the highest APC count that fits compressed proving in 32 GB VRAM on the current
/// prover; sizing the shape any larger pushes construction + shard work past the GPU limit.
/// Block selection is essentially free: changing it perturbs the shape by <0.1% across all
/// chips.
#[tokio::test]
#[cfg(feature = "apc")]
#[ignore = "should be invoked for apc shape tuning; runs ~3 min once built"]
Expand Down Expand Up @@ -1450,7 +1450,7 @@ mod tests {

let program = Arc::new(Program::from(&elf).expect("parse rsp-client elf"));
let execution_profile = execution_profile_from_program(program, stdin.clone());
let config = sp1_powdr_config(100, 0);
let config = sp1_powdr_config(12, 0);
let pgo_config = PgoConfig::Instruction(execution_profile);
let compiled_program = CompiledProgram::new(&elf, config, pgo_config);
let apcs: Vec<_> = compiled_program
Expand Down
32 changes: 32 additions & 0 deletions crates/prover/src/worker/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,38 @@ impl<C: SP1ProverComponents, A, W> SP1WorkerBuilder<C, A, W> {
worker_client,
}
}

/// Opt out of the eager recursion proving-key setup performed by `SP1RecursionProver::new`.
///
/// Switches the builder's recursion prover config into its "without recursion" mode and
/// leaves every other builder field untouched. Intended for callers that will only ever
/// run `--mode core` (e.g. RSP perf benchmarks); this saves ~14 GB of GPU VRAM at startup
/// on APC-configured machines. Any subsequent compressed/shrink/wrap request will fail
/// with "key not found".
#[cfg(feature = "experimental")]
pub fn without_recursion(mut self) -> SP1WorkerBuilder<C, A, W> {
    // Move the recursion config out of the builder, flip it, and store it back in place.
    let recursion_config =
        self.config.prover_config.recursion_prover_config.without_recursion();
    self.config.prover_config.recursion_prover_config = recursion_config;
    self
}
}

/// Create a [SP1WorkerBuilder] for a CPU worker with default components.
Expand Down
158 changes: 104 additions & 54 deletions crates/prover/src/worker/prover/recursion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ pub struct SP1RecursionProverConfig {
machine: Machine<SP1Field, RiscvAir<SP1Field>>,
/// The reduce shape
pub reduce_shape: SP1RecursionProofShape,
/// When true, `SP1RecursionProver::new` skips the eager compose & deferred PK build
/// entirely. The resulting prover can only serve `--mode core` requests; any
/// compress/shrink/wrap lookup will fail at the use-site. Defaults to `false`;
/// opt in via [`Self::without_recursion`] when the caller knows it will only run
/// core mode (e.g. RSP perf benchmarks).
skip_recursion_pk_init: bool,
}

impl SP1RecursionProverConfig {
Expand Down Expand Up @@ -113,6 +119,7 @@ impl SP1RecursionProverConfig {
vk_map_file: None,
machine,
reduce_shape,
skip_recursion_pk_init: false,
}
}
#[cfg(feature = "experimental")]
Expand All @@ -121,6 +128,15 @@ impl SP1RecursionProverConfig {
Self { vk_verification: false, ..self }
}

#[cfg(feature = "experimental")]
/// Disable the eager compose & deferred PK build inside `SP1RecursionProver::new`.
///
/// Returns the same configuration with the skip flag switched on; all other settings
/// are carried over unchanged. Only use this when the caller is certain it will run
/// `--mode core` exclusively: the resulting prover cannot serve compress/shrink/wrap
/// requests. Saves ~14 GB of GPU VRAM at startup on machines configured with
/// autoprecompiles.
pub fn without_recursion(self) -> Self {
    let mut config = self;
    config.skip_recursion_pk_init = true;
    config
}

#[cfg(feature = "experimental")]
/// Set the path to the recursion vk map.
pub fn with_vk_map_path(self, vk_map_path: String) -> Self {
Expand Down Expand Up @@ -485,62 +501,96 @@ impl<A: ArtifactClient, C: SP1ProverComponents> SP1RecursionProver<A, C> {
recursive_verifier::<SP1GlobalContext, _, InnerConfig>(
compress_verifier.shard_verifier(),
);
for arity in 1..=config.max_compose_arity {
let dummy_input =
dummy_compose_input::<C>(
&reduce_shape,
arity,
recursion_vks_height,
&config.machine,

// Compose & deferred PKs are only needed for compressed/shrink/wrap modes.
// For core-only callers (e.g. RSP perf benchmarks that pre-call
// `SP1RecursionProverConfig::without_recursion`) we skip building them and
// save ~14 GB of GPU VRAM at startup. Any later compose/deferred lookup
// will then return None and the caller will error out with a clear
// "key not found" message.
let (compose_programs, compose_keys, deferred_program, deferred_keys) =
if config.skip_recursion_pk_init {
tracing::warn!(
"SP1RecursionProverConfig::without_recursion was set — skipping \
compose & deferred PK initialization. Only --mode core will work."
);
let mut program = compose_program_from_input(
&recursive_compress_verifier,
config.vk_verification,
&dummy_input,
);
program.shape = Some(reduce_shape.shape.clone());
let program = Arc::new(program);

// Make the reduce keys.
let (tx, rx) = oneshot::channel();
tokio::task::spawn({
let program = program.clone();
let air_prover = compress_prover.clone();
async move {
let permits = ProverSemaphore::new(1);
let (pk, vk) = air_prover.setup(program, permits).await;
tx.send((pk, vk)).ok();
let deferred_program = {
let deferred_input = dummy_deferred_input(
&compress_verifier,
&reduce_shape,
recursion_vks_height,
);
let mut deferred_program = deferred_program_from_input(
&recursive_compress_verifier,
config.vk_verification,
&deferred_input,
);
deferred_program.shape = Some(reduce_shape.shape.clone());
Arc::new(deferred_program)
};
(compose_programs, compose_keys, deferred_program, None)
} else {
for arity in 1..=config.max_compose_arity {
let dummy_input =
dummy_compose_input::<C>(
&reduce_shape,
arity,
recursion_vks_height,
&config.machine,
);
let mut program = compose_program_from_input(
&recursive_compress_verifier,
config.vk_verification,
&dummy_input,
);
program.shape = Some(reduce_shape.shape.clone());
let program = Arc::new(program);

// Make the reduce keys.
let (tx, rx) = oneshot::channel();
tokio::task::spawn({
let program = program.clone();
let air_prover = compress_prover.clone();
async move {
let permits = ProverSemaphore::new(1);
let (pk, vk) = air_prover.setup(program, permits).await;
tx.send((pk, vk)).ok();
}
});
let (pk, vk) = rx.blocking_recv().unwrap();
let pk = unsafe { pk.into_inner() };
compose_keys.insert(arity, (pk, vk));
compose_programs.insert(arity, program);
}
});
let (pk, vk) = rx.blocking_recv().unwrap();
let pk = unsafe { pk.into_inner() };
compose_keys.insert(arity, (pk, vk));
compose_programs.insert(arity, program);
}

// Make the deferred program and keys.
let deferred_input =
dummy_deferred_input(&compress_verifier, &reduce_shape, recursion_vks_height);
let mut deferred_program = deferred_program_from_input(
&recursive_compress_verifier,
config.vk_verification,
&deferred_input,
);
deferred_program.shape = Some(reduce_shape.shape.clone());
let deferred_program = Arc::new(deferred_program);
let (tx, rx) = oneshot::channel();
tokio::task::spawn({
let program = deferred_program.clone();
let air_prover = compress_prover.clone();
async move {
let permits = ProverSemaphore::new(1);
let (pk, vk) = air_prover.setup(program, permits).await;
tx.send((pk, vk)).ok();
}
});
let (pk, vk) = rx.blocking_recv().unwrap();
let pk = unsafe { pk.into_inner() };
let deferred_keys = (pk, vk);
// Make the deferred program and keys.
let deferred_input = dummy_deferred_input(
&compress_verifier,
&reduce_shape,
recursion_vks_height,
);
let mut deferred_program = deferred_program_from_input(
&recursive_compress_verifier,
config.vk_verification,
&deferred_input,
);
deferred_program.shape = Some(reduce_shape.shape.clone());
let deferred_program = Arc::new(deferred_program);
let (tx, rx) = oneshot::channel();
tokio::task::spawn({
let program = deferred_program.clone();
let air_prover = compress_prover.clone();
async move {
let permits = ProverSemaphore::new(1);
let (pk, vk) = air_prover.setup(program, permits).await;
tx.send((pk, vk)).ok();
}
});
let (pk, vk) = rx.blocking_recv().unwrap();
let pk = unsafe { pk.into_inner() };
let deferred_keys = Some((pk, vk));
(compose_programs, compose_keys, deferred_program, deferred_keys)
};

let prover_data = Arc::new(RecursionProverData {
recursion_vks,
Expand All @@ -549,7 +599,7 @@ impl<A: ArtifactClient, C: SP1ProverComponents> SP1RecursionProver<A, C> {
compose_programs,
compose_keys,
deferred_program,
deferred_keys: Some(deferred_keys),
deferred_keys,
});

let compress_verifier = C::compress_verifier(&config.machine);
Expand Down
9 changes: 9 additions & 0 deletions sp1-gpu/crates/perf/src/bin/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,15 @@ async fn main() {
let worker_builder = base_builder.without_vk_verification();
#[cfg(not(feature = "mprotect"))]
let worker_builder = base_builder;
// Core-only runs don't need the recursion compose/deferred PKs, which would
// otherwise eagerly allocate ~14 GB of GPU VRAM at construction time. Opt out
// when the caller has requested `--mode core`.
#[cfg(feature = "experimental")]
let worker_builder = if proof_mode_from_string(&args.mode) == ProofMode::Core {
worker_builder.without_recursion()
} else {
worker_builder
};
let client =
SP1LocalNodeBuilder::from_worker_client_builder(worker_builder).build().await.unwrap();

Expand Down
Loading