Skip to content

Commit 4ce80c0

Browse files
perf: cpu TraceCommitter should reserve capacity for LDE (#68)
1 parent 92171ba commit 4ce80c0

File tree

8 files changed

+99
-20
lines changed

8 files changed

+99
-20
lines changed

crates/stark-backend/src/engine.rs

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use crate::{
1313
},
1414
proof::Proof,
1515
prover::{
16-
cpu::{CpuBackend, CpuDevice, PcsData},
16+
cpu::PcsData,
1717
hal::{DeviceDataTransporter, TraceCommitter},
1818
types::{
1919
AirProofInput, AirProvingContext, ProofInput, ProvingContext, SingleCommitPreimage,
@@ -57,14 +57,7 @@ pub trait StarkEngine<SC: StarkGenericConfig> {
5757

5858
fn prover<'a>(&'a self) -> MultiTraceStarkProver<'a, SC>
5959
where
60-
Self: 'a,
61-
{
62-
MultiTraceStarkProver::new(
63-
CpuBackend::<SC>::default(),
64-
CpuDevice::new(self.config()),
65-
self.new_challenger(),
66-
)
67-
}
60+
Self: 'a;
6861

6962
fn verifier(&self) -> MultiTraceStarkVerifier<SC> {
7063
MultiTraceStarkVerifier::new(self.config())

crates/stark-backend/src/prover/cpu/mod.rs

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,6 @@ pub mod opener;
3535
/// Computation of DEEP quotient polynomial and commitment
3636
pub mod quotient;
3737

38-
/// Proves multiple chips with interactions together.
39-
/// This prover implementation is specialized for Interactive AIRs.
40-
pub struct MultiTraceStarkProver<'c, SC: StarkGenericConfig> {
41-
pub config: &'c SC,
42-
}
43-
4438
/// CPU backend using Plonky3 traits.
4539
#[derive(Derivative)]
4640
#[derivative(Clone(bound = ""), Copy(bound = ""), Default(bound = ""))]
@@ -52,6 +46,9 @@ pub struct CpuBackend<SC> {
5246
#[derivative(Clone(bound = ""), Copy(bound = ""))]
5347
pub struct CpuDevice<'a, SC> {
5448
config: &'a SC,
49+
/// When committing a matrix, the matrix is cloned into newly allocated memory.
50+
/// The size of the newly allocated memory will be `matrix.size() << log_blowup_factor`.
51+
log_blowup_factor: usize,
5552
}
5653

5754
impl<SC: StarkGenericConfig> ProverBackend for CpuBackend<SC> {
@@ -103,6 +100,7 @@ impl<SC: StarkGenericConfig> ProverDevice<CpuBackend<SC>> for CpuDevice<'_, SC>
103100

104101
impl<SC: StarkGenericConfig> TraceCommitter<CpuBackend<SC>> for CpuDevice<'_, SC> {
105102
fn commit(&self, traces: &[Arc<RowMajorMatrix<Val<SC>>>]) -> (Com<SC>, PcsData<SC>) {
103+
let log_blowup_factor = self.log_blowup_factor;
106104
let pcs = self.pcs();
107105
let (log_trace_heights, traces_with_domains): (Vec<_>, Vec<_>) = traces
108106
.iter()
@@ -111,7 +109,33 @@ impl<SC: StarkGenericConfig> TraceCommitter<CpuBackend<SC>> for CpuDevice<'_, SC
111109
let log_height: u8 = log2_strict_usize(height).try_into().unwrap();
112110
// Recomputing the domain is lightweight
113111
let domain = pcs.natural_domain_for_degree(height);
114-
(log_height, (domain, matrix.as_ref().clone()))
112+
// pcs.commit takes the trace matrix and in the case of FRI, does in-place cosetDFT
113+
// which requires resizing to a larger buffer size. Since we are cloning anyway,
114+
// we should just allocate the larger size to avoid memory reallocation
115+
// ref: https://github.com/Plonky3/Plonky3/blob/8c8bbb4c17bd2b7ef2404338ab8f9036d5f08337/dft/src/traits.rs#L116
116+
let trace_slice = &matrix.as_ref().values;
117+
let new_buffer_size = trace_slice
118+
.len()
119+
.checked_shl(log_blowup_factor.try_into().unwrap())
120+
.unwrap();
121+
let mut new_buffer = Vec::with_capacity(new_buffer_size);
122+
// SAFETY:
123+
// - `trace_slice` is allocated for `trace_slice.len() * size_of::<F>` bytes, obviously
124+
// - we just allocated `new_buffer` for at least `trace_slice.len() * size_of::<F>` bytes above (more if there's blowup)
125+
// - both pointers refer to buffers of `F` (a `&[F]` slice and a `Vec<F>`), so alignment is guaranteed
126+
// - `new_buffer` is newly allocated so non-overlapping with `trace_slice`
127+
unsafe {
128+
std::ptr::copy_nonoverlapping(
129+
trace_slice.as_ptr(),
130+
new_buffer.as_mut_ptr(),
131+
trace_slice.len(),
132+
);
133+
new_buffer.set_len(trace_slice.len());
134+
}
135+
(
136+
log_height,
137+
(domain, RowMajorMatrix::new(new_buffer, matrix.width)),
138+
)
115139
})
116140
.unzip();
117141
let (commit, data) = pcs.commit(traces_with_domains);

crates/stark-sdk/src/config/baby_bear_bytehash.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ use openvm_stark_backend::{
44
p3_challenger::{HashChallenger, SerializingChallenger32},
55
p3_commit::ExtensionMmcs,
66
p3_field::extension::BinomialExtensionField,
7+
prover::{
8+
cpu::{CpuBackend, CpuDevice},
9+
MultiTraceStarkProver,
10+
},
711
};
812
use p3_baby_bear::BabyBear;
913
use p3_dft::Radix2DitParallel;
@@ -56,6 +60,17 @@ where
5660
&self.config
5761
}
5862

63+
fn prover<'a>(&'a self) -> MultiTraceStarkProver<'a, BabyBearByteHashConfig<H>>
64+
where
65+
Self: 'a,
66+
{
67+
MultiTraceStarkProver::new(
68+
CpuBackend::default(),
69+
CpuDevice::new(self.config(), self.fri_params.log_blowup),
70+
self.new_challenger(),
71+
)
72+
}
73+
5974
fn max_constraint_degree(&self) -> Option<usize> {
6075
Some(self.max_constraint_degree)
6176
}

crates/stark-sdk/src/config/baby_bear_poseidon2.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ use openvm_stark_backend::{
66
p3_challenger::DuplexChallenger,
77
p3_commit::ExtensionMmcs,
88
p3_field::{extension::BinomialExtensionField, Field, FieldAlgebra},
9+
prover::{
10+
cpu::{CpuBackend, CpuDevice},
11+
MultiTraceStarkProver,
12+
},
913
};
1014
use p3_baby_bear::{BabyBear, Poseidon2BabyBear};
1115
use p3_dft::Radix2DitParallel;
@@ -81,6 +85,17 @@ where
8185
&self.config
8286
}
8387

88+
fn prover<'a>(&'a self) -> MultiTraceStarkProver<'a, BabyBearPermutationConfig<P>>
89+
where
90+
Self: 'a,
91+
{
92+
MultiTraceStarkProver::new(
93+
CpuBackend::default(),
94+
CpuDevice::new(self.config(), self.fri_params.log_blowup),
95+
self.new_challenger(),
96+
)
97+
}
98+
8499
fn max_constraint_degree(&self) -> Option<usize> {
85100
Some(self.max_constraint_degree)
86101
}

crates/stark-sdk/src/config/baby_bear_poseidon2_root.rs

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
11
use ff::PrimeField;
22
use openvm_stark_backend::{
3-
config::StarkConfig, interaction::fri_log_up::FriLogUpPhase,
4-
p3_challenger::MultiField32Challenger, p3_commit::ExtensionMmcs,
3+
config::StarkConfig,
4+
interaction::fri_log_up::FriLogUpPhase,
5+
p3_challenger::MultiField32Challenger,
6+
p3_commit::ExtensionMmcs,
57
p3_field::extension::BinomialExtensionField,
8+
prover::{
9+
cpu::{CpuBackend, CpuDevice},
10+
MultiTraceStarkProver,
11+
},
612
};
713
use p3_baby_bear::BabyBear;
814
use p3_bn254_fr::{Bn254Fr, FFBn254Fr, Poseidon2Bn254};
@@ -69,6 +75,17 @@ where
6975
&self.config
7076
}
7177

78+
fn prover<'a>(&'a self) -> MultiTraceStarkProver<'a, BabyBearPermutationRootConfig<P>>
79+
where
80+
Self: 'a,
81+
{
82+
MultiTraceStarkProver::new(
83+
CpuBackend::default(),
84+
CpuDevice::new(self.config(), self.fri_params.log_blowup),
85+
self.new_challenger(),
86+
)
87+
}
88+
7289
fn max_constraint_degree(&self) -> Option<usize> {
7390
Some(self.max_constraint_degree)
7491
}

crates/stark-sdk/src/config/goldilocks_poseidon.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ use openvm_stark_backend::{
66
p3_challenger::DuplexChallenger,
77
p3_commit::ExtensionMmcs,
88
p3_field::{extension::BinomialExtensionField, Field},
9+
prover::{
10+
cpu::{CpuBackend, CpuDevice},
11+
MultiTraceStarkProver,
12+
},
913
};
1014
use p3_dft::Radix2DitParallel;
1115
use p3_fri::{FriConfig, TwoAdicFriPcs};
@@ -73,6 +77,17 @@ where
7377
&self.config
7478
}
7579

80+
fn prover<'a>(&'a self) -> MultiTraceStarkProver<'a, GoldilocksPermutationConfig<P>>
81+
where
82+
Self: 'a,
83+
{
84+
MultiTraceStarkProver::new(
85+
CpuBackend::default(),
86+
CpuDevice::new(self.config(), self.security_params.fri_params.log_blowup),
87+
self.new_challenger(),
88+
)
89+
}
90+
7691
fn max_constraint_degree(&self) -> Option<usize> {
7792
Some(self.max_constraint_degree)
7893
}

crates/stark-sdk/src/dummy_airs/interaction/dummy_interaction_air.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ where
162162
) -> Self {
163163
let air = DummyInteractionAir::new(field_width, is_send, bus_index).partition();
164164
Self {
165-
device: Some(CpuDevice::new(config)),
165+
device: Some(CpuDevice::new(config, 0)),
166166
data: None,
167167
air,
168168
}

crates/stark-sdk/src/dummy_airs/interaction/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ pub fn verify_interactions(
5353
.collect();
5454

5555
let challenger = config::baby_bear_poseidon2::Challenger::new(perm.clone());
56-
let mut prover = MultiTraceStarkProver::new(backend, CpuDevice::new(&config), challenger);
56+
let mut prover = MultiTraceStarkProver::new(backend, CpuDevice::new(&config, 1), challenger);
5757
let proof = prover.prove(&pk, ProvingContext::new(per_air));
5858

5959
// Verify the proof:

0 commit comments

Comments
 (0)