Skip to content

Commit b5f104c

Browse files
authored
feat(gpu_prover): implement low VRAM mode (#186)
## What ❔ This PR implements low VRAM mode. ## Why ❔ Low VRAM mode makes it possible to run the prover on GPUs with 24GB of VRAM instead of 32GB, at the price of a performance penalty. Depending on the workload and the GPU used, the performance penalty can be in the ballpark of 2-3x. The low VRAM mode can be enabled by setting the environment variable `ZKSYNC_AIRBENDER_LOW_VRAM_MODE` to `1` or `true` (the latter is matched case-insensitively). ## Is this a breaking change? - [ ] Yes - [x] No ## Checklist - [x] PR title corresponds to the body of PR (we generate changelog entries from PRs). - [ ] Tests for the changes have been added / updated. - [x] Documentation comments have been added / updated. - [x] Code has been formatted.
1 parent b1af1cf commit b5f104c

File tree

1 file changed

+11
-18
lines changed

1 file changed

+11
-18
lines changed

gpu_prover/src/execution/gpu_worker.rs

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@ use crate::prover::tracing_data::{InitsAndTeardownsTransfer, TracingDataTransfer
1717
use crate::witness::trace_unrolled::get_aux_arguments_boundary_values;
1818
use crossbeam_channel::{Receiver, Sender};
1919
use era_cudart::device::get_device_properties;
20-
use log::{debug, error, info, trace};
20+
use log::{debug, error, info, trace, warn};
2121
use prover::definitions::AuxArgumentsBoundaryValues;
2222
use std::ffi::CStr;
23-
use std::mem;
2423
use std::ops::Deref;
2524
use std::process::exit;
25+
use std::{env, mem};
2626
use verifier_common::num_queries_for_security_params;
2727

2828
pub fn get_gpu_worker_func(
@@ -52,19 +52,6 @@ enum JobType<'a> {
5252
Proof(ProofJob<'a>),
5353
}
5454

55-
fn get_trees_cache_mode(_circuit_type: CircuitType, _context: &ProverContext) -> TreesCacheMode {
56-
// match circuit_type {
57-
// CircuitType::Main(main) => match main {
58-
// MainCircuitType::ReducedRiscVLog23Machine if (context.get_mem_size() >> 30) < 28 => {
59-
// TreesCacheMode::CacheNone
60-
// } // less than 28GB
61-
// _ => TreesCacheMode::CacheFull,
62-
// },
63-
// _ => TreesCacheMode::CacheFull,
64-
// }
65-
TreesCacheMode::CachePatrial
66-
}
67-
6855
fn gpu_worker(
6956
device_id: i32,
7057
prover_context_config: ProverContextConfig,
@@ -73,6 +60,13 @@ fn gpu_worker(
7360
results: Sender<Option<GpuWorkResult<A>>>,
7461
) -> CudaResult<()> {
7562
trace!("GPU_WORKER[{device_id}] started");
63+
// Recompute cosets in low VRAM mode to reduce memory requirement.
64+
let recompute_cosets = env::var("ZKSYNC_AIRBENDER_LOW_VRAM_MODE")
65+
.map(|s| s == "1" || s.to_lowercase() == "true")
66+
.unwrap_or_default();
67+
if recompute_cosets {
68+
warn!("GPU_WORKER[{device_id}] running in low VRAM mode, this will have negative performance impact");
69+
}
7670
Precomputations::ensure_initialized();
7771
set_device(device_id)?;
7872
let props = get_device_properties(device_id)?;
@@ -251,7 +245,6 @@ fn gpu_worker(
251245
log_lde_factor as usize,
252246
);
253247
let pow_bits = verifier_common::POW_BITS as u32;
254-
let trees_cache_mode = get_trees_cache_mode(circuit_type, &context);
255248
trace!("BATCH[{batch_id}] GPU_WORKER[{device_id}] producing proof for circuit {circuit_type:?}[{sequence_id}]");
256249
let job = prove(
257250
circuit_type,
@@ -268,8 +261,8 @@ fn gpu_worker(
268261
num_queries,
269262
pow_bits,
270263
None,
271-
false,
272-
trees_cache_mode,
264+
recompute_cosets,
265+
TreesCacheMode::CachePatrial,
273266
&context,
274267
)?;
275268
JobType::Proof(job)

0 commit comments

Comments
 (0)