Skip to content

Commit 51b90b2

Browse files
- fix: resolve stack overflow in casper debug tests (F1R3FLY-io#305, F1R3FLY-io#306);
* - fix: resolve stack overflow in casper debug tests (F1R3FLY-io#305, F1R3FLY-io#306); * - delete --release from CI; * - fix i64 overflow in put_and_collect_partial_data_from_the_store test; * - create unit tests for deep Rholang recursion; - increase stack red zone so that stacker works correctly;
1 parent d6535c5 commit 51b90b2

10 files changed

Lines changed: 184 additions & 6 deletions

File tree

.cargo/config.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@
22
#
33
# This file contains build configuration and compiler flags for the entire workspace.
44

5+
[env]
6+
# Set the minimum stack size of spawned threads (which includes the test harness's threads) to 8 MiB (8388608 bytes).
7+
# The Rholang interpreter uses deep async recursion (eval → produce/consume → dispatch → eval)
8+
# via Box::pin patterns. In debug builds, each recursion level consumes significantly more
9+
# stack space (~1-2KB) than in release builds (~100-200 bytes) due to lack of inlining and
10+
# unoptimized async state machines. The default 2MB stack overflows during normal test execution.
11+
# See: https://github.com/F1R3FLY-io/f1r3node/issues/305
12+
RUST_MIN_STACK = "8388608"
13+
514
[build]
615
# Enable native CPU features for gxhash (requires AES and SSE2 intrinsics)
716
rustflags = ["-C", "target-cpu=native"]

.github/workflows/build-test-and-deploy.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ jobs:
156156
run: |
157157
mkdir -p /tmp/test-logs
158158
pushd ${{ matrix.tests }}
159-
cargo test --release 2>&1 | tee /tmp/test-logs/${{ matrix.tests }}-test-output.txt
159+
cargo test 2>&1 | tee /tmp/test-logs/${{ matrix.tests }}-test-output.txt
160160
TEST_EXIT_CODE=${PIPESTATUS[0]}
161161
popd
162162
exit $TEST_EXIT_CODE

Cargo.lock

Lines changed: 42 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ imbl = "7"
4343
regex = "1.12"
4444
async-trait = "0.1"
4545
metrics = "0.24"
46+
stacker = "0.1"
4647

4748
[profile.dev]
4849
debug = true
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
use crate::genesis::contracts::test_util::TestUtil;
2+
use crate::util::rholang::resources::{generate_scope_id, mk_test_rnode_store_manager_shared};
3+
use casper::rust::genesis::genesis::Genesis;
4+
use crypto::rust::hash::blake2b512_random::Blake2b512Random;
5+
use models::rhoapi::{BindPattern, ListParWithRandom};
6+
use rholang::rust::build::compile_rholang_source::CompiledRholangSource;
7+
use rholang::rust::interpreter::matcher::r#match::Matcher;
8+
use rholang::rust::interpreter::rho_runtime::create_runtime_from_kv_store;
9+
use rspace_plus_plus::rspace::r#match::Match;
10+
use std::collections::HashMap;
11+
use std::sync::Arc;
12+
use std::time::Duration;
13+
14+
async fn eval_rholang_code(code: &str, timeout: Duration) -> Result<(), String> {
15+
let scope_id = generate_scope_id();
16+
let mut kvs_manager = mk_test_rnode_store_manager_shared(scope_id);
17+
let r_store = kvs_manager
18+
.r_space_stores()
19+
.await
20+
.map_err(|e| format!("Failed to create RSpaceStore: {}", e))?;
21+
22+
let matcher =
23+
Arc::new(Box::new(Matcher::default()) as Box<dyn Match<BindPattern, ListParWithRandom>>);
24+
25+
let runtime = create_runtime_from_kv_store(
26+
r_store,
27+
Genesis::non_negative_mergeable_tag_name(),
28+
true,
29+
&mut vec![],
30+
matcher,
31+
rholang::rust::interpreter::external_services::ExternalServices::noop(),
32+
)
33+
.await;
34+
35+
let rand = Blake2b512Random::create_from_length(128);
36+
37+
match tokio::time::timeout(
38+
timeout,
39+
TestUtil::eval(code, &runtime, HashMap::new(), rand),
40+
)
41+
.await
42+
{
43+
Ok(Ok(())) => Ok(()),
44+
Ok(Err(e)) => Err(format!("Interpreter error: {:?}", e)),
45+
Err(_) => Err(format!("Timeout of {:?} expired", timeout)),
46+
}
47+
}
48+
49+
/// Regression test for https://github.com/F1R3FLY-io/f1r3node/issues/305
50+
///
51+
/// shortslow.rho: direct recursive contract that calls itself 32768 times.
52+
/// Without StackGrowingFuture, this causes stack overflow in debug builds.
53+
#[tokio::test]
54+
async fn deep_recursion_shortslow_should_not_stackoverflow() {
55+
let code = CompiledRholangSource::load_source("shortslow.rho")
56+
.expect("Failed to load shortslow.rho");
57+
58+
let result = eval_rholang_code(&code, Duration::from_secs(300)).await;
59+
assert!(result.is_ok(), "shortslow deep recursion failed: {:?}", result.err());
60+
}
61+
62+
/// Regression test for https://github.com/F1R3FLY-io/f1r3node/issues/306
63+
///
64+
/// longslow.rho: sends a 32768-char string to a channel, reads its length,
65+
/// then recurses that many times. This exercises produce/consume + string ops
66+
/// in addition to deep recursion, matching the exact integration test scenario.
67+
#[tokio::test]
68+
async fn deep_recursion_longslow_should_not_stackoverflow() {
69+
let code = CompiledRholangSource::load_source("longslow.rho")
70+
.expect("Failed to load longslow.rho");
71+
72+
let result = eval_rholang_code(&code, Duration::from_secs(300)).await;
73+
assert!(result.is_ok(), "longslow deep recursion failed: {:?}", result.err());
74+
}

casper/tests/genesis/contracts/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ pub mod stack_spec;
2121
pub mod standard_deploys_spec;
2222
pub mod timeout_result_collector_spec;
2323
pub mod tree_hash_map_spec;
24+
pub mod deep_recursion_spec;
2425

2526
// See casper/src/test/scala/coop/rchain/casper/genesis/contracts/package.scala
2627
pub const GENESIS_TEST_TIMEOUT: Duration = Duration::from_secs(60);

casper/tests/util/in_memory_key_value_store_spec.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,13 @@ mod tests {
268268
// Note: Fixed the bug here - using max() instead of min()
269269
let k_min = *keys.iter().min().unwrap();
270270
let k_max = *keys.iter().max().unwrap(); // Fixed: was keys.min in Scala
271-
let k_avg = k_max - k_min / 2;
271+
// Old formula: k_max - k_min / 2, which parses as k_max - (k_min / 2) — when k_min is negative, subtracting
272+
// a negative becomes addition, which overflows i64. In --release Rust
273+
// silently wraps and the test "passes" with an incorrect result.
274+
// In debug builds the overflow check panics instead.
275+
// New formula: computes the average (up to integer rounding) without overflow for any i64 values,
276+
// even at extreme ranges.
277+
let k_avg = k_min / 2 + k_max / 2 + (k_min % 2 + k_max % 2) / 2;
272278

273279
let expected_filtered: HashMap<i64, String> = expected
274280
.iter()

rholang/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ typed-arena = "2.0.2"
5353
tracing = { workspace = true }
5454
tracing-subscriber = { workspace = true }
5555
metrics = { workspace = true }
56+
stacker = { workspace = true }
5657
reqwest = { version = "0.11", features = ["json"] }
5758
serde_json = { workspace = true }
5859

rholang/src/rust/build/compile_rholang_source.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ impl CompiledRholangSource {
4141
format!("src/test/resources/{}", filepath),
4242
format!("../casper/src/main/resources/{}", filepath),
4343
format!("../casper/src/test/resources/{}", filepath),
44+
format!("rholang/examples/{}", filepath),
45+
format!("../rholang/examples/{}", filepath),
4446
];
4547

4648
for path in &possible_paths {

rholang/src/rust/interpreter/reduce.rs

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,10 @@ use prost::Message;
2828
use rspace_plus_plus::rspace::util::unpack_option_with_peek;
2929
use std::collections::{BTreeMap, BTreeSet};
3030
use std::collections::{HashMap, HashSet};
31+
use std::future::Future;
3132
use std::pin::Pin;
3233
use std::sync::{Arc, RwLock};
34+
use std::task::{Context, Poll};
3335

3436
use crate::rust::interpreter::accounting::costs::{
3537
add_cost, bytes_to_hex_cost, diff_cost, hex_to_bytes_cost, interpolate_cost, keys_method_cost,
@@ -57,6 +59,46 @@ use super::unwrap_option_safe;
5759
use super::util::GeneratedMessage;
5860
use models::rust::pathmap_crate_type_mapper::PathMapCrateTypeMapper;
5961

62+
/// Minimum remaining stack space (in bytes) before growing.
63+
/// When the current stack has less than this amount remaining, a new stack segment is allocated.
64+
// 128 KB is too small: one pass through the Rholang interpreter's recursion cycle
65+
// (eval → produce/consume → dispatch → eval) consumes more than 128 KB between
66+
// stacker checks, so the overflow happens before stacker can grow the stack.
67+
const STACK_RED_ZONE: usize = 1024 * 1024; // 1 MB
68+
69+
/// Size of each new stack segment allocated when the red zone is reached.
70+
const STACK_GROW_SIZE: usize = 2 * 1024 * 1024; // 2 MB
71+
72+
/// A Future wrapper that dynamically grows the thread stack during polling.
73+
///
74+
/// The Rholang interpreter uses deep async recursion: eval → produce/consume → dispatch → eval.
75+
/// Each poll of this recursive future chain adds stack frames. In debug builds, unoptimized
76+
/// async state machines consume ~1-2KB per recursion level, causing stack overflow with the
77+
/// default 2MB thread stack.
78+
///
79+
/// `StackGrowingFuture` wraps each recursive entry point (eval, produce, consume, dispatch).
80+
/// On each poll, `stacker::maybe_grow` checks remaining stack space. If below STACK_RED_ZONE,
81+
/// it allocates a new STACK_GROW_SIZE segment and runs the poll there. This allows arbitrarily
82+
/// deep Rholang recursion (e.g., longslow.rho with 32768 iterations) without stack overflow.
83+
///
84+
/// See: https://github.com/F1R3FLY-io/f1r3node/issues/305
85+
/// See: https://github.com/F1R3FLY-io/f1r3node/issues/306
86+
struct StackGrowingFuture<F> {
87+
inner: F,
88+
}
89+
90+
impl<F: Future> Future for StackGrowingFuture<F> {
91+
type Output = F::Output;
92+
93+
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
94+
// SAFETY: Structural pin projection on a single-field struct with no Drop impl.
95+
// `inner` is only accessed through this pinned projection, and StackGrowingFuture
96+
// does not implement Unpin when F doesn't, preserving pin guarantees.
97+
let inner = unsafe { self.map_unchecked_mut(|s| &mut s.inner) };
98+
stacker::maybe_grow(STACK_RED_ZONE, STACK_GROW_SIZE, || inner.poll(cx))
99+
}
100+
}
101+
60102
/**
61103
* Reduce is the interface for evaluating Rholang expressions.
62104
*/
@@ -95,7 +137,7 @@ impl DebruijnInterpreter {
95137
env: &'a Env<Par>,
96138
rand: Blake2b512Random,
97139
) -> Pin<Box<dyn std::future::Future<Output = Result<(), InterpreterError>> + std::marker::Send + 'a>> {
98-
Box::pin(self.eval_inner(par, env, rand))
140+
Box::pin(StackGrowingFuture { inner: self.eval_inner(par, env, rand) })
99141
}
100142

101143
async fn eval_inner(
@@ -212,7 +254,7 @@ impl DebruijnInterpreter {
212254
data: ListParWithRandom,
213255
persistent: bool,
214256
) -> Pin<Box<dyn std::future::Future<Output = Result<DispatchType, InterpreterError>> + std::marker::Send + 'a>> {
215-
Box::pin(self.produce_inner(chan, data, persistent))
257+
Box::pin(StackGrowingFuture { inner: self.produce_inner(chan, data, persistent) })
216258
}
217259

218260
async fn produce_inner(
@@ -283,7 +325,7 @@ impl DebruijnInterpreter {
283325
persistent: bool,
284326
peek: bool,
285327
) -> Pin<Box<dyn std::future::Future<Output = Result<DispatchType, InterpreterError>> + std::marker::Send + 'a>> {
286-
Box::pin(self.consume_inner(binds, body, persistent, peek))
328+
Box::pin(StackGrowingFuture { inner: self.consume_inner(binds, body, persistent, peek) })
287329
}
288330

289331
async fn consume_inner(
@@ -543,7 +585,7 @@ impl DebruijnInterpreter {
543585
is_replay: bool,
544586
previous_output: Vec<Par>,
545587
) -> Pin<Box<dyn std::future::Future<Output = Result<DispatchType, InterpreterError>> + std::marker::Send + 'a>> {
546-
Box::pin(self.dispatch_inner(continuation, data_list, is_replay, previous_output))
588+
Box::pin(StackGrowingFuture { inner: self.dispatch_inner(continuation, data_list, is_replay, previous_output) })
547589
}
548590

549591
async fn dispatch_inner(

0 commit comments

Comments
 (0)