|
| 1 | +//! # End-to-end Hawkes process example with real orderbook data |
| 2 | +//! |
| 3 | +//! Pipeline: |
| 4 | +//! |
| 5 | +//! 1. Load a Bybit SOLUSDT orderbook parquet file. |
| 6 | +//! 2. Extract timestamps → treat each snapshot as an "arrival". |
| 7 | +//! 3. Convert to milliseconds, compute interarrival times. |
| 8 | +//! 4. Hold out the last 10 arrivals as a test set. |
| 9 | +//! 5. Print descriptive statistics and gap diagnostics on the training set. |
| 10 | +//! 6. Fit a univariate Hawkes process (MLE) on the training interarrivals. |
| 11 | +//! 7. Use the fitted model to forecast the next 10 arrival times. |
| 12 | +//! 8. Compare forecast vs. actual, print the diff. |
| 13 | +//! |
| 14 | +//! ```text |
| 15 | +//! cargo run -p atelier_quant --example eg_hawkes_ob_arrivals |
| 16 | +//! ``` |
| 17 | +
|
| 18 | +use std::path::Path; |
| 19 | + |
| 20 | +use atelier_data::orderbooks::io::ob_parquet::load_parquet_to_ob; |
| 21 | +use atelier_data::temporal::{self, TimeResolution}; |
| 22 | + |
| 23 | +use atelier_quant::arrivals::extract::extract_orderbook_timestamps; |
| 24 | +use atelier_quant::arrivals::inter::{compute_interarrivals, descriptive_stats}; |
| 25 | +use atelier_quant::hawkes::estimation::{ |
| 26 | + compensator, estimate_hawkes_mle, time_rescaling_residuals, HawkesEstimationConfig, |
| 27 | +}; |
| 28 | +use atelier_quant::hawkes::HawkesProcess; |
| 29 | + |
| 30 | +// ── Helpers ───────────────────────────────────────────────────────── |
| 31 | + |
| 32 | +/// Pretty-print a horizontal separator. |
/// Print a section banner: the label centered in a 72-column rule of `═`.
fn separator(label: &str) {
    let banner = format!(" {} ", label);
    println!("\n{:═^72}", banner);
}
| 36 | + |
| 37 | +/// Print a small table row. |
/// Print one aligned table row: a left-padded label column followed by its value.
fn row(label: &str, value: impl std::fmt::Display) {
    let line = format!(" {:<30} {}", label, value);
    println!("{}", line);
}
| 41 | + |
| 42 | +// ── Main ──────────────────────────────────────────────────────────── |
| 43 | + |
| 44 | +fn main() { |
| 45 | + // ── 1. Load orderbook parquet ─────────────────────────────────── |
| 46 | + separator("1. Load Parquet"); |
| 47 | + |
| 48 | + let parquet_path = Path::new( |
| 49 | + "datasets/collected/bybit/SOLUSDT/orderbooks/ob_bybit_20260217_003728.819.parquet", |
| 50 | + ); |
| 51 | + |
| 52 | + println!(" File: {}", parquet_path.display()); |
| 53 | + |
| 54 | + let orderbooks = load_parquet_to_ob(parquet_path).unwrap_or_else(|e| { |
| 55 | + eprintln!(" ERROR: Failed to load parquet: {}", e); |
| 56 | + std::process::exit(1); |
| 57 | + }); |
| 58 | + |
| 59 | + println!(" Loaded {} orderbook snapshots", orderbooks.len()); |
| 60 | + |
| 61 | + if orderbooks.len() < 12 { |
| 62 | + eprintln!(" ERROR: Need at least 12 snapshots (10 test + 2 train), got {}", orderbooks.len()); |
| 63 | + std::process::exit(1); |
| 64 | + } |
| 65 | + |
| 66 | + // ── 2. Extract timestamps (nanoseconds) ───────────────────────── |
| 67 | + separator("2. Extract Timestamps"); |
| 68 | + |
| 69 | + let timestamps_ns = extract_orderbook_timestamps(&orderbooks); |
| 70 | + println!(" Total arrivals: {}", timestamps_ns.len()); |
| 71 | + println!( |
| 72 | + " First ts (ns): {}", |
| 73 | + timestamps_ns.first().unwrap_or(&0) |
| 74 | + ); |
| 75 | + println!( |
| 76 | + " Last ts (ns): {}", |
| 77 | + timestamps_ns.last().unwrap_or(&0) |
| 78 | + ); |
| 79 | + |
| 80 | + // Convert to milliseconds for display |
| 81 | + let first_ms = temporal::from_nanos(*timestamps_ns.first().unwrap(), TimeResolution::Milliseconds); |
| 82 | + let last_ms = temporal::from_nanos(*timestamps_ns.last().unwrap(), TimeResolution::Milliseconds); |
| 83 | + let span_s = (last_ms - first_ms) / 1000.0; |
| 84 | + println!(" Observation window: {:.3} seconds ({:.1} ms)", span_s, last_ms - first_ms); |
| 85 | + |
| 86 | + // ── 3. Validate and detect gaps ───────────────────────────────── |
| 87 | + separator("3. Validation & Gap Detection"); |
| 88 | + |
| 89 | + // Validate monotonicity |
| 90 | + let mut is_monotonic = true; |
| 91 | + for i in 1..timestamps_ns.len() { |
| 92 | + if timestamps_ns[i] <= timestamps_ns[i - 1] { |
| 93 | + eprintln!( |
| 94 | + " ✗ Monotonicity violation at index {}: {} <= {}", |
| 95 | + i, timestamps_ns[i], timestamps_ns[i - 1] |
| 96 | + ); |
| 97 | + is_monotonic = false; |
| 98 | + break; |
| 99 | + } |
| 100 | + } |
| 101 | + if is_monotonic { |
| 102 | + println!("Timestamps are strictly monotonic"); |
| 103 | + } else { |
| 104 | + std::process::exit(1); |
| 105 | + } |
| 106 | + |
| 107 | + // Detect gaps > 5 seconds (5_000_000_000 ns) — likely feed disconnects |
| 108 | + let gap_threshold_ns = 5_000_000_000_u64; |
| 109 | + let mut n_gaps = 0_usize; |
| 110 | + for i in 1..timestamps_ns.len() { |
| 111 | + let gap = timestamps_ns[i] - timestamps_ns[i - 1]; |
| 112 | + if gap > gap_threshold_ns { |
| 113 | + if n_gaps == 0 { |
| 114 | + println!(" ⚠ Gaps exceeding {:.1}s:", gap_threshold_ns as f64 / 1e9); |
| 115 | + } |
| 116 | + println!(" index {}: gap = {:.3} ms", i - 1, gap as f64 / 1e6); |
| 117 | + n_gaps += 1; |
| 118 | + } |
| 119 | + } |
| 120 | + if n_gaps == 0 { |
| 121 | + println!(" ✓ No gaps exceeding {:.1}s detected", gap_threshold_ns as f64 / 1e9); |
| 122 | + } else { |
| 123 | + println!(" Total large gaps: {}", n_gaps); |
| 124 | + } |
| 125 | + |
| 126 | + // ── 4. Train/test split ───────────────────────────────────────── |
| 127 | + separator("4. Train / Test Split"); |
| 128 | + |
| 129 | + let n_test = 10; |
| 130 | + let n_total = timestamps_ns.len(); |
| 131 | + let n_train = n_total - n_test; |
| 132 | + |
| 133 | + let train_ts = ×tamps_ns[..n_train]; |
| 134 | + let test_ts = ×tamps_ns[n_train..]; |
| 135 | + |
| 136 | + println!(" Training set: {} arrivals", train_ts.len()); |
| 137 | + println!(" Test set: {} arrivals", test_ts.len()); |
| 138 | + |
| 139 | + // ── 5. Compute interarrivals + stats (training set) ───────────── |
| 140 | + separator("5. Interarrival Statistics (Training)"); |
| 141 | + |
| 142 | + let ia_result = |
| 143 | + compute_interarrivals(train_ts, TimeResolution::Milliseconds).unwrap_or_else(|e| { |
| 144 | + eprintln!(" ERROR: {}", e); |
| 145 | + std::process::exit(1); |
| 146 | + }); |
| 147 | + |
| 148 | + let stats = descriptive_stats(&ia_result.deltas_f64).unwrap(); |
| 149 | + |
| 150 | + row("Count (gaps)", format!("{}", stats.count)); |
| 151 | + row("Mean (ms)", format!("{:.6}", stats.mean)); |
| 152 | + row("Std dev (ms)", format!("{:.6}", stats.std_dev)); |
| 153 | + row("Variance (ms²)", format!("{:.6}", stats.variance)); |
| 154 | + row("Min (ms)", format!("{:.6}", stats.min)); |
| 155 | + row("Max (ms)", format!("{:.6}", stats.max)); |
| 156 | + row("Skewness", format!("{:.4}", stats.skewness)); |
| 157 | + row("Excess kurtosis", format!("{:.4}", stats.kurtosis)); |
| 158 | + row("CV (σ/μ)", format!("{:.4}", stats.covariance)); |
| 159 | + |
| 160 | + if stats.covariance > 1.0 { |
| 161 | + println!("\n → CV > 1 indicates clustering (super-Poisson), consistent with Hawkes excitation."); |
| 162 | + } else if (stats.covariance - 1.0).abs() < 0.15 { |
| 163 | + println!("\n → CV ≈ 1 suggests near-Poisson (memoryless) arrivals."); |
| 164 | + } else { |
| 165 | + println!("\n → CV < 1 indicates regularity (sub-Poisson), less common for LOB data."); |
| 166 | + } |
| 167 | + |
| 168 | + // ── 6. Fit Hawkes MLE ─────────────────────────────────────────── |
| 169 | + separator("6. Hawkes MLE Estimation"); |
| 170 | + |
| 171 | + // Build event times in milliseconds relative to the first arrival. |
| 172 | + // This keeps numbers in a reasonable range for the optimizer. |
| 173 | + let t0_ns = train_ts[0]; |
| 174 | + let train_events_ms: Vec<f64> = train_ts |
| 175 | + .iter() |
| 176 | + .map(|&t| temporal::from_nanos(t - t0_ns, TimeResolution::Milliseconds)) |
| 177 | + .collect(); |
| 178 | + |
| 179 | + let config = HawkesEstimationConfig { |
| 180 | + max_iter: 10_000, |
| 181 | + tol: 1e-4, |
| 182 | + learning_rate: 1e-3, |
| 183 | + initial_params: None, |
| 184 | + }; |
| 185 | + |
| 186 | + let mle = estimate_hawkes_mle(&train_events_ms, &config).unwrap_or_else(|e| { |
| 187 | + eprintln!(" ERROR: MLE failed: {}", e); |
| 188 | + std::process::exit(1); |
| 189 | + }); |
| 190 | + |
| 191 | + row("μ̂ (events/ms)", format!("{:.8}", mle.mu)); |
| 192 | + row("α̂ (excitation)", format!("{:.8}", mle.alpha)); |
| 193 | + row("β̂ (decay 1/ms)", format!("{:.8}", mle.beta)); |
| 194 | + row("Branching ratio α̂/β̂", format!("{:.6}", mle.branching_ratio)); |
| 195 | + row("Log-likelihood", format!("{:.4}", mle.log_likelihood)); |
| 196 | + row("AIC", format!("{:.4}", mle.aic)); |
| 197 | + row("BIC", format!("{:.4}", mle.bic)); |
| 198 | + row("Iterations", format!("{}", mle.iterations)); |
| 199 | + row("Converged", format!("{}", mle.converged)); |
| 200 | + |
| 201 | + let theoretical_rate = mle.mu / (1.0 - mle.branching_ratio); |
| 202 | + row("Stationary rate (ev/ms)", format!("{:.8}", theoretical_rate)); |
| 203 | + row( |
| 204 | + "Stationary mean gap (ms)", |
| 205 | + format!("{:.6}", 1.0 / theoretical_rate), |
| 206 | + ); |
| 207 | + |
| 208 | + // ── 7. Goodness-of-fit: time-rescaling residuals ──────────────── |
| 209 | + separator("7. Goodness-of-Fit (Time-Rescaling)"); |
| 210 | + |
| 211 | + let residuals = |
| 212 | + time_rescaling_residuals(mle.mu, mle.alpha, mle.beta, &train_events_ms); |
| 213 | + |
| 214 | + let res_stats = descriptive_stats(&residuals); |
| 215 | + if let Some(rs) = &res_stats { |
| 216 | + row("Residuals count", format!("{}", rs.count)); |
| 217 | + row("Residuals mean", format!("{:.6}", rs.mean)); |
| 218 | + row("Residuals std dev", format!("{:.6}", rs.std_dev)); |
| 219 | + println!( |
| 220 | + "\n Under correct specification, residuals ~ Exp(1): mean ≈ 1.0, std ≈ 1.0" |
| 221 | + ); |
| 222 | + if (rs.mean - 1.0).abs() < 0.3 { |
| 223 | + println!(" → Mean {:.3} is within 30% of 1.0: reasonable fit.", rs.mean); |
| 224 | + } else { |
| 225 | + println!(" → Mean {:.3} deviates from 1.0: model may be mis-specified.", rs.mean); |
| 226 | + } |
| 227 | + } |
| 228 | + |
| 229 | + // ── 8. Forecast next 10 arrivals ──────────────────────────────── |
| 230 | + separator("8. Forecast Next 10 Arrivals"); |
| 231 | + |
| 232 | + // Strategy: use the compensator to convert from Hawkes time to |
| 233 | + // calendar time. We simulate from the fitted model starting at the |
| 234 | + // last training event. |
| 235 | + let last_train_ms = *train_events_ms.last().unwrap(); |
| 236 | + |
| 237 | + // Build a HawkesProcess with the fitted parameters and simulate |
| 238 | + let hp = HawkesProcess::new(mle.mu, mle.alpha, mle.beta).unwrap_or_else(|e| { |
| 239 | + eprintln!(" ERROR: Could not create HawkesProcess: {:?}", e); |
| 240 | + std::process::exit(1); |
| 241 | + }); |
| 242 | + |
| 243 | + // We simulate events continuing from the last training time. |
| 244 | + // The intensity at the boundary depends on the full training history, |
| 245 | + // so we pass the last training time as the start. |
| 246 | + let forecasted_events_ms = hp.generate_values(last_train_ms, n_test); |
| 247 | + |
| 248 | + // Convert forecasted absolute times (ms relative to t0) back to |
| 249 | + // nanosecond timestamps: ms × 1_000_000 = ns, then add origin. |
| 250 | + let _forecasted_ts_ns: Vec<u64> = forecasted_events_ms |
| 251 | + .iter() |
| 252 | + .map(|&t_ms| t0_ns + (t_ms * 1_000_000.0) as u64) |
| 253 | + .collect(); |
| 254 | + |
| 255 | + // ── 9. Compare forecast vs actual ─────────────────────────────── |
| 256 | + separator("9. Forecast vs Actual Comparison"); |
| 257 | + |
| 258 | + // Compute interarrival gaps for both series |
| 259 | + // For actual: gaps between last train event and each subsequent test event |
| 260 | + let actual_arrivals_ms: Vec<f64> = test_ts |
| 261 | + .iter() |
| 262 | + .map(|&t| temporal::from_nanos(t - t0_ns, TimeResolution::Milliseconds)) |
| 263 | + .collect(); |
| 264 | + |
| 265 | + let forecast_arrivals_ms: Vec<f64> = forecasted_events_ms.clone(); |
| 266 | + |
| 267 | + // Compute cumulative interarrivals from the last training point |
| 268 | + let actual_gaps: Vec<f64> = actual_arrivals_ms |
| 269 | + .iter() |
| 270 | + .map(|&t| t - last_train_ms) |
| 271 | + .collect(); |
| 272 | + |
| 273 | + let forecast_gaps: Vec<f64> = forecast_arrivals_ms |
| 274 | + .iter() |
| 275 | + .map(|&t| t - last_train_ms) |
| 276 | + .collect(); |
| 277 | + |
| 278 | + println!( |
| 279 | + " {:>4} {:>16} {:>16} {:>16}", |
| 280 | + "i", "Actual Δt (ms)", "Forecast Δt (ms)", "Diff (ms)" |
| 281 | + ); |
| 282 | + println!(" {}", "─".repeat(68)); |
| 283 | + |
| 284 | + for i in 0..n_test { |
| 285 | + let actual = actual_gaps[i]; |
| 286 | + let forecast = if i < forecast_gaps.len() { |
| 287 | + forecast_gaps[i] |
| 288 | + } else { |
| 289 | + f64::NAN |
| 290 | + }; |
| 291 | + let diff = actual - forecast; |
| 292 | + println!( |
| 293 | + " {:>4} {:>16.4} {:>16.4} {:>16.4}", |
| 294 | + i + 1, |
| 295 | + actual, |
| 296 | + forecast, |
| 297 | + diff |
| 298 | + ); |
| 299 | + } |
| 300 | + |
| 301 | + // ── 10. Summary error metrics ─────────────────────────────────── |
| 302 | + separator("10. Forecast Error Metrics"); |
| 303 | + |
| 304 | + let n_compare = n_test.min(forecast_gaps.len()); |
| 305 | + let mut sum_abs_err = 0.0_f64; |
| 306 | + let mut sum_sq_err = 0.0_f64; |
| 307 | + |
| 308 | + for i in 0..n_compare { |
| 309 | + let err = actual_gaps[i] - forecast_gaps[i]; |
| 310 | + sum_abs_err += err.abs(); |
| 311 | + sum_sq_err += err * err; |
| 312 | + } |
| 313 | + |
| 314 | + let mae = sum_abs_err / n_compare as f64; |
| 315 | + let rmse = (sum_sq_err / n_compare as f64).sqrt(); |
| 316 | + |
| 317 | + row("MAE (ms)", format!("{:.6}", mae)); |
| 318 | + row("RMSE (ms)", format!("{:.6}", rmse)); |
| 319 | + row("Mean actual gap (ms)", format!("{:.6}", actual_gaps.iter().sum::<f64>() / n_compare as f64)); |
| 320 | + row("Mean forecast gap (ms)", format!("{:.6}", forecast_gaps.iter().sum::<f64>() / n_compare as f64)); |
| 321 | + |
| 322 | + // ── 11. Compensator at boundary (diagnostic) ──────────────────── |
| 323 | + separator("11. Compensator Diagnostic"); |
| 324 | + |
| 325 | + let t_end = *train_events_ms.last().unwrap(); |
| 326 | + let comp_end = compensator(mle.mu, mle.alpha, mle.beta, &train_events_ms, t_end); |
| 327 | + let expected_comp = (train_events_ms.len() - 1) as f64; // under correct model, Λ(T) ≈ n-1 |
| 328 | + |
| 329 | + row("Λ(T) at last train event", format!("{:.4}", comp_end)); |
| 330 | + row("Expected (n-1)", format!("{}", train_events_ms.len() - 1)); |
| 331 | + row( |
| 332 | + "Ratio Λ(T)/(n-1)", |
| 333 | + format!("{:.4}", comp_end / expected_comp), |
| 334 | + ); |
| 335 | + |
| 336 | + println!("\n Under correct specification, Λ(T)/(n−1) ≈ 1.0"); |
| 337 | + |
| 338 | + separator("Done"); |
| 339 | + println!(" Example completed successfully.\n"); |
| 340 | +} |
0 commit comments