Skip to content

Commit e70e666

Browse files
authored
JIT-2454 Snapshot metric (#181)
Emit metric for uploaded snapshot
1 parent 75b252a commit e70e666

File tree

4 files changed

+77
-2
lines changed

4 files changed

+77
-2
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

gcp_uploader/Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,14 @@ description = "A tool to continuously monitor and upload epoch-related files to
77

88
[dependencies]
99
anyhow = { workspace = true }
10-
clap = { workspace = true, features = ["derive"] }
10+
clap = { workspace = true, features = ["derive", "env"] }
1111
cloud-storage = "0.11"
1212
env_logger = { workspace = true }
1313
futures-util = "0.3.31"
1414
hostname = "0.3"
1515
log = { workspace = true }
1616
regex = "1.10"
1717
serde_json = { workspace = true }
18+
solana-client = { workspace = true }
19+
solana-metrics = { workspace = true }
1820
tokio = { workspace = true, features = ["full"] }

gcp_uploader/monitor-merkle-uploads.service

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ ExecStart=/home/core/jito-tip-router/target/release/gcp_uploader \
1313
Restart=always
1414
RestartSec=10
1515
Environment="RUST_LOG=info"
16+
Environment="SOLANA_METRICS_CONFIG="
17+
Environment="RPC_URL="
1618

1719
[Install]
1820
WantedBy=multi-user.target

gcp_uploader/src/main.rs

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ use env_logger::Env;
44
use hostname::get as get_hostname_raw;
55
use log::{error, info};
66
use regex::Regex;
7+
use solana_client::rpc_client::RpcClient;
8+
use solana_metrics::{datapoint_info, set_host_id};
79
use std::collections::HashSet;
810
use std::path::Path;
911
use std::process::Command;
@@ -34,6 +36,14 @@ struct Args {
3436
/// Directory to scan for snapshot files
3537
#[arg(short, long)]
3638
snapshot_directory: String,
39+
40+
/// Solana JSON RPC URL to fetch current epoch for metrics
41+
#[arg(
42+
long,
43+
env = "RPC_URL",
44+
default_value = "https://api.mainnet-beta.solana.com"
45+
)]
46+
rpc_url: String,
3747
}
3848

3949
#[tokio::main]
@@ -64,6 +74,8 @@ async fn main() -> Result<()> {
6474
// Get hostname
6575
let hostname = get_hostname()?;
6676

77+
set_host_id(hostname.clone());
78+
6779
// Determine bucket name
6880
let bucket_name = args
6981
.bucket
@@ -125,6 +137,18 @@ async fn main() -> Result<()> {
125137
}
126138
}
127139

140+
// Emit metric about whether snapshot for current epoch is present in GCS
141+
if let Err(e) = emit_current_epoch_snapshot_metric(
142+
&args.rpc_url,
143+
&bucket_name,
144+
&hostname,
145+
&args.cluster,
146+
)
147+
.await
148+
{
149+
error!("Error emitting snapshot metric: {}", e);
150+
}
151+
128152
// Wait for the next polling interval
129153
sleep(Duration::from_secs(args.interval)).await;
130154
}
@@ -188,7 +212,7 @@ async fn scan_and_upload_files(
188212
}
189213

190214
/// Scans directory for snapshots & uploads after deriving the associated epoch
191-
#[allow(clippy::arithmetic_side_effects)]
215+
#[allow(clippy::arithmetic_side_effects, clippy::integer_division)]
192216
async fn scan_and_upload_snapshot_files(
193217
dir_path: &Path,
194218
bucket_name: &str,
@@ -311,6 +335,51 @@ async fn upload_file(
311335
Ok(())
312336
}
313337

338+
async fn emit_current_epoch_snapshot_metric(
339+
rpc_url: &str,
340+
bucket_name: &str,
341+
hostname: &str,
342+
cluster: &str,
343+
) -> Result<()> {
344+
// Fetch current epoch via Solana RPC
345+
let client = RpcClient::new(rpc_url.to_string());
346+
let epoch_info = client
347+
.get_epoch_info()
348+
.with_context(|| format!("Failed to fetch epoch info from {}", rpc_url))?;
349+
let epoch = epoch_info.epoch;
350+
351+
// Build GCS prefix path used by upload_file: {epoch}/{hostname}/snapshot-*.tar.zst
352+
// First, list objects under epoch/hostname and look for any snapshot-*.tar.zst
353+
let list_output = Command::new("/opt/gcloud/google-cloud-sdk/bin/gcloud")
354+
.args([
355+
"storage",
356+
"ls",
357+
&format!("gs://{}/{}/{}/", bucket_name, epoch, hostname),
358+
])
359+
.output()
360+
.with_context(|| "Failed to execute gcloud ls for snapshot metric")?;
361+
362+
let uploaded = if list_output.status.success() {
363+
let stdout = String::from_utf8_lossy(&list_output.stdout);
364+
stdout
365+
.lines()
366+
.any(|line| line.contains("snapshot-") && line.ends_with(".tar.zst"))
367+
} else {
368+
false
369+
};
370+
371+
datapoint_info!(
372+
"tip_router_gcp_uploader.snapshot_present",
373+
("epoch", epoch as i64, i64),
374+
("present", uploaded, bool),
375+
"cluster" => cluster,
376+
"hostname" => hostname,
377+
"bucket" => bucket_name,
378+
);
379+
380+
Ok(())
381+
}
382+
314383
fn get_hostname() -> Result<String> {
315384
let hostname = get_hostname_raw()
316385
.context("Failed to get hostname")?

0 commit comments

Comments
 (0)