diff --git a/Cargo.lock b/Cargo.lock index 8b194988..bf1cec47 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13374,7 +13374,7 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tip-router-operator-cli" -version = "3.0.0" +version = "3.0.1" dependencies = [ "anyhow", "base64 0.22.1", diff --git a/tip-router-operator-cli/Cargo.toml b/tip-router-operator-cli/Cargo.toml index 400e7c00..7cfd7348 100644 --- a/tip-router-operator-cli/Cargo.toml +++ b/tip-router-operator-cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tip-router-operator-cli" -version = "3.0.0" +version = "3.0.1" edition = "2021" description = "CLI for Jito Tip Router" diff --git a/tip-router-operator-cli/src/process_epoch.rs b/tip-router-operator-cli/src/process_epoch.rs index 0c67503f..0675814d 100644 --- a/tip-router-operator-cli/src/process_epoch.rs +++ b/tip-router-operator-cli/src/process_epoch.rs @@ -112,8 +112,44 @@ pub async fn loop_stages( save_stages: bool, ) -> Result<()> { let keypair = read_keypair_file(&cli.keypair_path).expect("Failed to read keypair file"); - let mut current_epoch_info = rpc_client.get_epoch_info().await?; - let epoch_schedule = rpc_client.get_epoch_schedule().await?; + + let mut current_epoch_info = { + loop { + match rpc_client.get_epoch_info().await { + Ok(info) => break info, + Err(e) => { + error!("Error getting epoch info from RPC. Retrying..."); + datapoint_error!( + "tip_router_cli.get_epoch_info", + ("operator_address", cli.operator_address.clone(), String), + ("status", "error", String), + ("error", e.to_string(), String), + "cluster" => &cli.cluster, + ); + tokio::time::sleep(Duration::from_secs(5)).await; + } + } + } + }; + + let epoch_schedule = { + loop { + match rpc_client.get_epoch_schedule().await { + Ok(schedule) => break schedule, + Err(e) => { + error!("Error getting epoch schedule from RPC. Retrying..."); + datapoint_error!( + "tip_router_cli.get_epoch_schedule", + ("operator_address", cli.operator_address.clone(), String), + ("status", "error", String), + ("error", e.to_string(), String), + "cluster" => &cli.cluster, + ); + tokio::time::sleep(Duration::from_secs(5)).await; + } + } + } + }; // Track runs that are starting right at the beginning of a new epoch let operator_address = cli.operator_address.clone(); @@ -289,7 +325,7 @@ pub async fn loop_stages( meta_merkle_tree_path(epoch_to_process, &cli.get_save_path()); let operator_address = Pubkey::from_str(&cli.operator_address)?; - submit_to_ncn( + let submit_result = submit_to_ncn( &rpc_client, &keypair, &operator_address, @@ -305,7 +341,22 @@ pub async fn loop_stages( cli.vote_microlamports, &cli.cluster, ) - .await?; + .await; + if let Err(e) = submit_result { + error!( + "Failed to submit epoch {} to NCN: {:?}", + epoch_to_process, e + ); + datapoint_error!( + "tip_router_cli.cast_vote", + ("operator_address", operator_address.to_string(), String), + ("epoch", epoch_to_process, i64), + ("status", "error", String), + ("error", e.to_string(), String), + ("state", "cast_vote", String), + "cluster" => &cli.cluster, + ); + } stage = OperatorState::WaitForNextEpoch; } OperatorState::WaitForNextEpoch => { diff --git a/tip-router-operator-cli/src/submit.rs b/tip-router-operator-cli/src/submit.rs index 857b6255..2bfccefe 100644 --- a/tip-router-operator-cli/src/submit.rs +++ b/tip-router-operator-cli/src/submit.rs @@ -91,10 +91,27 @@ pub async fn submit_to_ncn( compute_unit_price: u64, cluster: &str, ) -> Result<(), anyhow::Error> { - let epoch_info = client.get_epoch_info().await?; - let meta_merkle_tree = MetaMerkleTree::new_from_file(meta_merkle_tree_path)?; + let epoch_info = client + .get_epoch_info() + .await + .map_err(|e| anyhow::anyhow!("Failed to fetch epoch info from RPC client: {:?}", e))?; + let meta_merkle_tree = MetaMerkleTree::new_from_file(meta_merkle_tree_path).map_err(|e| { + anyhow::anyhow!( + "Failed to load Meta Merkle Tree from file {:?}: {:?}", + meta_merkle_tree_path, + e + ) + })?; let config_pda = Config::find_program_address(tip_router_program_id, ncn_address).0; - let config = get_ncn_config(client, tip_router_program_id, ncn_address).await?; + let config = get_ncn_config(client, tip_router_program_id, ncn_address) + .await + .map_err(|e| { + anyhow::anyhow!( + "Failed to fetch Tip Router config for NCN {}: {:?}", + ncn_address, + e + ) + })?; // The meta merkle root files are tagged with the epoch they have created the snapshot for // Tip router accounts for that merkle root are created in the next epoch @@ -119,12 +136,35 @@ pub async fn submit_to_ncn( } }; - let ballot_box = BallotBox::try_from_slice_unchecked(&ballot_box_account.data)?; + let ballot_box = + BallotBox::try_from_slice_unchecked(&ballot_box_account.data).map_err(|e| { + datapoint_error!( + "tip_router_cli.ballot_box_deserialize_error", + ("operator_address", operator_address.to_string(), String), + ("epoch", tip_router_target_epoch, i64), + ("status", "error", String), + ("error", format!("{:?}", e), String), + "cluster" => cluster, + ); + anyhow::anyhow!("Failed to deserialize ballot box: {:?}", e) + })?; - let is_voting_valid = ballot_box.is_voting_valid( - epoch_info.absolute_slot, - config.valid_slots_after_consensus(), - )?; + let is_voting_valid = ballot_box + .is_voting_valid( + epoch_info.absolute_slot, + config.valid_slots_after_consensus(), + ) + .map_err(|e| { + datapoint_error!( + "tip_router_cli.voting_validity_error", + ("operator_address", operator_address.to_string(), String), + ("epoch", tip_router_target_epoch, i64), + ("status", "error", String), + ("error", format!("{:?}", e), String), + "cluster" => cluster, + ); + anyhow::anyhow!("Failed to determine if voting is valid: {:?}", e) + })?; // If exists, look for vote from current operator let vote = ballot_box