-
Notifications
You must be signed in to change notification settings - Fork 11
fix: transaction retry in case of invalid nonce #124
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
hlgltvnnk
wants to merge
41
commits into
near:main
Choose a base branch
from
hlgltvnnk:fix/nonce-race-condition
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
41 commits
Select commit
Hold shift + click to select a range
fb3aa7d
tests: multiple sequential tx test added
hlgltvnnk 9488867
fix: sequential tx broadcast
hlgltvnnk ba1d927
fix: tx group locking removed due to blocking full dashmap shard
hlgltvnnk aa8eef5
sequential signer implementation added
hlgltvnnk 4a537c4
meta transaction sequential processing added
hlgltvnnk a82f196
test: updated tests for multisigner
hlgltvnnk 206d3b8
simplified sequential mode impl
hlgltvnnk 6056756
chores
hlgltvnnk ddd2d8c
updated sequential tests
hlgltvnnk ffb62d3
nonce tests added
hlgltvnnk c086c0d
docs added
hlgltvnnk 100ef12
fetch tx tests added
hlgltvnnk 3ed779e
bug with block_hash found
hlgltvnnk b7a973f
FinalExecutionOutcome deserialization bug found
hlgltvnnk b754c2b
reverted changes with into<Network>
hlgltvnnk ec54830
chores
hlgltvnnk 2f75a72
Merge branch 'main' into fix/nonce-race-condition
hlgltvnnk 86e6ccd
cspell cfg update
hlgltvnnk 468e736
Execution final result -> TransactionResult
hlgltvnnk 0a1c067
tmp: run workflow
hlgltvnnk 322f3c9
tx count in non_sequential tests decreased
hlgltvnnk 91aeee1
fmt
hlgltvnnk e60ca70
chores
hlgltvnnk 240ac97
chores
hlgltvnnk f12e72c
Merge branch 'main' into fix/nonce-race-condition
hlgltvnnk f55d04a
dashmap replaced
hlgltvnnk 8de95cb
Merge branch 'main' into fix/nonce-race-condition
vsavchyn-dev 122ff8b
signer seq mod enabled by default
hlgltvnnk 6459600
chores
hlgltvnnk 56620a8
chores
hlgltvnnk 5fe79bf
Merge branch 'main' into fix/nonce-race-condition
hlgltvnnk 76dcacc
sequential mode removed, invalid tx retries added
hlgltvnnk 6258563
refactoring
hlgltvnnk aa8a267
chores
hlgltvnnk 2b84c08
chores
hlgltvnnk a502991
openapi client cache added
hlgltvnnk 46337cb
chores
hlgltvnnk f8a2b01
max retriries added
hlgltvnnk 9d68a37
chores
hlgltvnnk 5f603b8
chores
hlgltvnnk 04f8908
chores
hlgltvnnk File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,235 @@ | ||
| use std::{sync::atomic::Ordering, time::Duration}; | ||
|
|
||
| use near_api_types::{ | ||
| AccountId, BlockHeight, CryptoHash, Nonce, PublicKey, Reference, TxExecutionStatus, | ||
| transaction::{PrepopulateTransaction, result::TransactionResult}, | ||
| }; | ||
|
|
||
| use near_openapi_client::types::RpcTransactionError; | ||
| use tokio::time::sleep; | ||
| use tracing::{debug, instrument, warn}; | ||
|
|
||
| use crate::{ | ||
| Signer, | ||
| advanced::{ExecuteMetaTransaction, ExecuteSignedTransaction, TxExecutionResult}, | ||
| config::NetworkConfig, | ||
| errors::{ | ||
| ExecuteMetaTransactionsError, ExecuteTransactionError, MetaSignError, RetryError, | ||
| SendRequestError, SignerError, | ||
| }, | ||
| signer::SIGNER_TARGET, | ||
| }; | ||
|
|
||
| impl Signer { | ||
| async fn fetch_nonce_data( | ||
| account_id: AccountId, | ||
| public_key: PublicKey, | ||
| network: &NetworkConfig, | ||
| ) -> Result<(Nonce, CryptoHash, BlockHeight), SignerError> { | ||
| debug!(target: SIGNER_TARGET, "Fetching latest nonce"); | ||
|
|
||
| let nonce_data = crate::account::Account(account_id.clone()) | ||
| .access_key(public_key) | ||
| .at(Reference::Final) | ||
| .fetch_from(network) | ||
| .await | ||
| .map_err(|e| SignerError::FetchNonceError(Box::new(e)))?; | ||
|
|
||
| Ok(( | ||
| nonce_data.data.nonce.0, | ||
| nonce_data.block_hash, | ||
| nonce_data.block_height, | ||
| )) | ||
| } | ||
|
|
||
| /// Fetches the transaction nonce and block hash associated to the access key. Internally | ||
| /// caches the nonce as to not need to query for it every time, and ending up having to run | ||
| /// into contention with others. | ||
| /// | ||
| /// Uses finalized block hash to avoid "Transaction Expired" errors when sending transactions | ||
| /// to load-balanced RPC endpoints where different nodes may be at different chain heights. | ||
| #[allow(clippy::significant_drop_tightening)] | ||
| #[instrument(skip(self, network))] | ||
| pub async fn fetch_tx_nonce( | ||
| &self, | ||
| account_id: AccountId, | ||
| public_key: PublicKey, | ||
| network: &NetworkConfig, | ||
| ) -> Result<(Nonce, CryptoHash, BlockHeight), SignerError> { | ||
| debug!(target: SIGNER_TARGET, "Fetching transaction nonce"); | ||
|
|
||
| let key = (network.network_name.clone(), account_id.clone(), public_key); | ||
|
|
||
| let (fetched_nonce, block_hash, block_height) = | ||
| Self::fetch_nonce_data(account_id, public_key, network).await?; | ||
|
|
||
| let nonce = { | ||
| let mut nonce_cache = self.nonce_cache.lock().await; | ||
| let nonce = nonce_cache.entry(key).or_default(); | ||
|
|
||
| *nonce = (*nonce).max(fetched_nonce) + 1; | ||
| *nonce | ||
| }; | ||
|
|
||
| Ok((nonce, block_hash, block_height)) | ||
| } | ||
|
|
||
| /// Signs and sends a transaction to the network. | ||
| /// | ||
| /// Concurrent broadcasting of transactions of the same transaction group | ||
| /// (network, account, public key) can cause nonce conflicts | ||
| /// (`InvalidTransaction` errors), so this method retries with a fresh nonce | ||
| /// up to `max_nonce_retries` specified in the signer configuration | ||
| #[instrument(skip(self, network, transaction, account_id))] | ||
| pub async fn sign_and_send( | ||
| &self, | ||
| account_id: impl Into<AccountId>, | ||
| network: &NetworkConfig, | ||
| transaction: PrepopulateTransaction, | ||
| wait_until: TxExecutionStatus, | ||
| ) -> TxExecutionResult { | ||
| let account_id = account_id.into(); | ||
| let public_key = self | ||
| .get_public_key() | ||
| .await | ||
| .map_err(SignerError::PublicKeyError)?; | ||
|
|
||
| self.sign_and_send_with_retry(account_id, public_key, network, transaction, wait_until) | ||
| .await | ||
| } | ||
|
|
||
| async fn sign_and_send_with_retry( | ||
| &self, | ||
| account_id: AccountId, | ||
| public_key: PublicKey, | ||
| network: &NetworkConfig, | ||
| transaction: PrepopulateTransaction, | ||
| wait_until: TxExecutionStatus, | ||
| ) -> TxExecutionResult { | ||
| let max_nonce_retries = self.max_nonce_retries.load(Ordering::SeqCst); | ||
| let attempts = max_nonce_retries + 1; // +1 for the initial attempt | ||
|
|
||
| for attempt in 0..attempts { | ||
| match self | ||
| .broadcast_tx( | ||
| &account_id, | ||
| public_key, | ||
| network, | ||
| transaction.clone(), | ||
| wait_until, | ||
| ) | ||
| .await | ||
| { | ||
| Err(err) if Self::is_retryable_nonce_error(&err) && attempt + 1 < attempts => { | ||
| warn!( | ||
| target: SIGNER_TARGET, | ||
| account_id = %account_id, | ||
| attempt = attempt + 1, | ||
| max_attempts = max_nonce_retries, | ||
| error = ?err, | ||
| "Invalid transaction detected, retrying after delay" | ||
| ); | ||
|
|
||
| let delay = Self::calculate_retry_delay(attempt); | ||
| sleep(delay).await; | ||
| } | ||
|
|
||
| result => return result, | ||
| } | ||
| } | ||
|
|
||
| unreachable!("loop always returns on the final attempt") | ||
| } | ||
|
|
||
| const fn is_retryable_nonce_error(error: &ExecuteTransactionError) -> bool { | ||
| // TODO: check tx nonce error after fix in near openapi types | ||
| matches!( | ||
| error, | ||
| ExecuteTransactionError::TransactionError(RetryError::Critical( | ||
| SendRequestError::ServerError(RpcTransactionError::InvalidTransaction(_)) | ||
| )) | ||
| ) | ||
| } | ||
|
|
||
| fn calculate_retry_delay(attempt: u32) -> Duration { | ||
| const INITIAL_RETRY_DELAY: Duration = Duration::from_secs(2); | ||
|
|
||
| INITIAL_RETRY_DELAY * 2u32.pow(attempt) | ||
| } | ||
|
|
||
| /// Signs and sends a meta transaction to the relayer. | ||
| /// | ||
| /// This method is used to sign and send a meta transaction to the relayer. | ||
| #[instrument(skip(self, network, transaction, account_id))] | ||
| pub async fn sign_and_send_meta( | ||
| &self, | ||
| account_id: impl Into<AccountId>, | ||
| network: &NetworkConfig, | ||
| transaction: PrepopulateTransaction, | ||
| tx_live_for: BlockHeight, | ||
| ) -> Result<reqwest::Response, ExecuteMetaTransactionsError> { | ||
| let account_id = account_id.into(); | ||
| let public_key = self | ||
| .get_public_key() | ||
| .await | ||
| .map_err(SignerError::PublicKeyError) | ||
| .map_err(MetaSignError::from)?; | ||
|
|
||
| self.broadcast_meta_tx(account_id, public_key, network, transaction, tx_live_for) | ||
| .await | ||
| } | ||
|
|
||
| #[allow(clippy::significant_drop_tightening)] | ||
| #[instrument(skip(self, account_id, network))] | ||
| async fn broadcast_tx( | ||
| &self, | ||
| account_id: &AccountId, | ||
| public_key: PublicKey, | ||
| network: &NetworkConfig, | ||
| transaction: PrepopulateTransaction, | ||
| wait_until: TxExecutionStatus, | ||
| ) -> Result<TransactionResult, ExecuteTransactionError> { | ||
| debug!(target: SIGNER_TARGET, "Broadcasting transaction"); | ||
|
|
||
| let (fetched_nonce, block_hash, _) = self | ||
| .fetch_tx_nonce(account_id.clone(), public_key, network) | ||
| .await?; | ||
|
|
||
| let signed = self | ||
| .sign(transaction, public_key, fetched_nonce, block_hash) | ||
| .await?; | ||
|
|
||
| ExecuteSignedTransaction::send_impl(network, signed, wait_until).await | ||
| } | ||
|
|
||
| #[allow(clippy::significant_drop_tightening)] | ||
| #[instrument(skip(self, account_id, network))] | ||
| async fn broadcast_meta_tx( | ||
| &self, | ||
| account_id: impl Into<AccountId>, | ||
| public_key: PublicKey, | ||
| network: &NetworkConfig, | ||
| transaction: PrepopulateTransaction, | ||
| tx_live_for: BlockHeight, | ||
| ) -> Result<reqwest::Response, ExecuteMetaTransactionsError> { | ||
| debug!(target: SIGNER_TARGET, "Broadcasting meta transaction"); | ||
| let account_id = account_id.into(); | ||
|
|
||
| let (fetched_nonce, block_hash, block_height) = self | ||
| .fetch_tx_nonce(account_id, public_key, network) | ||
| .await | ||
| .map_err(MetaSignError::from)?; | ||
|
|
||
| let signed = self | ||
| .sign_meta( | ||
| transaction, | ||
| public_key, | ||
| fetched_nonce, | ||
| block_hash, | ||
| block_height + tx_live_for, | ||
| ) | ||
| .await?; | ||
|
|
||
| ExecuteMetaTransaction::send_impl(network, signed).await | ||
| } | ||
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OpenapiClientCacheuses the entireRPCEndpoint(includingretries/retry_methodandbearer_header) as the cache key. This can significantly reduce reuse (same URL but different retry settings yields different clients) and it also stores/clones the API key string in a process-wide static map for the lifetime of the program. Consider keying the cache only by client-affecting fields (e.g., URL + auth header) and avoid retaining the raw secret in the key (store a digest/fingerprint instead).