Skip to content

Commit e57601e

Browse files
Sicheng Pan (Sicheng-Pan)
Sicheng Pan
authored and committed
Retry fork
1 parent 81a7c9e commit e57601e

File tree

2 files changed

+51
-1
lines changed

2 files changed

+51
-1
lines changed

Diff for: rust/frontend/src/impls/service_based_frontend.rs

+43 -1
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,7 @@ use super::utils::to_records;
4444

4545
#[derive(Debug)]
4646
struct Metrics {
47+
fork_retries_counter: Counter<u64>,
4748
delete_retries_counter: Counter<u64>,
4849
count_retries_counter: Counter<u64>,
4950
query_retries_counter: Counter<u64>,
@@ -73,11 +74,13 @@ impl ServiceBasedFrontend {
7374
default_knn_index: KnnIndex,
7475
) -> Self {
7576
let meter = global::meter("chroma");
77+
let fork_retries_counter = meter.u64_counter("fork_retries").build();
7678
let delete_retries_counter = meter.u64_counter("delete_retries").build();
7779
let count_retries_counter = meter.u64_counter("count_retries").build();
7880
let query_retries_counter = meter.u64_counter("query_retries").build();
7981
let get_retries_counter = meter.u64_counter("query_retries").build();
8082
let metrics = Arc::new(Metrics {
83+
fork_retries_counter,
8184
delete_retries_counter,
8285
count_retries_counter,
8386
query_retries_counter,
@@ -543,7 +546,7 @@ impl ServiceBasedFrontend {
543546
Ok(DeleteCollectionRecordsResponse {})
544547
}
545548

546-
pub async fn fork_collection(
549+
pub async fn retryable_fork(
547550
&mut self,
548551
ForkCollectionRequest {
549552
source_collection_id,
@@ -556,6 +559,9 @@ impl ServiceBasedFrontend {
556559
.sysdb_client
557560
.fork_collection(
558561
source_collection_id,
562+
// TODO: Update this when wiring up log fork
563+
0,
564+
0,
559565
target_collection_id,
560566
target_collection_name,
561567
)
@@ -570,6 +576,42 @@ impl ServiceBasedFrontend {
570576
Ok(collection)
571577
}
572578

579+
pub async fn fork_collection(
580+
&mut self,
581+
request: ForkCollectionRequest,
582+
) -> Result<ForkCollectionResponse, ForkCollectionError> {
583+
let retries = Arc::new(AtomicUsize::new(0));
584+
let fork_to_retry = || {
585+
let mut self_clone = self.clone();
586+
let request_clone = request.clone();
587+
async move { self_clone.retryable_fork(request_clone).await }
588+
};
589+
590+
let res = fork_to_retry
591+
.retry(self.collections_with_segments_provider.get_retry_backoff())
592+
// NOTE: Transport level errors will manifest as unknown errors, and they should also be retried
593+
.when(|e| {
594+
matches!(
595+
e.code(),
596+
ErrorCodes::FailedPrecondition | ErrorCodes::NotFound | ErrorCodes::Unknown
597+
)
598+
})
599+
.notify(|_, _| {
600+
let retried = retries.fetch_add(1, Ordering::Relaxed);
601+
if retried > 0 {
602+
tracing::info!(
603+
"Retrying fork() request for collection {}",
604+
request.source_collection_id
605+
);
606+
}
607+
})
608+
.await;
609+
self.metrics
610+
.fork_retries_counter
611+
.add(retries.load(Ordering::Relaxed) as u64, &[]);
612+
res
613+
}
614+
573615
pub async fn add(
574616
&mut self,
575617
AddCollectionRecordsRequest {

Diff for: rust/sysdb/src/sysdb.rs

+8
Original file line number | Diff line number | Diff line change
@@ -318,6 +318,8 @@ impl SysDb {
318318
pub async fn fork_collection(
319319
&mut self,
320320
source_collection_id: CollectionUuid,
321+
source_collection_log_compaction_offset: u64,
322+
source_collection_log_enumeration_offset: u64,
321323
target_collection_id: CollectionUuid,
322324
target_collection_name: String,
323325
) -> Result<CollectionAndSegments, ForkCollectionError> {
@@ -326,6 +328,8 @@ impl SysDb {
326328
grpc_sys_db
327329
.fork_collection(
328330
source_collection_id,
331+
source_collection_log_compaction_offset,
332+
source_collection_log_enumeration_offset,
329333
target_collection_id,
330334
target_collection_name,
331335
)
@@ -921,13 +925,17 @@ impl GrpcSysDb {
921925
pub async fn fork_collection(
922926
&mut self,
923927
source_collection_id: CollectionUuid,
928+
source_collection_log_compaction_offset: u64,
929+
source_collection_log_enumeration_offset: u64,
924930
target_collection_id: CollectionUuid,
925931
target_collection_name: String,
926932
) -> Result<CollectionAndSegments, ForkCollectionError> {
927933
let res = self
928934
.client
929935
.fork_collection(chroma_proto::ForkCollectionRequest {
930936
source_collection_id: source_collection_id.0.to_string(),
937+
source_collection_log_compaction_offset,
938+
source_collection_log_enumeration_offset,
931939
target_collection_id: target_collection_id.0.to_string(),
932940
target_collection_name: target_collection_name.clone(),
933941
})

0 commit comments

Comments
 (0)