@@ -20,6 +20,7 @@ use std::{
2020 collections:: { BTreeSet , HashMap } ,
2121 num:: NonZeroUsize ,
2222} ;
23+ use time:: PrimitiveDateTime ;
2324use tracing:: { debug, error, info, warn} ;
2425
2526const EVENT_CIPHERTEXT_COMPUTED : & str = "event_ciphertext_computed" ;
@@ -118,6 +119,7 @@ async fn tfhe_worker_cycle(
118119 #[ cfg( feature = "bench" ) ]
119120 populate_cache_with_tenant_keys ( vec ! [ 1i32 ] , & pool, & tenant_key_cache) . await ?;
120121 let mut immedially_poll_more_work = false ;
122+ let mut no_progress_cycles = 0 ;
121123 loop {
122124 // Only wait if the previous iteration did no work
123125 if !immedially_poll_more_work {
@@ -145,7 +147,7 @@ async fn tfhe_worker_cycle(
145147 s. end ( ) ;
146148
147149 // Query for transactions to execute, and if relevant the associated keys
148- let ( mut transactions, _ , has_more_work) = query_for_work (
150+ let ( mut transactions, earliest_computation , has_more_work) = query_for_work (
149151 args,
150152 & health_check,
151153 & mut trx,
@@ -159,7 +161,7 @@ async fn tfhe_worker_cycle(
159161 // for a notification after this cycle.
160162 immedially_poll_more_work = true ;
161163 } else {
162- dcid_mngr. release_current_lock ( true ) . await ?;
164+ dcid_mngr. release_current_lock ( true , None ) . await ?;
163165 dcid_mngr. do_cleanup ( ) . await ?;
164166
165167 // Lock another dependence chain if available and
@@ -212,7 +214,7 @@ async fn tfhe_worker_cycle(
212214 & loop_ctx,
213215 )
214216 . await ?;
215- upload_transaction_graph_results (
217+ let has_progressed = upload_transaction_graph_results (
216218 tenant_id,
217219 & mut tx_graph,
218220 & mut trx,
@@ -221,6 +223,20 @@ async fn tfhe_worker_cycle(
221223 & loop_ctx,
222224 )
223225 . await ?;
226+ if has_progressed {
227+ no_progress_cycles = 0 ;
228+ } else {
229+ no_progress_cycles += 1 ;
230+ if no_progress_cycles >= args. dcid_max_no_progress_cycles {
231+ // If we're not making progress on this dependence
232+ // chain, update the last_updated_at field and
233+ // release the lock so we can try to execute
234+ // another chain.
235+ dcid_mngr
236+ . release_current_lock ( false , Some ( earliest_computation) )
237+ . await ?;
238+ }
239+ }
224240 }
225241 s. end ( ) ;
226242 trx. commit ( ) . await ?;
@@ -313,7 +329,7 @@ async fn query_for_work<'a>(
313329 tracer : & opentelemetry:: global:: BoxedTracer ,
314330 loop_ctx : & opentelemetry:: Context ,
315331) -> Result <
316- ( Vec < ( i32 , Vec < ComponentNode > ) > , Vec < ( Handle , Handle ) > , bool ) ,
332+ ( Vec < ( i32 , Vec < ComponentNode > ) > , PrimitiveDateTime , bool ) ,
317333 Box < dyn std:: error:: Error + Send + Sync > ,
318334> {
319335 let mut s = tracer. start_with_context ( "query_dependence_chain" , loop_ctx) ;
@@ -331,7 +347,7 @@ async fn query_for_work<'a>(
331347 health_check. update_db_access ( ) ;
332348 health_check. update_activity ( ) ;
333349 info ! ( target: "tfhe_worker" , "No dcid found to process" ) ;
334- return Ok ( ( vec ! [ ] , vec ! [ ] , false ) ) ;
350+ return Ok ( ( vec ! [ ] , PrimitiveDateTime :: MAX , false ) ) ;
335351 }
336352
337353 s. set_attribute ( KeyValue :: new (
@@ -357,7 +373,8 @@ SELECT
357373 c.is_scalar,
358374 c.is_allowed,
359375 c.dependence_chain_id,
360- c.transaction_id
376+ c.transaction_id,
377+ c.created_at
361378FROM computations c
362379WHERE c.transaction_id IN (
363380 SELECT DISTINCT
@@ -393,11 +410,12 @@ FOR UPDATE SKIP LOCKED ",
393410 info ! ( target: "tfhe_worker" , dcid = %hex:: encode( dependence_chain_id) , locking = ?locking_reason, "No work items found to process" ) ;
394411 }
395412 health_check. update_activity ( ) ;
396- return Ok ( ( vec ! [ ] , vec ! [ ] , false ) ) ;
413+ return Ok ( ( vec ! [ ] , PrimitiveDateTime :: MAX , false ) ) ;
397414 }
398415 WORK_ITEMS_FOUND_COUNTER . inc_by ( the_work. len ( ) as u64 ) ;
399416 info ! ( target: "tfhe_worker" , { count = the_work. len( ) , dcid = ?dependence_chain_id. as_ref( ) . map( hex:: encode) ,
400- locking = ?locking_reason } , "Processing work items" ) ;
417+ locking = ?locking_reason } , "Processing work items" ) ;
418+ let mut earliest_created_at = the_work. first ( ) . unwrap ( ) . created_at ;
401419 // Make sure we process each tenant independently to avoid
402420 // setting different keys from different tenants in the worker
403421 // threads
@@ -417,7 +435,6 @@ FOR UPDATE SKIP LOCKED ",
417435 }
418436 // Traverse transactions and build transaction nodes
419437 let mut transactions: Vec < ( i32 , Vec < ComponentNode > ) > = vec ! [ ] ;
420- let mut unneeded_handles: Vec < ( Handle , Handle ) > = vec ! [ ] ;
421438 for ( tenant_id, work_by_transaction) in work_by_tenant_by_transaction. iter ( ) {
422439 let mut tenant_transactions: Vec < ComponentNode > = vec ! [ ] ;
423440 for ( transaction_id, txwork) in work_by_transaction. iter ( ) {
@@ -469,15 +486,17 @@ FOR UPDATE SKIP LOCKED ",
469486 inputs,
470487 is_allowed : w. is_allowed ,
471488 } ) ;
489+ if w. created_at < earliest_created_at {
490+ earliest_created_at = w. created_at ;
491+ }
472492 }
473- let ( mut components, mut unneeded ) = build_component_nodes ( ops, transaction_id) ?;
493+ let ( mut components, _ ) = build_component_nodes ( ops, transaction_id) ?;
474494 tenant_transactions. append ( & mut components) ;
475- unneeded_handles. append ( & mut unneeded) ;
476495 }
477496 transactions. push ( ( * tenant_id, tenant_transactions) ) ;
478497 }
479498 s_prep. end ( ) ;
480- Ok ( ( transactions, unneeded_handles , true ) )
499+ Ok ( ( transactions, earliest_created_at , true ) )
481500}
482501
483502#[ allow( clippy:: too_many_arguments) ]
@@ -563,10 +582,11 @@ async fn upload_transaction_graph_results<'a>(
563582 deps_mngr : & mut dependence_chain:: LockMngr ,
564583 tracer : & opentelemetry:: global:: BoxedTracer ,
565584 loop_ctx : & opentelemetry:: Context ,
566- ) -> Result < ( ) , Box < dyn std:: error:: Error + Send + Sync > > {
585+ ) -> Result < ( bool ) , Box < dyn std:: error:: Error + Send + Sync > > {
567586 // Get computation results
568587 let graph_results = tx_graph. get_results ( ) ;
569588 let mut handles_to_update = vec ! [ ] ;
589+ let mut res = false ;
570590
571591 // Traverse computations that have been scheduled and
572592 // upload their results/errors.
@@ -705,8 +725,9 @@ async fn upload_transaction_graph_results<'a>(
705725 err
706726 } ) ?;
707727 s. end ( ) ;
728+ res = true ;
708729 }
709- Ok ( ( ) )
730+ Ok ( res )
710731}
711732
712733#[ allow( clippy:: too_many_arguments) ]
0 commit comments