2323
# Generic retry settings
MAX_RETRIES = 5  # Maximum number of retries
RETRY_DELAY = 5  # Delay between retries (in seconds)

# Import-specific retry settings for transient import errors
# (e.g. Ice race conditions).
IMPORT_MAX_RETRIES = 3  # Maximum number of import attempts
# Base delay between import retries (in seconds); import_dataset multiplies
# it by the attempt number to get an increasing backoff.
IMPORT_RETRY_DELAY = 10

TMP_OUTPUT_FOLDER = "OMERO_inplace"
PROCESSED_DATA_FOLDER = ".processed"

# Error patterns that indicate a transient/retryable import failure
# (e.g. concurrent Ice session race conditions). Each entry is matched as a
# plain substring against the contents of the CLI import .errs file.
RETRYABLE_IMPORT_ERRORS = [
    'Ice.ObjectNotExistException',
    'INTERNAL_EXCEPTION',
    'Ice.ConnectionLostException',
    'Ice.ConnectionRefusedException',
    'Ice.TimeoutException',
]

# Canonical keys for storing preprocessing artifacts on the data_package
PREPROC_META_KEY = "_preprocessing_metadata"
PREPROC_RESULTS_KEY = "_preprocessing_results"
@@ -45,6 +57,39 @@ def get_tmp_output_path(data_package):
4557 return os .path .join ("/OMERO" , TMP_OUTPUT_FOLDER , data_package .get ('UUID' ))
4658
4759
60+ def is_retryable_import_error (errs_file , logger = None ):
61+ """
62+ Check if an import error file contains a retryable (transient) error.
63+
64+ Reads the .errs file produced by the OMERO CLI import and checks for
65+ known transient error patterns (e.g. Ice.ObjectNotExistException from
66+ concurrent import race conditions).
67+
68+ Returns (True, matched_pattern) if retryable, (False, None) otherwise.
69+ """
70+ if not os .path .exists (errs_file ):
71+ if logger :
72+ logger .warning (
73+ f"No .errs file found at { errs_file } — import likely crashed "
74+ f"before writing output. Treating as retryable."
75+ )
76+ return True , "no_errs_file"
77+ try :
78+ with open (errs_file , 'r' ) as f :
79+ content = f .read ()
80+ for pattern in RETRYABLE_IMPORT_ERRORS :
81+ if pattern in content :
82+ if logger :
83+ logger .warning (
84+ f"Retryable error detected in { errs_file } : { pattern } "
85+ )
86+ return True , pattern
87+ except Exception as e :
88+ if logger :
89+ logger .warning (f"Could not read errs file { errs_file } : { e } " )
90+ return False , None
91+
92+
4893def connection (func ):
4994 """
5095 A decorator that wraps a function so that it receives an OMERO user connection.
@@ -421,7 +466,8 @@ def __init__(self, config, data_package, ttl_for_user_conn=6000000):
421466 self .imported = False
422467
423468 @connection
424- def import_to_omero (self , conn , file_path , target_id , target_type , uuid , transfer_type = "ln_s" , depth = None ):
469+ def import_to_omero (self , conn , file_path , target_id , target_type , uuid , transfer_type = "ln_s" , depth = None , log_id = None ):
470+ log_id = log_id or uuid
425471 self .logger .debug (
426472 f"Starting import to OMERO for file: { file_path } , Target: { target_id } ({ target_type } )" )
427473 cli = CLI ()
@@ -434,8 +480,8 @@ def import_to_omero(self, conn, file_path, target_id, target_type, uuid, transfe
434480 '-p' , str (conn .port ),
435481 f'--transfer={ transfer_type } ' ,
436482 '--no-upgrade' ,
437- '--file' , f"logs/cli.{ uuid } .logs" ,
438- '--errs' , f"logs/cli.{ uuid } .errs" ,
483+ '--file' , f"logs/cli.{ log_id } .logs" ,
484+ '--errs' , f"logs/cli.{ log_id } .errs" ,
439485 ]
440486 if 'parallel_upload_per_worker' in self .config :
441487 arguments += ['--parallel-upload' ,
@@ -631,7 +677,7 @@ def get_image_paths(self, conn, file_path, dataset_id):
631677 return [], template_prefixes # Return format consistent with get_plate_ids
632678
633679 @connection
634- def import_dataset (self , conn , target , dataset , transfer = "ln_s" , depth = None ):
680+ def import_dataset (self , conn , target , dataset , transfer = "ln_s" , depth = None , file_index = None ):
635681 kwargs = {"transfer" : transfer }
636682 if 'parallel_upload_per_worker' in self .config :
637683 kwargs ['parallel-upload' ] = str (
@@ -644,18 +690,49 @@ def import_dataset(self, conn, target, dataset, transfer="ln_s", depth=None):
644690 if depth :
645691 kwargs ['depth' ] = str (depth )
646692 uuid = self .data_package .get ('UUID' )
647- kwargs ['file' ] = f"logs/cli.{ uuid } .logs"
648- kwargs ['errs' ] = f"logs/cli.{ uuid } .errs"
649- self .logger .debug (f"EZImport: { conn } { target } { int (dataset )} { kwargs } " )
650- result = ezomero .ezimport (conn = conn , target = target , dataset = int (dataset ), ** kwargs )
651- # Check if import succeeded - ezimport returns None on failure, list (possibly empty) on success
652- if result is not None :
653- self .imported = True
654- self .logger .info (f"Import succeeded, got image IDs: { result } " )
655- else :
656- self .imported = False
657- self .logger .error ("Import failed - ezimport returned None" )
658- return result
693+ # Use per-file log IDs to avoid log collisions when importing multiple files
694+ log_id = f"{ uuid } _{ file_index } " if file_index is not None else uuid
695+
696+ for attempt in range (1 , IMPORT_MAX_RETRIES + 1 ):
697+ # Use attempt-specific log/err files so retries don't overwrite previous evidence
698+ attempt_log_id = f"{ log_id } _attempt{ attempt } " if attempt > 1 else log_id
699+ kwargs ['file' ] = f"logs/cli.{ attempt_log_id } .logs"
700+ kwargs ['errs' ] = f"logs/cli.{ attempt_log_id } .errs"
701+
702+ self .logger .debug (f"EZImport (attempt { attempt } /{ IMPORT_MAX_RETRIES } ): { conn } { target } { int (dataset )} { kwargs } " )
703+ result = ezomero .ezimport (conn = conn , target = target , dataset = int (dataset ), ** kwargs )
704+
705+ # Check if import succeeded - ezimport returns None on failure, list (possibly empty) on success
706+ if result is not None :
707+ self .imported = True
708+ self .logger .info (f"Import succeeded, got image IDs: { result } " )
709+ return result
710+
711+ # Import failed — check if the error is transient/retryable
712+ errs_file = kwargs ['errs' ]
713+ retryable , pattern = is_retryable_import_error (errs_file , self .logger )
714+
715+ if retryable and attempt < IMPORT_MAX_RETRIES :
716+ delay = IMPORT_RETRY_DELAY * attempt # increasing backoff
717+ self .logger .warning (
718+ f"Import failed with retryable error ({ pattern } ). "
719+ f"Retrying in { delay } s (attempt { attempt } /{ IMPORT_MAX_RETRIES } )..."
720+ )
721+ time .sleep (delay )
722+ continue
723+ else :
724+ if retryable :
725+ self .logger .error (
726+ f"Import failed with retryable error ({ pattern } ) but max retries "
727+ f"({ IMPORT_MAX_RETRIES } ) exhausted."
728+ )
729+ else :
730+ self .logger .error (
731+ f"Import failed - ezimport returned None (non-retryable error). "
732+ f"Check { errs_file } for details."
733+ )
734+ self .imported = False
735+ return result
659736
660737 def upload_files (self , conn , file_paths , dataset_id = None , screen_id = None , local_paths = None ):
661738 uuid = self .data_package .get ('UUID' )
@@ -696,12 +773,14 @@ def upload_files(self, conn, file_paths, dataset_id=None, screen_id=None, local_
696773 # and in local_paths folder on the omero server storage
697774 # we will import now in-place from the omero server storage
698775 # and then we'll switch the in-place symlinks to the remote storage (subfolder)
776+ log_id = f"{ uuid } _{ i } "
699777 imported = self .import_to_omero (
700778 file_path = local_path ,
701779 target_id = screen_id ,
702780 target_type = 'Screen' ,
703781 uuid = uuid ,
704- depth = 10
782+ depth = 10 ,
783+ log_id = log_id
705784 )
706785 self .logger .debug ("Upload done. Retrieving plate id." )
707786 image_ids , local_file_dir = self .get_plate_ids (
@@ -712,16 +791,19 @@ def upload_files(self, conn, file_paths, dataset_id=None, screen_id=None, local_
712791 image_ids = self .import_dataset (
713792 target = local_path ,
714793 dataset = dataset_id ,
715- transfer = "ln_s"
794+ transfer = "ln_s" ,
795+ file_index = i
716796 )
717797 self .logger .debug (f"EZimport returned ids { image_ids } for { str (file_path )} ({ dataset_id } )" )
718798 else :
799+ log_id = f"{ uuid } _{ i } "
719800 imported = self .import_to_omero (
720801 file_path = local_path ,
721802 target_id = dataset_id ,
722803 target_type = 'Dataset' ,
723804 uuid = uuid ,
724- depth = 10
805+ depth = 10 ,
806+ log_id = log_id
725807 )
726808 image_ids = dataset_id
727809
@@ -787,12 +869,14 @@ def upload_files(self, conn, file_paths, dataset_id=None, screen_id=None, local_
787869 )
788870 else :
789871 if screen_id : # screen
872+ log_id = f"{ uuid } _{ i } "
790873 imported = self .import_to_omero (
791874 file_path = str (file_path ),
792875 target_id = screen_id ,
793876 target_type = 'Screen' ,
794877 uuid = uuid ,
795- depth = 10
878+ depth = 10 ,
879+ log_id = log_id
796880 )
797881 image_ids , _ = self .get_plate_ids (
798882 str (file_path ), screen_id )
@@ -801,16 +885,19 @@ def upload_files(self, conn, file_paths, dataset_id=None, screen_id=None, local_
801885 image_ids = self .import_dataset (
802886 target = str (file_path ),
803887 dataset = dataset_id ,
804- transfer = "ln_s"
888+ transfer = "ln_s" ,
889+ file_index = i
805890 )
806891 self .logger .debug (f"EZimport returned ids { image_ids } for { str (file_path )} ({ dataset_id } )" )
807892 elif os .path .isdir (file_path ):
893+ log_id = f"{ uuid } _{ i } "
808894 imported = self .import_to_omero (
809895 file_path = str (file_path ),
810896 target_id = dataset_id ,
811897 target_type = 'Dataset' ,
812898 uuid = uuid ,
813- depth = 10
899+ depth = 10 ,
900+ log_id = log_id
814901 )
815902 image_ids = dataset_id
816903 self .logger .debug (f"Set ids { image_ids } to the dataset { dataset_id } " )
0 commit comments