@@ -999,14 +999,16 @@ def _import_collection_copied_associations(
999999 # sense.
10001000 hdca_copied_from_sinks = object_import_tracker .hdca_copied_from_sinks
10011001 if copied_from_object_key in object_import_tracker .hdcas_by_key :
1002- hdca .copied_from_history_dataset_collection_association = object_import_tracker .hdcas_by_key [
1003- copied_from_object_key
1004- ]
1002+ source_hdca = object_import_tracker.hdcas_by_key[copied_from_object_key]
1003+ if source_hdca is not hdca:
1004+ # We may not have the copied source, in which case the first included HDCA in the chain
1005+ # acts as the source, so here we make sure we don't create a cycle.
1006+ hdca.copied_from_history_dataset_collection_association = source_hdca
10051007 else :
10061008 if copied_from_object_key in hdca_copied_from_sinks :
1007- hdca . copied_from_history_dataset_collection_association = object_import_tracker .hdcas_by_key [
1008- hdca_copied_from_sinks [ copied_from_object_key ]
1009- ]
1009+ source_hdca = object_import_tracker.hdcas_by_key[hdca_copied_from_sinks[copied_from_object_key]]
1010+ if source_hdca is not hdca:
1011+ hdca.copied_from_history_dataset_collection_association = source_hdca
10101012 else :
10111013 hdca_copied_from_sinks [copied_from_object_key ] = dataset_collection_key
10121014
@@ -1072,7 +1074,7 @@ def attach_workflow_step(imported_object, attrs):
10721074 for step_attrs in invocation_attrs ["steps" ]:
10731075 imported_invocation_step = model .WorkflowInvocationStep ()
10741076 imported_invocation_step .workflow_invocation = imported_invocation
1075- ensure_object_added_to_session (imported_invocation , session = self .sa_session )
1077+ ensure_object_added_to_session(imported_invocation_step, session=self.sa_session)
10761078 attach_workflow_step (imported_invocation_step , step_attrs )
10771079 restore_times (imported_invocation_step , step_attrs )
10781080 imported_invocation_step .action = step_attrs ["action" ]
@@ -1926,12 +1928,14 @@ def __init__(
19261928 self .export_files = export_files
19271929 self .included_datasets : Dict [model .DatasetInstance , Tuple [model .DatasetInstance , bool ]] = {}
19281930 self .dataset_implicit_conversions : Dict [model .DatasetInstance , model .ImplicitlyConvertedDatasetAssociation ] = {}
1929- self .included_collections : List [Union [model .DatasetCollection , model .HistoryDatasetCollectionAssociation ]] = []
1931+ self.included_collections: Dict[
1932+ Union[model.DatasetCollection, model.HistoryDatasetCollectionAssociation],
1933+ Union[model.DatasetCollection, model.HistoryDatasetCollectionAssociation],
1934+ ] = {}
19301935 self .included_libraries : List [model .Library ] = []
19311936 self .included_library_folders : List [model .LibraryFolder ] = []
19321937 self .included_invocations : List [model .WorkflowInvocation ] = []
19331938 self .collection_datasets : Set [int ] = set ()
1934- self .collections_attrs : List [Union [model .DatasetCollection , model .HistoryDatasetCollectionAssociation ]] = []
19351939 self .dataset_id_to_path : Dict [int , Tuple [Optional [str ], Optional [str ]]] = {}
19361940
19371941 self .job_output_dataset_associations : Dict [int , Dict [str , model .DatasetInstance ]] = {}
@@ -2298,8 +2302,7 @@ def export_collection(
22982302 def add_dataset_collection (
22992303 self , collection : Union [model .DatasetCollection , model .HistoryDatasetCollectionAssociation ]
23002304 ) -> None :
2301- self .collections_attrs .append (collection )
2302- self .included_collections .append (collection )
2305+ self.included_collections[collection] = collection
23032306
23042307 def add_implicit_conversion_dataset (
23052308 self ,
@@ -2362,7 +2365,7 @@ def to_json(attributes):
23622365
23632366 collections_attrs_filename = os .path .join (export_directory , ATTRS_FILENAME_COLLECTIONS )
23642367 with open (collections_attrs_filename , "w" ) as collections_attrs_out :
2365- collections_attrs_out .write (to_json (self .collections_attrs ))
2368+ collections_attrs_out.write(to_json(self.included_collections.values()))
23662369
23672370 conversions_attrs_filename = os .path .join (export_directory , ATTRS_FILENAME_CONVERSIONS )
23682371 with open (conversions_attrs_filename , "w" ) as conversions_attrs_out :
@@ -2383,12 +2386,12 @@ def to_json(attributes):
23832386 #
23842387
23852388 # Get all jobs associated with included HDAs.
2386- jobs_dict : Dict [str , model .Job ] = {}
2389+ jobs_dict: Dict[int, model.Job] = {}
23872390 implicit_collection_jobs_dict = {}
23882391
23892392 def record_job (job ):
2390- if not job :
2391- # No viable job.
2393+ if not job or job.id in jobs_dict:
2394+ # No viable job or job already recorded.
23922395 return
23932396
23942397 jobs_dict [job .id ] = job
@@ -2414,10 +2417,11 @@ def record_associated_jobs(obj):
24142417 )
24152418 job_hda = hda
24162419 while job_hda .copied_from_history_dataset_association : # should this check library datasets as well?
2420+ # Record job (if one exists) even if dataset was copied.
2421+ # The copy could have been created manually through UI/API or using a database operation tool,
2422+ # in which case we have a relevant job to export.
2423+ record_associated_jobs(job_hda)
24172424 job_hda = job_hda .copied_from_history_dataset_association
2418- if not job_hda .creating_job_associations :
2419- # No viable HDA found.
2420- continue
24212425
24222426 record_associated_jobs (job_hda )
24232427
0 commit comments