@@ -65,16 +65,15 @@ def __init__(self, workflow: WorkflowConfig, trigger_act: WorkflowProcessNode, m
6565 self .workflow = workflow
6666 self .trigger_act = trigger_act
6767 self .trigger_id = trigger_act .id
68-
69- # Default is typically the trigger act's was_informed_by
7068 self .informed_by = trigger_act .was_informed_by
7169
7270 # Default has no manifest
7371 self .manifest = None
7472
75- # However, if we see a manifest associated with the WorkflorProcessNode that triggered the job
76- # (which is set for DataGeneration workflowprocess nodes), then we want to override the default
77- # and look for the list of DataGeneration ID that are found in the manifest map
73+ # Set the manifest if found. For DataGeneration workflow process nodes,
74+ # informed_by is overridden with the data_generation_set list from manifest_map.
75+ # Note: for jobs downstream of readsqc (non-DataGeneration workflow records),
76+ # informed_by keeps the default taken from trigger_act.was_informed_by.
7877
7978 if len (trigger_act .manifest ) == 1 :
8079
@@ -87,10 +86,12 @@ def __init__(self, workflow: WorkflowConfig, trigger_act: WorkflowProcessNode, m
8786 # It will return None if the key doesn't exist.
8887 mapped_value = manifest_map .get (manifest_key )
8988
89+ # For dgns wfp nodes
9090 # Check if a value was found, is a dict, and contains the required key.
9191 # This will be the associated data_generation_set IDs with the manifest set
9292 if isinstance (mapped_value , dict ) and 'data_generation_set' in mapped_value :
93- self .informed_by = mapped_value ['data_generation_set' ]
93+ if self .trigger_id in mapped_value ['data_generation_set' ]:
94+ self .informed_by = mapped_value ['data_generation_set' ]
9495
9596
9697
@@ -147,7 +148,7 @@ def create_job_rec(self, job: SchedulerJob, manifest_map: Dict[str, List[str]]):
147148 # If manifest is not empty, then this is a data generation stored in the WorkflowProcessNode
148149 # Note: Currently only support one manifest per workflowprocessnode/datagen
149150 #
150- if len (next_act .manifest ) == 1 :
151+ if len (next_act .manifest ) == 1 and job . trigger_id in manifest_map [ next_act . manifest [ 0 ]][ 'data_generation_set' ] :
151152
152153 # Find the data objects associated with the manifest using manifest_map
153154 for data_object in manifest_map [next_act .manifest [0 ]]['data_object_set' ]:
@@ -351,36 +352,41 @@ def find_new_jobs(self, wfp_node: WorkflowProcessNode, manifest_map: Dict[str, L
351352 self ._messages .append (msg )
352353 continue
353354
355+ #
356+ # This check applies only to wfp_nodes that are data_generation_set records; it avoids duplicate scheduling
357+ #
354358 # If current wfp_node.id is not in existing jobs, see if this has a manifest record,
355359 # then check for other associated data generation records jobs that exist for this wf
356360 found_existing_manifest_job = False
357361 associated_wfp_node_id = None
358362 if len (wfp_node .manifest ) == 1 :
359- for dgns_id in manifest_map [wfp_node .manifest [0 ]]['data_generation_set' ]:
360- # Only need to check for others dgns since already checked itself above
361- if dgns_id != wfp_node .id :
362- if dgns_id in self .get_existing_jobs (wf ):
363- found_existing_manifest_job = True
364- associated_wfp_node_id = dgns_id
365- break
366-
367- # If not found, also check if it was just added to list of all jobs
368- if not found_existing_manifest_job :
369- for new_job in all_jobs :
370- if new_job .manifest :
371- if new_job .manifest == wfp_node .manifest [0 ]:
372- if new_job .workflow .name == wf .name :
373- found_existing_manifest_job = True
374- associated_wfp_node_id = new_job .trigger_id
375- break
376-
377-
378- if found_existing_manifest_job :
379- msg = f"Skipping existing job due to associated data generation record { associated_wfp_node_id } for { wfp_node .id } { wf .name } :{ wf .version } "
380- if msg not in self ._messages :
381- logger .info (msg )
382- self ._messages .append (msg )
383- continue
363+ if wfp_node .id in manifest_map [wfp_node .manifest [0 ]]['data_generation_set' ]:
364+
365+ for dgns_id in manifest_map [wfp_node .manifest [0 ]]['data_generation_set' ]:
367+ # Only the other dgns IDs need checking; this node's own ID was already checked above
367+ if dgns_id != wfp_node .id :
368+ if dgns_id in self .get_existing_jobs (wf ):
369+ found_existing_manifest_job = True
370+ associated_wfp_node_id = dgns_id
371+ break
372+
373+ # If not found, also check if it was just added to list of all jobs
374+ if not found_existing_manifest_job :
375+ for new_job in all_jobs :
376+ if new_job .manifest :
377+ if new_job .manifest == wfp_node .manifest [0 ]:
378+ if new_job .workflow .name == wf .name :
379+ found_existing_manifest_job = True
380+ associated_wfp_node_id = new_job .trigger_id
381+ break
382+
383+
384+ if found_existing_manifest_job :
385+ msg = f"Skipping existing job due to associated data generation record { associated_wfp_node_id } for { wfp_node .id } { wf .name } :{ wf .version } "
386+ if msg not in self ._messages :
387+ logger .info (msg )
388+ self ._messages .append (msg )
389+ continue
384390
385391
386392
0 commit comments