
Commit 0c43fbe

Merge remote-tracking branch 'origin/main' into tahoma_urls
Merge main into my branch
2 parents: 9d87821 + 495fe91

File tree

10 files changed: +367, -63 lines changed


nmdc_automation/workflow_automation/sched.py

Lines changed: 38 additions & 32 deletions
@@ -65,16 +65,15 @@ def __init__(self, workflow: WorkflowConfig, trigger_act: WorkflowProcessNode, m
         self.workflow = workflow
         self.trigger_act = trigger_act
         self.trigger_id = trigger_act.id
-
-        # Default is typically the trigger act's was_informed_by
         self.informed_by = trigger_act.was_informed_by

         # Default has no manifest
         self.manifest = None

-        # However, if we see a manifest associated with the WorkflorProcessNode that triggered the job
-        # (which is set for DataGeneration workflowprocess nodes), then we want to override the default
-        # and look for the list of DataGeneration ID that are found in the manifest map
+        # Set the manifest if found; DataGeneration workflowprocess nodes need their
+        # was_informed_by list assigned from the manifest_map
+        # Note: was_informed_by will be properly set from trigger_act.was_informed_by
+        # for jobs downstream of readsqc (non-dgns wf records)

         if len(trigger_act.manifest) == 1:

@@ -87,10 +86,12 @@ def __init__(self, workflow: WorkflowConfig, trigger_act: WorkflowProcessNode, m
             # It will return None if the key doesn't exist.
             mapped_value = manifest_map.get(manifest_key)

+            # For dgns wfp nodes
             # Check if a value was found, is a dict, and contains the required key.
             # This will be the associated data_generation_set IDs with the manifest set
             if isinstance(mapped_value, dict) and 'data_generation_set' in mapped_value:
-                self.informed_by = mapped_value['data_generation_set']
+                if self.trigger_id in mapped_value['data_generation_set']:
+                    self.informed_by = mapped_value['data_generation_set']

@@ -147,7 +148,7 @@ def create_job_rec(self, job: SchedulerJob, manifest_map: Dict[str, List[str]]):
         # If manifest is not empty, then this is a data generation stored in the WorkflowProcessNode
         # Note: Currently only support one manifest per workflowprocessnode/datagen
         #
-        if len(next_act.manifest) == 1:
+        if len(next_act.manifest) == 1 and job.trigger_id in manifest_map[next_act.manifest[0]]['data_generation_set']:

             # Find the data objects associated with the manifest using manifest_map
             for data_object in manifest_map[next_act.manifest[0]]['data_object_set']:

@@ -351,36 +352,41 @@ def find_new_jobs(self, wfp_node: WorkflowProcessNode, manifest_map: Dict[str, L
                 self._messages.append(msg)
             continue

+            #
+            # This check is only for wfp_nodes that are data_generation_set records to avoid duplicate scheduling
+            #
             # If current wfp_node.id is not in existing jobs, see if this has a manifest record,
             # then check for other associated data generation records jobs that exist for this wf
             found_existing_manifest_job = False
             associated_wfp_node_id = None
             if len(wfp_node.manifest) == 1:
-                for dgns_id in manifest_map[wfp_node.manifest[0]]['data_generation_set']:
-                    # Only need to check for others dgns since already checked itself above
-                    if dgns_id != wfp_node.id:
-                        if dgns_id in self.get_existing_jobs(wf):
-                            found_existing_manifest_job = True
-                            associated_wfp_node_id = dgns_id
-                            break
-
-                # If not found, also check if it was just added to list of all jobs
-                if not found_existing_manifest_job:
-                    for new_job in all_jobs:
-                        if new_job.manifest:
-                            if new_job.manifest == wfp_node.manifest[0]:
-                                if new_job.workflow.name == wf.name:
-                                    found_existing_manifest_job = True
-                                    associated_wfp_node_id = new_job.trigger_id
-                                    break
-
-
-                if found_existing_manifest_job:
-                    msg = f"Skipping existing job due to associated data generation record {associated_wfp_node_id} for {wfp_node.id} {wf.name}:{wf.version}"
-                    if msg not in self._messages:
-                        logger.info(msg)
-                        self._messages.append(msg)
-                    continue
+                if wfp_node.id in manifest_map[wfp_node.manifest[0]]['data_generation_set']:
+
+                    for dgns_id in manifest_map[wfp_node.manifest[0]]['data_generation_set']:
+                        # Only need to check for others dgns since already checked itself above
+                        if dgns_id != wfp_node.id:
+                            if dgns_id in self.get_existing_jobs(wf):
+                                found_existing_manifest_job = True
+                                associated_wfp_node_id = dgns_id
+                                break
+
+                    # If not found, also check if it was just added to list of all jobs
+                    if not found_existing_manifest_job:
+                        for new_job in all_jobs:
+                            if new_job.manifest:
+                                if new_job.manifest == wfp_node.manifest[0]:
+                                    if new_job.workflow.name == wf.name:
+                                        found_existing_manifest_job = True
+                                        associated_wfp_node_id = new_job.trigger_id
+                                        break
+
+                    if found_existing_manifest_job:
+                        msg = f"Skipping existing job due to associated data generation record {associated_wfp_node_id} for {wfp_node.id} {wf.name}:{wf.version}"
+                        if msg not in self._messages:
+                            logger.info(msg)
+                            self._messages.append(msg)
+                        continue
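For context on the new guard, here is a minimal standalone sketch of the lookup it performs. The manifest_map entry shape ({'data_generation_set': [...], 'data_object_set': [...]}) is taken from the diff above; the helper name resolve_informed_by and the example IDs are hypothetical, not part of the codebase.

from typing import Any, Dict, List

# Hypothetical helper mirroring the guard added in SchedulerJob.__init__ above;
# the manifest_map shape is assumed from the diff, not taken from the library.
def resolve_informed_by(
    trigger_id: str,
    default_informed_by: List[str],
    manifest_map: Dict[str, Dict[str, Any]],
    manifest_key: str,
) -> List[str]:
    mapped_value = manifest_map.get(manifest_key)
    # Only override the default when this trigger ID is listed in the manifest.
    if isinstance(mapped_value, dict) and 'data_generation_set' in mapped_value:
        if trigger_id in mapped_value['data_generation_set']:
            return mapped_value['data_generation_set']
    return default_informed_by

example_manifest_map = {
    "manifest-1": {
        "data_generation_set": ["dgns-1", "dgns-2"],
        "data_object_set": ["do-1", "do-2"],
    }
}
print(resolve_informed_by("dgns-1", ["dgns-1"], example_manifest_map, "manifest-1"))
# -> ['dgns-1', 'dgns-2']
print(resolve_informed_by("dgns-9", ["dgns-9"], example_manifest_map, "manifest-1"))
# -> ['dgns-9']  (falls back to the trigger's own was_informed_by)

The same membership test gates both the informed_by override in __init__ and, in create_job_rec, whether the manifest's data_object_set is consulted for the triggering job.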

nmdc_automation/workflow_automation/watch_nmdc.py

Lines changed: 0 additions & 1 deletion
@@ -244,7 +244,6 @@ def get_finished_jobs(self)->Tuple[List[WorkflowJob], List[WorkflowJob]]:
                 continue
             elif status in ("failed", "null"):
                 job.workflow.last_status = status
-                job.workflow.failed_count += 1
                 failed_jobs.append(job)
                 continue
             else:

nmdc_automation/workflow_automation/wfutils.py

Lines changed: 1 addition & 0 deletions
@@ -176,6 +176,7 @@ def submit_job(self, force: bool = False) -> Optional[int]:
         elif len(self.workflow.was_informed_by) == 1:
             tag_value = self.workflow.was_informed_by[0] + "/" + self.workflow.workflow_execution_id

+        # This will work to schedule but shouldn't go to this block unless bug or new feature support
         else:
             tag_value = ":".join(self.workflow.was_informed_by) + "/" + self.workflow.workflow_execution_id
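As a quick illustration of the branch the new comment flags, this is a standalone sketch of the tag construction for single versus multiple was_informed_by values; the function name and IDs are invented for the example.

from typing import List

# Illustrative only: a standalone version of the tag construction around the
# commented branch above. Function name and example IDs are hypothetical.
def build_tag_value(was_informed_by: List[str], workflow_execution_id: str) -> str:
    if len(was_informed_by) == 1:
        return was_informed_by[0] + "/" + workflow_execution_id
    # Multi-parent case: schedulable, but per the new comment it is only
    # expected here if there is a bug or a not-yet-supported feature.
    return ":".join(was_informed_by) + "/" + workflow_execution_id

print(build_tag_value(["nmdc:dgns-1"], "nmdc:wfexe-1"))
# -> nmdc:dgns-1/nmdc:wfexe-1
print(build_tag_value(["nmdc:dgns-1", "nmdc:dgns-2"], "nmdc:wfexe-1"))
# -> nmdc:dgns-1:nmdc:dgns-2/nmdc:wfexe-1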

nmdc_automation/workflow_automation/workflow_process.py

Lines changed: 23 additions & 0 deletions
@@ -223,6 +223,20 @@ def get_current_workflow_process_nodes(

             workflow_process_nodes.add(wfp_node)

+    # Build the list of data_generation_id_sets to manifest id for non-dgns processing workflows
+    # so we can add the manifest property to wfp_nodes when was_informed_by > 1
+    dg_set_to_manifest_map = {}
+    for manifest_id, manifest_data in manifest_map.items():
+        dg_ids_list = manifest_data.get('data_generation_set')
+
+        if dg_ids_list:
+            # 1. Sort the list and convert it to a hashable tuple
+            key_tuple = tuple(sorted(dg_ids_list))  # ex: ('id1', 'id2')
+
+            if key_tuple not in dg_set_to_manifest_map:
+                dg_set_to_manifest_map[key_tuple] = manifest_id
+
     for wf in workflow_execution_workflows:
         q = {}
         if wf.git_repo:

@@ -259,6 +273,15 @@ def get_current_workflow_process_nodes(
             # Join the sorted elements with "_" as the separator
             current_found_rec_key = "_".join(sorted_was_informed_by)

+            # Look for the manifest ID to add to the workflow process node
+            # Normalize the list: sort and convert to tuple
+            current_manifest = None
+            key_tuple = tuple(sorted(rec["was_informed_by"]))  # Result: ('id1', 'id2')
+            if key_tuple in dg_set_to_manifest_map:
+                current_manifest = dg_set_to_manifest_map[key_tuple]
+            if current_manifest:
+                wfp_node.add_to_manifest(current_manifest)
+
             # if there is already a wfp_node added for this workflow type, check if version is more recent
             # then add it and replace previous one.
             if current_found_rec_key in found_wfs:
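
The new lookup hinges on using a sorted tuple of data generation IDs as a dictionary key, so a record's was_informed_by list matches its manifest regardless of ordering. A minimal standalone sketch of that pattern, with hypothetical IDs:

# Standalone sketch of the reverse lookup added above: each manifest's
# data_generation_set, sorted and converted to a hashable tuple, maps back to
# its manifest ID; a record's was_informed_by list is normalized the same way
# before the lookup. IDs below are hypothetical.
example_manifest_map = {
    "manifest-1": {"data_generation_set": ["dgns-2", "dgns-1"]},
    "manifest-2": {"data_generation_set": ["dgns-3"]},
}

dg_set_to_manifest_map = {}
for manifest_id, manifest_data in example_manifest_map.items():
    dg_ids_list = manifest_data.get("data_generation_set")
    if dg_ids_list:
        key_tuple = tuple(sorted(dg_ids_list))  # order-insensitive, hashable key
        dg_set_to_manifest_map.setdefault(key_tuple, manifest_id)

rec = {"was_informed_by": ["dgns-1", "dgns-2"]}
current_manifest = dg_set_to_manifest_map.get(tuple(sorted(rec["was_informed_by"])))
print(current_manifest)  # -> manifest-1

Sorting before building the tuple makes the lookup order-insensitive, and a tuple is needed because a list is not hashable and cannot serve as a dict key.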

poetry.lock

Lines changed: 21 additions & 28 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 2 additions & 2 deletions
@@ -29,7 +29,7 @@ python = ">=3.10,<3.12"
 pymongo = "^4.3.3"
 pyYAML = "^6.0"
 requests = "^2.28.2"
-nmdc-schema = "^11.13.0"
+nmdc-schema = "^11.14.0"
 deepdiff = ">=7.0.1"
 pytz = ">=2024.1"
 python-dotenv = "^1.0.0"

@@ -73,4 +73,4 @@ build-backend = "poetry.core.masonry.api"
 [tool.pytest.ini_options]
 markers = [
     "integration: mark test as integration test",
-]
+]
