Skip to content

Commit a10e96a

Browse files
committed
Merge remote-tracking branch 'origin/main' into tahoma_urls
Merging in main updates to branch
2 parents 56d669a + d3871a5 commit a10e96a

File tree

10 files changed

+463
-17
lines changed

10 files changed

+463
-17
lines changed

nmdc_automation/workflow_automation/workflow_process.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ def get_current_workflow_process_nodes(
242242
if wf.git_repo:
243243
q = {"git_url": wf.git_repo}
244244
# override query with allowlist
245-
if allowlist: # TODO test this -jlp 20250718
245+
if allowlist:
246246
q = {"was_informed_by": {"$in": list(allowlist)}}
247247

248248
#records = db[wf.collection].find(q)
@@ -254,11 +254,20 @@ def get_current_workflow_process_nodes(
254254
continue
255255
if _is_missing_required_input_output(wf, rec, data_objects_by_id):
256256
continue
257+
258+
# Deprecated
257259
# Iterate through the was_informed_by list; add the wpn only if every ID is valid.
258-
wib_set_valid = True
260+
#wib_set_valid = True
261+
#for wib_id in rec["was_informed_by"]:
262+
# if wib_id not in data_generation_ids:
263+
# wib_set_valid = False
264+
265+
# For manifest sets, any of the record's was_informed_by IDs may be in the allowlist (stored in data_generation_ids),
266+
# so the set is valid if at least one of its IDs is found there.
267+
wib_set_valid = False
259268
for wib_id in rec["was_informed_by"]:
260-
if wib_id not in data_generation_ids:
261-
wib_set_valid = False
269+
if wib_id in data_generation_ids:
270+
wib_set_valid = True
262271

263272
if wib_set_valid == True:
264273
wfp_node = WorkflowProcessNode(rec, wf)
@@ -507,7 +516,7 @@ def _map_manifest_to_data_generation_set(api, manifest_map):
507516
}
508517

509518
resp = api.run_query(data_object_agg)
510-
logging.info(f"queries:run response: {resp}")
519+
logging.debug(f"queries:run response: {resp}")
511520

512521
# Log any issues
513522
if len(resp) == 0:
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
[{
2+
"analyte_category": "metagenome",
3+
"associated_studies": [
4+
"nmdc:sty-11-hht5sb92"
5+
],
6+
"has_input": [
7+
"nmdc:procsm-12-3a5rx033"
8+
],
9+
"has_output": [
10+
"nmdc:dobj-11-cnyvjb30jlp",
11+
"nmdc:dobj-11-jky4k370jlp"
12+
],
13+
"id": "nmdc:dgns-11-9ss0vs34jlp",
14+
"instrument_used": [
15+
"nmdc:inst-14-xz5tb342"
16+
],
17+
"name": "Surface water microbial communities - CRAM.C2.20210809.DNA-DNA1",
18+
"ncbi_project_name": "PRJNA406977",
19+
"processing_institution": "Battelle",
20+
"type": "nmdc:NucleotideSequencing"
21+
},
22+
{
23+
"analyte_category": "metagenome",
24+
"associated_studies": [
25+
"nmdc:sty-11-hht5sb92"
26+
],
27+
"has_input": [
28+
"nmdc:procsm-12-3a5rx033"
29+
],
30+
"has_output": [
31+
"nmdc:dobj-11-dht5bk30jlp",
32+
"nmdc:dobj-11-9eyeq730jlp"
33+
],
34+
"id": "nmdc:dgns-11-d4er8763jlp",
35+
"instrument_used": [
36+
"nmdc:inst-14-xz5tb342"
37+
],
38+
"name": "Surface water microbial communities - CRAM.C2.20210809.DNA-DNA1",
39+
"ncbi_project_name": "PRJNA406977",
40+
"processing_institution": "Battelle",
41+
"type": "nmdc:NucleotideSequencing"
42+
}]
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
[
2+
{
3+
"data_object_type": "Metagenome Raw Read 1",
4+
"description": "sequencing results for BMI_22A_01_0002_mms_HNFFKBGXN_R1",
5+
"id": "nmdc:dobj-11-9eyeq730jlp",
6+
"in_manifest": [
7+
"nmdc:manif-11-jyrzrd96"
8+
],
9+
"name": "BMI_22A_01_0002_mms_HNFFKBGXN_R1.fastq.gz",
10+
"type": "nmdc:DataObject",
11+
"url": "https://storage.neonscience.org/neon-microbial-raw-seq-files/2023/HNFFKBGXN_R1/BMI_22A_01_0002_mms_HNFFKBGXN_R1.fastq.gz",
12+
"data_category": "instrument_data"
13+
},
14+
{
15+
"data_object_type": "Metagenome Raw Read 2",
16+
"description": "sequencing results for BMI_22A_01_0002_R2",
17+
"id": "nmdc:dobj-11-cnyvjb30jlp",
18+
"in_manifest": [
19+
"nmdc:manif-11-jyrzrd96"
20+
],
21+
"name": "BMI_22A_01_0002_R2.fastq.gz",
22+
"type": "nmdc:DataObject",
23+
"url": "https://storage.neonscience.org/neon-microbial-raw-seq-files/2024/HTLW3DSX7_PoolA_R2/BMI_22A_01_0002_R2.fastq.gz",
24+
"data_category": "instrument_data"
25+
},
26+
{
27+
"data_object_type": "Metagenome Raw Read 2",
28+
"description": "sequencing results for BMI_22A_01_0002_mms_HNFFKBGXN_R2",
29+
"id": "nmdc:dobj-11-dht5bk30jlp",
30+
"in_manifest": [
31+
"nmdc:manif-11-jyrzrd96"
32+
],
33+
"name": "BMI_22A_01_0002_mms_HNFFKBGXN_R2.fastq.gz",
34+
"type": "nmdc:DataObject",
35+
"url": "https://storage.neonscience.org/neon-microbial-raw-seq-files/2023/HNFFKBGXN_R2/BMI_22A_01_0002_mms_HNFFKBGXN_R2.fastq.gz",
36+
"data_category": "instrument_data"
37+
},
38+
{
39+
"data_object_type": "Metagenome Raw Read 1",
40+
"description": "sequencing results for BMI_22A_01_0002_R1",
41+
"id": "nmdc:dobj-11-jky4k370jlp",
42+
"in_manifest": [
43+
"nmdc:manif-11-jyrzrd96"
44+
],
45+
"name": "BMI_22A_01_0002_R1.fastq.gz",
46+
"type": "nmdc:DataObject",
47+
"url": "https://storage.neonscience.org/neon-microbial-raw-seq-files/2024/HTLW3DSX7_PoolA_R1/BMI_22A_01_0002_R1.fastq.gz",
48+
"data_category": "instrument_data"
49+
},
50+
{
51+
"id": "nmdc:dobj-15-35az7w26",
52+
"type": "nmdc:DataObject",
53+
"name": "nmdc_wfrqc-15-x027k720.1_readsQC.info",
54+
"description": "Read filtering info for nmdc:wfrqc-15-x027k720.1",
55+
"data_category": "processed_data",
56+
"data_object_type": "Read Filtering Info File",
57+
"file_size_bytes": 620,
58+
"md5_checksum": "436b791048f5c24824d1a29b02e6004b",
59+
"url": "https://data.microbiomedata.org/data/dev/nmdc:manif-11-jyrzrd96/nmdc:wfrqc-15-x027k720.1/nmdc_wfrqc-15-x027k720.1_readsQC.info",
60+
"was_generated_by": "nmdc:wfrqc-15-x027k720.1"
61+
},
62+
{
63+
"id": "nmdc:dobj-15-8m16cw57",
64+
"type": "nmdc:DataObject",
65+
"name": "nmdc_wfrqc-15-x027k720.1_filtered.fastq.gz",
66+
"description": "Reads QC for nmdc:wfrqc-15-x027k720.1",
67+
"data_category": "processed_data",
68+
"data_object_type": "Filtered Sequencing Reads",
69+
"file_size_bytes": 462030159,
70+
"md5_checksum": "d09b6b265e6564793aead3203f653c83",
71+
"url": "https://data.microbiomedata.org/data/dev/nmdc:manif-11-jyrzrd96/nmdc:wfrqc-15-x027k720.1/nmdc_wfrqc-15-x027k720.1_filtered.fastq.gz",
72+
"was_generated_by": "nmdc:wfrqc-15-x027k720.1"
73+
},
74+
{
75+
"id": "nmdc:dobj-15-d29j5y50",
76+
"type": "nmdc:DataObject",
77+
"name": "nmdc_wfrqc-15-x027k720.1_filterStats.txt",
78+
"description": "Reads QC summary for nmdc:wfrqc-15-x027k720.1",
79+
"data_category": "processed_data",
80+
"data_object_type": "QC Statistics",
81+
"file_size_bytes": 287,
82+
"md5_checksum": "a970cbeaf7f1028d27988f3017fee0c2",
83+
"url": "https://data.microbiomedata.org/data/dev/nmdc:manif-11-jyrzrd96/nmdc:wfrqc-15-x027k720.1/nmdc_wfrqc-15-x027k720.1_filterStats.txt",
84+
"was_generated_by": "nmdc:wfrqc-15-x027k720.1"
85+
}
86+
]
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
[
2+
{
3+
"workflow": {
4+
"id": "Reads QC Interleave: v1.0.20"
5+
},
6+
"id": "nmdc:sys0qpe2ch39",
7+
"created_at": {
8+
"$date": "2025-12-17T06:40:18.982Z"
9+
},
10+
"config": {
11+
"git_repo": "https://github.com/microbiomedata/ReadsQC",
12+
"release": "v1.0.20",
13+
"wdl": "interleave_rqcfilter.wdl",
14+
"activity_id": "nmdc:wfrqc-15-x027k720.1",
15+
"activity_set": "workflow_execution_set",
16+
"was_informed_by": [
17+
"nmdc:dgns-11-d4er8763jlp",
18+
"nmdc:dgns-11-9ss0vs34jlp"
19+
],
20+
"trigger_activity": "nmdc:dgns-11-d4er8763jlp",
21+
"iteration": 1,
22+
"input_prefix": "nmdc_rqcfilter",
23+
"inputs": {
24+
"proj": "nmdc:wfrqc-15-x027k720.1",
25+
"input_fastq1": [
26+
"https://storage.neonscience.org/neon-microbial-raw-seq-files/2023/HNFFKBGXN_R1/BMI_22A_01_0002_mms_HNFFKBGXN_R1.fastq.gz",
27+
"https://storage.neonscience.org/neon-microbial-raw-seq-files/2024/HTLW3DSX7_PoolA_R1/BMI_22A_01_0002_R1.fastq.gz"
28+
],
29+
"input_fastq2": [
30+
"https://storage.neonscience.org/neon-microbial-raw-seq-files/2023/HNFFKBGXN_R2/BMI_22A_01_0002_mms_HNFFKBGXN_R2.fastq.gz",
31+
"https://storage.neonscience.org/neon-microbial-raw-seq-files/2024/HTLW3DSX7_PoolA_R2/BMI_22A_01_0002_R2.fastq.gz"
32+
]
33+
},
34+
"input_data_objects": [
35+
{
36+
"id": "nmdc:dobj-11-9eyeq730jlp",
37+
"type": "nmdc:DataObject",
38+
"name": "BMI_22A_01_0002_mms_HNFFKBGXN_R1.fastq.gz",
39+
"description": "sequencing results for BMI_22A_01_0002_mms_HNFFKBGXN_R1",
40+
"data_category": "instrument_data",
41+
"data_object_type": "Metagenome Raw Read 1",
42+
"url": "https://storage.neonscience.org/neon-microbial-raw-seq-files/2023/HNFFKBGXN_R1/BMI_22A_01_0002_mms_HNFFKBGXN_R1.fastq.gz",
43+
"in_manifest": [
44+
"nmdc:manif-11-jyrzrd96"
45+
]
46+
},
47+
{
48+
"id": "nmdc:dobj-11-jky4k370jlp",
49+
"type": "nmdc:DataObject",
50+
"name": "BMI_22A_01_0002_R1.fastq.gz",
51+
"description": "sequencing results for BMI_22A_01_0002_R1",
52+
"data_category": "instrument_data",
53+
"data_object_type": "Metagenome Raw Read 1",
54+
"url": "https://storage.neonscience.org/neon-microbial-raw-seq-files/2024/HTLW3DSX7_PoolA_R1/BMI_22A_01_0002_R1.fastq.gz",
55+
"in_manifest": [
56+
"nmdc:manif-11-jyrzrd96"
57+
]
58+
},
59+
{
60+
"id": "nmdc:dobj-11-dht5bk30jlp",
61+
"type": "nmdc:DataObject",
62+
"name": "BMI_22A_01_0002_mms_HNFFKBGXN_R2.fastq.gz",
63+
"description": "sequencing results for BMI_22A_01_0002_mms_HNFFKBGXN_R2",
64+
"data_category": "instrument_data",
65+
"data_object_type": "Metagenome Raw Read 2",
66+
"url": "https://storage.neonscience.org/neon-microbial-raw-seq-files/2023/HNFFKBGXN_R2/BMI_22A_01_0002_mms_HNFFKBGXN_R2.fastq.gz",
67+
"in_manifest": [
68+
"nmdc:manif-11-jyrzrd96"
69+
]
70+
},
71+
{
72+
"id": "nmdc:dobj-11-cnyvjb30jlp",
73+
"type": "nmdc:DataObject",
74+
"name": "BMI_22A_01_0002_R2.fastq.gz",
75+
"description": "sequencing results for BMI_22A_01_0002_R2",
76+
"data_category": "instrument_data",
77+
"data_object_type": "Metagenome Raw Read 2",
78+
"url": "https://storage.neonscience.org/neon-microbial-raw-seq-files/2024/HTLW3DSX7_PoolA_R2/BMI_22A_01_0002_R2.fastq.gz",
79+
"in_manifest": [
80+
"nmdc:manif-11-jyrzrd96"
81+
]
82+
}
83+
],
84+
"activity": {
85+
"name": "Read QC for {id}",
86+
"input_read_bases": "{outputs.stats.input_read_bases}",
87+
"input_read_count": "{outputs.stats.input_read_count}",
88+
"output_read_bases": "{outputs.stats.output_read_bases}",
89+
"output_read_count": "{outputs.stats.output_read_count}",
90+
"type": "nmdc:ReadQcAnalysis"
91+
},
92+
"outputs": [
93+
{
94+
"output": "filtered_final",
95+
"name": "Reads QC result fastq (clean data)",
96+
"data_object_type": "Filtered Sequencing Reads",
97+
"description": "Reads QC for {id}",
98+
"id": "nmdc:dobj-15-8m16cw57"
99+
},
100+
{
101+
"output": "filtered_stats_final",
102+
"name": "Reads QC summary statistics",
103+
"data_object_type": "QC Statistics",
104+
"description": "Reads QC summary for {id}",
105+
"id": "nmdc:dobj-15-d29j5y50"
106+
},
107+
{
108+
"output": "rqc_info",
109+
"name": "File containing read filtering information",
110+
"data_object_type": "Read Filtering Info File",
111+
"description": "Read filtering info for {id}",
112+
"id": "nmdc:dobj-15-35az7w26"
113+
}
114+
],
115+
"manifest": "nmdc:manif-11-jyrzrd96"
116+
},
117+
"claims": [
118+
{
119+
"op_id": "nmdc:sys0ceccqf55",
120+
"site_id": "NERSC"
121+
}
122+
]
123+
}
124+
]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[{
2+
"id": "nmdc:manif-11-jyrzrd96",
3+
"manifest_category": "poolable_replicates",
4+
"type": "nmdc:Manifest"
5+
}]
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
[{
2+
"id": "nmdc:wfrqc-15-x027k720.1",
3+
"type": "nmdc:ReadQcAnalysis",
4+
"name": "Read QC for nmdc:wfrqc-15-x027k720.1",
5+
"has_input": [
6+
"nmdc:dobj-11-9eyeq730jlp",
7+
"nmdc:dobj-11-jky4k370jlp",
8+
"nmdc:dobj-11-dht5bk30jlp",
9+
"nmdc:dobj-11-cnyvjb30jlp"
10+
],
11+
"has_output": [
12+
"nmdc:dobj-15-8m16cw57",
13+
"nmdc:dobj-15-d29j5y50",
14+
"nmdc:dobj-15-35az7w26"
15+
],
16+
"processing_institution": "NMDC",
17+
"git_url": "https://github.com/microbiomedata/ReadsQC",
18+
"started_at_time": "2025-12-16 22:47:54",
19+
"was_informed_by": [
20+
"nmdc:dgns-11-d4er8763jlp",
21+
"nmdc:dgns-11-9ss0vs34jlp"
22+
],
23+
"ended_at_time": "2025-12-18 10:41:40",
24+
"execution_resource": "NERSC-Perlmutter",
25+
"version": "v1.0.20"
26+
}]

tests/fixtures/nmdc_db/workflow_execution_manifest_asm.json renamed to tests/fixtures/nmdc_db/workflow_execution_manifest_readsqc.json

File renamed without changes.

tests/test_data/allow.lst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nmdc:dgns-11-qmpge038

tests/test_data/allow2.lst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
nmdc:dgns-11-d4er8763jlp

0 commit comments

Comments
 (0)