Skip to content

Commit bc01b10

Browse files
authored
Merge pull request #21 from Sage-Bionetworks-Workflows/add-patch-release
[GEN-863] Add patch release
2 parents e16bee8 + 264bf13 commit bc01b10

File tree

10 files changed

+741
-0
lines changed

10 files changed

+741
-0
lines changed

modules/compare_releases.nf

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// Compare two GENIE releases identified by their Synapse ids.
// Runs compare_patch.py inside the patch release container and emits
// whatever the script prints as the process output.
process compare_releases {
    container "${params.patch_release_docker}"
    secret 'SYNAPSE_AUTH_TOKEN'

    input:
    // Not referenced by the script below; presumably only used to sequence
    // this process after an upstream step -- confirm against the caller.
    val previous
    val release_synid
    val new_release_synid

    output:
    stdout

    script:
    """
    python3 /patch_release/compare_patch.py \
        --original_synid ${release_synid} \
        --new_synid ${new_release_synid}
    """
}

modules/create_dashboard_html.nf

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Create the release dashboard HTML.
// Runs dashboard_markdown_generator.R with the dashboard template from the
// main pipeline container and emits the script's stdout.
process create_dashboard_html {
    debug true
    container "$params.main_pipeline_docker"
    secret 'SYNAPSE_AUTH_TOKEN'

    input:
    // Not referenced by the script below; presumably only used to sequence
    // this process after an upstream step -- confirm against the caller.
    val previous
    val release
    val production

    output:
    stdout

    script:
    // The two run modes differ only by this flag: non-production runs pass
    // --staging, production runs pass nothing extra.
    def staging_flag = production ? '' : '--staging'
    """
    cd /root/Genie
    Rscript ./R/dashboard_markdown_generator.R $release \
        --template_path ./templates/dashboardTemplate.Rmd $staging_flag
    """
}

modules/patch_release.nf

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// Patch release.
// Runs patch.py from the patch release container with the existing release,
// the new release and the retracted-samples Synapse ids, and emits the
// script's stdout.
process patch_release {
    container "$params.patch_release_docker"
    secret 'SYNAPSE_AUTH_TOKEN'

    input:
    val release_synid
    val new_release_synid
    val retracted_sample_synid
    val production

    output:
    stdout

    script:
    // The two run modes differ only by this flag. Building the command once
    // also removes the dangling trailing "\" the non-production branch had.
    def production_flag = production ? '--production' : ''
    """
    python3 /patch_release/patch.py \
        $release_synid \
        $new_release_synid \
        $retracted_sample_synid $production_flag
    """
}

nextflow.config

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ profiles {
2222
main_release_utils_docker = "sagebionetworks/main-genie-release-utils"
2323
find_maf_artifacts_docker = "sagebionetworks/genie-artifact-finder"
2424
create_data_guide_docker = "sagebionetworks/genie-data-guide"
25+
patch_release_docker = "sagebionetworks/genie-patch-main-release"
2526
}
2627
}
2728
aws_prod {
@@ -38,10 +39,18 @@ profiles {
3839
memory = 16.GB
3940
cpus = 4
4041
}
42+
withName: patch_release {
43+
memory = 16.GB
44+
cpus = 4
45+
}
4146
withName: create_consortium_release {
4247
memory = 32.GB
4348
cpus = 4
4449
}
50+
withName: create_dashboard_html {
51+
memory = 32.GB
52+
cpus = 4
53+
}
4554
withName: create_public_release {
4655
memory = 16.GB
4756
cpus = 4
@@ -57,6 +66,7 @@ profiles {
5766
main_release_utils_docker = "sagebionetworks/main-genie-release-utils"
5867
find_maf_artifacts_docker = "sagebionetworks/genie-artifact-finder"
5968
create_data_guide_docker = "sagebionetworks/genie-data-guide"
69+
patch_release_docker = "sagebionetworks/genie-patch-main-release"
6070
}
6171
}
6272
}

nextflow_schema_patch_release.json

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema",
3+
"$id": "https://raw.githubusercontent.com/Sage-Bionetworks-Workflows/nf-genie/master/nextflow_schema.json",
4+
"title": "Sage-Bionetworks-Workflows/nf-genie pipeline parameters",
5+
"description": "Nextflow pipeline for main GENIE processing",
6+
"type": "object",
7+
"definitions": {
8+
"patch_release": {
9+
"title": "patch_release",
10+
"type": "object",
11+
"description": "Patch release configurations",
12+
"default": "",
13+
"properties": {
14+
"release_synid": {
15+
"type": "string",
16+
"description": "Existing consortium release synapse folder id"
17+
},
18+
"new_release_synid": {
19+
"type": "string",
20+
"description": "New consortium release synapse folder id"
21+
},
22+
"retracted_sample_synid": {
23+
"type": "string",
24+
"description": "samples_to_retract.csv of 3rd consortium release"
25+
},
26+
"release": {
27+
"type": "string",
28+
"description": "Release name. E.g: 13.1-consortium",
29+
"default": "TEST.consortium",
30+
"pattern": "\\d+[.]\\d+-(consortium)$"
31+
},
32+
"project_id": {
33+
"type": "string",
34+
"description": "Synapse GENIE internal projects."
35+
}
36+
}
37+
}
38+
},
39+
"allOf": [
40+
{
41+
"$ref": "#/definitions/patch_release"
42+
}
43+
]
44+
}

patch_release_main.nf

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#!/usr/bin/env nextflow
2+
// Ensure DSL2
3+
nextflow.enable.dsl = 2
4+
5+
// IMPORT MODULES
6+
include { patch_release } from './modules/patch_release'
7+
include { create_data_guide } from './modules/create_data_guide'
8+
include { create_dashboard_html } from './modules/create_dashboard_html'
9+
include { compare_releases } from './modules/compare_releases'
10+
11+
// Default parameters; each can be overridden on the command line.
params.release_synid = "syn53170398" // 15.4-consortium
params.new_release_synid = "syn62069187" // 15.6-consortium (in staging)
params.retracted_sample_synid = "syn54082015" // 16.3-consortium samples_to_retract.csv
params.release = "15.6-consortium"
// NOTE(review): a previously commented-out value, project_id = "syn7208886",
// is not one of the two ids accepted below -- confirm before reusing it.
params.project_id = "syn22033066" // staging project

// Derive the run mode from the target project; any other id aborts the run.
if (params.project_id == "syn3380222") {
    is_production = true
} else if (params.project_id == "syn22033066") {
    is_production = false
} else {
    exit 1, "project_id must be syn22033066 or syn3380222"
}
24+
25+
workflow {
    // Wrap each pipeline parameter in a value channel.
    release_synid_ch = Channel.value(params.release_synid)
    new_release_synid_ch = Channel.value(params.new_release_synid)
    retracted_sample_synid_ch = Channel.value(params.retracted_sample_synid)
    release_ch = Channel.value(params.release)
    project_id_ch = Channel.value(params.project_id)

    patch_release(release_synid_ch, new_release_synid_ch, retracted_sample_synid_ch, is_production)

    // Both reports take the patch step's output as their first input.
    create_dashboard_html(patch_release.out, release_ch, is_production)
    create_data_guide(patch_release.out, release_ch, project_id_ch)

    // syn55146141 is hard coded because the release to compare against will
    // always be different from new_release_synid, which is the patch itself.
    // TODO: implement a different comparison report that looks at diffs.
    // The current comparison looks at similarities, which is good for the
    // staging pipeline, so it only runs outside production.
    if (!is_production) {
        compare_releases(create_data_guide.out, "syn55146141", new_release_synid_ch)
    }
}

scripts/patch_release/Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
FROM sagebionetworks/genie:version-16.4.0
2+
3+
WORKDIR /patch_release
4+
5+
COPY . .

scripts/patch_release/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Public Patch Release
2+
3+
All sample / patient retractions have to occur within 3 months of the public release. If problems are found with older public releases, we will not patch, but will add information to the release notes. The patch releases are not meant to resolve data issues, but only to remove samples whose consent has been retracted.
4+
5+
1. Create another consortium release
6+
1. Generate the data guide, dashboard html, and release notes
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
"""
2+
The command ran:
3+
python patch.py syn53170398 syn62069187 syn54082015
4+
In lieu of unit or integration tests, the above command was run and the command
5+
below tests 15.5-consortium (syn55146141) against 15.6-consortium (staging syn62069187)
6+
to check that they are the same.
7+
8+
python compare_patch.py --original_synid syn55146141 --new_synid syn62069187
9+
"""
10+
import argparse
11+
12+
import synapseclient
13+
import synapseutils as synu
14+
15+
16+
def _get_file_dict(syn: synapseclient.Synapse, synid: str) -> dict:
    """Build a lookup of the files found under a Synapse container.

    The container is traversed with ``synapseutils.walk`` and each file's
    entity is fetched with ``downloadFile=False``.

    Args:
        syn (synapseclient.Synapse): A Synapse client object.
        synid (str): The Synapse ID of the container to walk.

    Returns:
        dict: Maps each file name to the entity returned by ``syn.get`` for
            that file. Keys are bare file names, so if the same name occurs
            more than once in the walk, the last one visited wins.
    """
    file_entities = {}
    for _, _, files in synu.walk(syn, synid):
        # Each entry is a (file name, Synapse ID) pair. Distinct loop names
        # keep the ``synid`` parameter from being shadowed.
        for file_name, file_synid in files:
            file_entities[file_name] = syn.get(file_synid, downloadFile=False)
    return file_entities
33+
34+
35+
def compare_releases(original_synid: str, new_synid: str) -> None:
    """Compare two Synapse folders that should contain identical files.

    Files are matched by name and compared by MD5. The findings are printed
    to stdout: the number of files on each side, every file name present on
    only one side, and every file whose MD5 differs between the two.

    Args:
        original_synid (str): The Synapse ID of the original release.
        new_synid (str): The Synapse ID of the new release.

    Returns:
        None. The comparison is reported through printed messages only.
    """
    # Log in to Synapse
    syn = synapseclient.login()

    original_file_list = _get_file_dict(syn, original_synid)
    new_file_list = _get_file_dict(syn, new_synid)

    # Report the file counts so a size mismatch is visible at a glance
    print("Number of files in old folder: ", len(original_file_list))
    print("Number of files in new folder: ", len(new_file_list))

    # Files that only exist in the new folder
    for filename in new_file_list:
        if filename not in original_file_list:
            print("File not found in old folder: ", filename)

    # Files that only exist in the old folder, or whose content changed
    for filename, original_ent in original_file_list.items():
        new_ent = new_file_list.get(filename)
        if new_ent is None:
            print("File not found in new folder: ", filename)
        elif original_ent.md5 != new_ent.md5:
            print("Files are different: ", filename)
73+
74+
def main():
    """Parse the command-line arguments and run the release comparison.

    Both ``--original_synid`` and ``--new_synid`` are required. When either
    is missing, argparse prints a usage error and exits with status 2
    instead of passing ``None`` on to the comparison.
    """
    parser = argparse.ArgumentParser(description='Compare two Synapse releases.')
    parser.add_argument(
        '--original_synid',
        type=str,
        required=True,
        help='The Synapse ID of the original release',
    )
    parser.add_argument(
        '--new_synid',
        type=str,
        required=True,
        help='The Synapse ID of the new release',
    )

    args = parser.parse_args()

    compare_releases(args.original_synid, args.new_synid)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)