Skip to content

Commit 4645e9c

Browse files
authored
Merge pull request #79 from mskcc/dev
Release 1.3.1
2 parents bee1da8 + 76f0b10 commit 4645e9c

File tree

9 files changed

+56
-145
lines changed

9 files changed

+56
-145
lines changed

modules.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
},
3838
"neoantigenutils/neoantigeninput": {
3939
"branch": "develop",
40-
"git_sha": "8b8e8d28b628b6244f4539cc405f2561fd733967",
40+
"git_sha": "dfec5efe0608379c3f7417693f3ef9deafc5d63a",
4141
"installed_by": ["modules"]
4242
},
4343
"netmhc3": {
@@ -102,7 +102,7 @@
102102
"nf-core": {
103103
"multiqc": {
104104
"branch": "master",
105-
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
105+
"git_sha": "7b50cb7be890e4b28cffb82e438cc6a8d7805d3f",
106106
"installed_by": ["modules"]
107107
}
108108
}

modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py

Lines changed: 33 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,7 @@ def makeChild(subTree, start):
123123

124124
else:
125125
missense = 0
126-
127-
126+
128127
if (
129128
row["Variant_Type"] == "SNP"
130129
or row["Variant_Type"] == "DNP"
@@ -463,9 +462,8 @@ def find_most_similar_string(target, strings):
463462
first_AA_same_score,
464463
max_score,
465464
)
466-
467465
NMD_dict = {}
468-
466+
469467
for index_mut, row_mut in neoantigen_mut_in.iterrows():
470468
row_MUT_identity = trim_id(row_mut["Identity"])
471469
IDsplit = row_MUT_identity.split("_")
@@ -534,7 +532,7 @@ def find_most_similar_string(target, strings):
534532
#This block takes care of Missense mutations caused by polymorphisims
535533
matchfound = True
536534
best_pepmatch = WTdict[WTid]["peptide"]
537-
535+
538536
else:
539537
# Here we take care of INDELS and everything else
540538

@@ -579,26 +577,21 @@ def find_most_similar_string(target, strings):
579577
+ 1
580578
)
581579

582-
580+
583581
chrom, pos = mutation_dict[row_mut["Identity"]].split("_")[0:2]
584-
585-
586-
582+
587583
if frameshift:
588584
mut_pos = "Frameshifted peptide"
589585
num_windows = len(list(WTdict[no_positon_ID]["peptides"].keys()))
590586
num_windows_li = len(list(WTdict[no_positon_ID]["peptides"].values()))
591-
# [pep.split(",") for pep in num_windows_li]
592-
593-
# print(WTdict[no_positon_ID]["peptides"])
587+
594588
print("FRAMESHIFT")
595-
589+
596590
if no_positon_ID in NMD_dict:
597591
#NMD must only be calculated once per mutation
598592
pass
599-
else:
593+
else:
600594
split_mutation_dict_ID = mutation_dict[row_MUT_identity].split("_")
601-
print(split_mutation_dict_ID)
602595
if split_mutation_dict_ID[2] == "I":
603596
len_indel = len(split_mutation_dict_ID[3])
604597
elif split_mutation_dict_ID[3] == "D":
@@ -607,10 +600,10 @@ def find_most_similar_string(target, strings):
607600
len_indel = 0
608601
transcriptID = chrom_pos_dict[mutation_dict[row_MUT_identity]]["transcript"]
609602
NMD_dict[no_positon_ID] = determine_NMD(chrom, pos,num_windows,len_indel,ensembl,transcriptID)
610-
603+
611604
else:
612605
NMD_dict[no_positon_ID] = "False"
613-
606+
614607
if SV:
615608
neo_dict = {
616609
"id": row_MUT_identity
@@ -993,7 +986,7 @@ def ensembl_load(release, gtf_file, cdna_file):
993986
# ensembl = EnsemblRelease(int(release))
994987
# ensembl.download()
995988
# ensembl.index()
996-
989+
997990
# else:
998991
ensembl = Genome(gtf_path_or_url=gtf_file,
999992
transcript_fasta_paths_or_urls=cdna_file,
@@ -1011,18 +1004,17 @@ def get_exons_from_transcriptID(transcriptid, ensembl, complete=True):
10111004
:param complete: only consider complete transcripts
10121005
:return: a list of tuples of exon ranges
10131006
"""
1014-
1007+
10151008
transcripts= ensembl.exon_ids_of_transcript_id(transcriptid)
1016-
1009+
10171010
exon_ranges = []
10181011
for exonid in transcripts:
10191012
exon = ensembl.exon_by_id(exonid)
10201013
exon_ranges.append((exon.start, exon.end))
10211014

10221015
return exon_ranges
1023-
1024-
1025-
1016+
1017+
10261018
def determine_NMD(chrom, pos,num_windows,len_indel, ensembl, transcriptID=None):
10271019
"""
10281020
:param chrom: chromosome where alteration takes place
@@ -1034,7 +1026,7 @@ def determine_NMD(chrom, pos,num_windows,len_indel, ensembl, transcriptID=None):
10341026
:param cdna_file: the path of cdna file if release == custom
10351027
:return: NMD value
10361028
"""
1037-
1029+
10381030
if transcriptID == None:
10391031
#do these if maf was not annotated
10401032
transcript = get_transcript(chrom, pos, ensembl)
@@ -1044,62 +1036,50 @@ def determine_NMD(chrom, pos,num_windows,len_indel, ensembl, transcriptID=None):
10441036

10451037
NMD = "False"
10461038

1047-
pos = int(pos)
1039+
pos = int(pos) + 1
10481040
for i in range(0,len(exon_ranges)):
1049-
if pos>exon_ranges[i][0] and pos<exon_ranges[i][1]:
1050-
1051-
exon_ranges_dist = [exon_ranges[p][1]-exon_ranges[p][0] for p in range(i,len(exon_ranges))]
1052-
1053-
mut_to_stop_dist = (num_windows*3)+len_indel+1
1054-
1041+
if pos>=exon_ranges[i][0] and pos<=exon_ranges[i][1]:
1042+
exon_ranges_dist = [exon_ranges[p][1]-exon_ranges[p][0] for p in range(0,len(exon_ranges))]
1043+
mut_to_stop_dist = (num_windows*3)+len_indel+1
1044+
10551045
for d in range(i,len(exon_ranges_dist)):
10561046
if exon_ranges_dist[d] == exon_ranges_dist[i]:
10571047
dist = exon_ranges_dist[d] - (exon_ranges[i][1]-pos)
1058-
10591048
else:
10601049
dist = exon_ranges_dist[d]
1061-
1062-
if exon_ranges_dist[d] - mut_to_stop_dist >= 0:
1063-
1064-
PTC_exon = exon_ranges[d]
1050+
1051+
if dist - mut_to_stop_dist >= 0:
1052+
PTC_exon = exon_ranges[d]
1053+
PTC_pos = exon_ranges[d][0] + mut_to_stop_dist
1054+
break
1055+
elif len(exon_ranges_dist)-1 == d and dist - mut_to_stop_dist < 0:
1056+
PTC_exon = exon_ranges[d]
10651057
PTC_pos = exon_ranges[d][0] + mut_to_stop_dist
1066-
# print(("Found everything!",PTC_exon,PTC_pos,mut_to_stop_dist))
10671058
else:
10681059
mut_to_stop_dist = mut_to_stop_dist - dist
1069-
# print((exon_ranges_dist[d],mut_to_stop_dist))
10701060

10711061
if PTC_exon == exon_ranges[-1]:
10721062
# "on the last exon"
10731063
NMD = "Last Exon"
1074-
print("on the last exon")
1075-
else:
1064+
else:
10761065
if exon_ranges[0][0] - PTC_pos < 150:
10771066
# less than 150 nt away from the start exon
10781067
NMD = "Start-proximal"
1079-
# print((exon_ranges[0][0] , PTC_pos,exon_ranges[0][0] - PTC_pos))
1080-
# print("less than 150 nt away from the start exon")
10811068
else:
10821069
if (PTC_exon[1] - PTC_exon[0]) > 407:
10831070
# in a long exon with more than 407 nt
10841071
NMD = "Long Exon"
1085-
print("in a long exon with more than 407 nt")
10861072
else:
10871073
# it is in the last 50 nt of the penultimate exon
10881074
if PTC_exon == exon_ranges[-2] and (exon_ranges[-2][0] - PTC_pos) < 50 :
10891075
NMD = "50nt Rule"
1090-
# print(num_windows)
1091-
# print(exon_ranges)
1092-
# print(("mut EXON:",exon_ranges[i]))
1093-
# print(("PTC EXON",PTC_exon))
1094-
# print(exon_ranges[i][-2:])
1095-
print("within 50 nt of the penultimate exon")
10961076
else:
10971077
NMD = "Trigger NMD"
10981078

10991079
return NMD
1100-
1101-
1102-
1080+
1081+
1082+
11031083
def parse_args():
11041084
parser = argparse.ArgumentParser(description="Process input files and parameters")
11051085
parser.add_argument("--maf_file", required=True, help="Path to the MAF file")
@@ -1125,7 +1105,7 @@ def parse_args():
11251105
help='GTF file for the reference.')
11261106
parser.add_argument('-cf', '--cdna-file', dest='cdna_file', metavar='CDNA_FILE', default=None,
11271107
help='cDNA file for the reference.')
1128-
1108+
11291109
parser.add_argument("--id", required=True, help="ID")
11301110
parser.add_argument("--patient_id", required=True, help="Patient ID")
11311111
parser.add_argument("--cohort", required=True, help="Cohort")

modules/msk/neoantigenutils/neoantigeninput/tests/main.nf.test.snap

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@
3030
],
3131
"meta": {
3232
"nf-test": "0.9.2",
33-
"nextflow": "24.04.4"
33+
"nextflow": "24.10.5"
3434
},
35-
"timestamp": "2025-03-07T11:55:57.067412639"
35+
"timestamp": "2025-03-14T16:09:55.398027522"
3636
},
3737
"neoantigenutils_neoantigeninput - bedpe,json,tsv": {
3838
"content": [
@@ -65,9 +65,9 @@
6565
],
6666
"meta": {
6767
"nf-test": "0.9.2",
68-
"nextflow": "24.04.4"
68+
"nextflow": "24.10.5"
6969
},
70-
"timestamp": "2025-03-07T11:52:19.151192995"
70+
"timestamp": "2025-03-14T16:06:45.523848459"
7171
},
7272
"neoantigenutils_neoantigeninput - json,tsv": {
7373
"content": [
@@ -78,7 +78,7 @@
7878
"id": "test",
7979
"single_end": false
8080
},
81-
"test_patient_test_input.json:md5,f386dd295154740155b34883a40ae54e"
81+
"test_patient_test_input.json:md5,0c9e3ec012d543da7c2655ee6665cff3"
8282
]
8383
],
8484
"1": [
@@ -90,7 +90,7 @@
9090
"id": "test",
9191
"single_end": false
9292
},
93-
"test_patient_test_input.json:md5,f386dd295154740155b34883a40ae54e"
93+
"test_patient_test_input.json:md5,0c9e3ec012d543da7c2655ee6665cff3"
9494
]
9595
],
9696
"versions": [
@@ -100,8 +100,8 @@
100100
],
101101
"meta": {
102102
"nf-test": "0.9.2",
103-
"nextflow": "24.04.4"
103+
"nextflow": "24.10.5"
104104
},
105-
"timestamp": "2025-03-07T11:55:45.944757678"
105+
"timestamp": "2025-03-14T16:09:43.102608135"
106106
}
107107
}

modules/msk/netmhc/meta.yml

Lines changed: 0 additions & 67 deletions
This file was deleted.

modules/nf-core/multiqc/environment.yml

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

modules/nf-core/multiqc/main.nf

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)