Skip to content

Commit 56a1411

Browse files
authored
Merge pull request #71 from mskcc/dev
Release 1.2
2 parents d0e4ae9 + d30c04b commit 56a1411

File tree

16 files changed

+89
-82
lines changed

16 files changed

+89
-82
lines changed

.github/workflows/linting_comment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
runs-on: ubuntu-latest
1212
steps:
1313
- name: Download lint results
14-
uses: dawidd6/action-download-artifact@80620a5d27ce0ae443b965134db88467fc607b43 # v7
14+
uses: dawidd6/action-download-artifact@20319c5641d495c8a52e688b7dc5fada6c3a9fbc # v8
1515
with:
1616
workflow: linting.yml
1717
workflow_conclusion: completed

.nf-core.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ lint:
1919
multiqc_config:
2020
- report_comment
2121
nextflow_config: false
22-
nf_core_version: 3.1.2
22+
nf_core_version: 3.2.0
2323
repository_type: pipeline
2424
template:
2525
author: Nikhil Kumar

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ repos:
77
- prettier@3.2.5
88

99
- repo: https://github.com/editorconfig-checker/editorconfig-checker.python
10-
rev: "3.0.3"
10+
rev: "3.1.2"
1111
hooks:
1212
- id: editorconfig-checker
1313
alias: ec

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ sample,maf,facets_hisens_cncf,hla_file
3636
tumor_normal,temp_test_somatic_unfiltered.maf,facets_hisens.cncf.txt,winners.hla.txt
3737
tumor_normal2,temp_test_somatic_unfiltered.maf,facets_hisens.cncf.txt,winners.hla.txt
3838
```
39-
-->
4039

4140
Now, you can run the pipeline using:
4241

conf/modules.config

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,8 @@ process {
2828
withName: 'PHYLOWGS_WRITERESULTS' {
2929
ext.args = '--max-multiprimary 1.0'
3030
}
31+
32+
withName: 'NEOANTIGENUTILS_NEOANTIGENINPUT' {
33+
ext.args = "--kD_cutoff ${params.kd_cutoff}"
34+
}
3135
}

conf/prod.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ params {
3636
phylo_burnin_samples = 1000
3737
phylo_mcmc_samples = 2500
3838
phylo_num_chains = 15
39+
kd_cutoff = 500
3940

4041
iedbfasta = 'https://raw.githubusercontent.com/mskcc/NeoantigenEditing/refs/heads/main/data/iedb.fasta'
4142
cds = 'https://github.com/mskcc-omics-workflows/test-datasets/raw/neoantigen/neoantigen/Homo_sapiens.GRCh37.75.cds.all.fa.gz'

conf/test.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ params {
3232
phylo_mcmc_samples = 2
3333
phylo_num_chains = 2
3434
netmhc3 = true
35+
kd_cutoff = 500
3536

3637
iedbfasta = 'https://raw.githubusercontent.com/mskcc-omics-workflows/test-datasets/neoantigen/neoantigen/neoantigenEditing/data/iedb.fasta'
3738
cds = 'https://github.com/mskcc-omics-workflows/test-datasets/raw/neoantigen/neoantigen/Homo_sapiens.GRCh37.75.cds.all.fa.gz'

docs/output.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
## Introduction
44

5-
This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline.
5+
This document describes the output produced by the neoantigen pipeline.
66

77
The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory.
88

modules.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
},
3838
"neoantigenutils/neoantigeninput": {
3939
"branch": "develop",
40-
"git_sha": "003587a171d6cfa80bc894950d212add9f206f88",
40+
"git_sha": "ba014f40a3aaccd6a78db44f62d697b77a790eb8",
4141
"installed_by": ["modules"]
4242
},
4343
"netmhc3": {
@@ -107,7 +107,7 @@
107107
"nf-core": {
108108
"multiqc": {
109109
"branch": "master",
110-
"git_sha": "f80914f78fb7fa1c00b14cfeb29575ee12240d9c",
110+
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
111111
"installed_by": ["modules"]
112112
}
113113
}

modules/msk/neoantigenutils/neoantigeninput/resources/usr/bin/generate_input.py

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from Bio.pairwise2 import format_alignment
99
import numpy as np
1010

11-
VERSION = 1.8
11+
VERSION = 1.9
1212

1313

1414
def main(args):
@@ -340,7 +340,7 @@ def find_first_difference_index(str1, str2):
340340
WTdict = {}
341341
SVWTdict = {}
342342
for index_WT, row_WT in neoantigen_WT_in.iterrows():
343-
noposID = ""
343+
no_positon_ID = ""
344344
id = ""
345345
wtsvid = ""
346346
row_WT_identity = trim_id(row_WT["Identity"])
@@ -358,7 +358,7 @@ def find_first_difference_index(str1, str2):
358358
+ "_"
359359
+ str(row_WT["pos"])
360360
)
361-
noposID = (
361+
no_positon_ID = (
362362
IDsplit[0]
363363
+ "_"
364364
+ IDsplit[1][0:7]
@@ -372,16 +372,16 @@ def find_first_difference_index(str1, str2):
372372
"peptide": row_WT["peptide"],
373373
}
374374
id = wtsvid
375-
if noposID not in WTdict:
376-
WTdict[noposID] = {
375+
if no_positon_ID not in WTdict:
376+
WTdict[no_positon_ID] = {
377377
"peptides": {
378378
row_WT["peptide"]: id
379379
}, # This is a dict so we can match the peptide with the actual ID later
380380
"affinity": row_WT["affinity"],
381381
}
382382

383383
else:
384-
WTdict[noposID]["peptides"][row_WT["peptide"]] = id
384+
WTdict[no_positon_ID]["peptides"][row_WT["peptide"]] = id
385385

386386
else:
387387
id = (
@@ -394,7 +394,7 @@ def find_first_difference_index(str1, str2):
394394
+ str(row_WT["pos"])
395395
)
396396

397-
noposID = (
397+
no_positon_ID = (
398398
row_WT_identity[:-2]
399399
+ "_"
400400
+ str(len(row_WT["peptide"]))
@@ -404,16 +404,16 @@ def find_first_difference_index(str1, str2):
404404
WTdict[id] = {"affinity": row_WT["affinity"], "peptide": row_WT["peptide"]}
405405

406406
# This is used as a last resort for the matching. We will preferentially find the peptide matching in length as well as POS. Worst case, we will default to the WT pos 0
407-
if noposID not in WTdict:
408-
WTdict[noposID] = {
407+
if no_positon_ID not in WTdict:
408+
WTdict[no_positon_ID] = {
409409
"peptides": {
410410
row_WT["peptide"]: id
411411
}, # This is a dict so we can match the peptide with the ID later
412412
"affinity": row_WT["affinity"],
413413
}
414414

415415
else:
416-
WTdict[noposID]["peptides"][row_WT["peptide"]] = id
416+
WTdict[no_positon_ID]["peptides"][row_WT["peptide"]] = id
417417

418418
def find_most_similar_string(target, strings):
419419
max_score = -1
@@ -457,9 +457,10 @@ def find_most_similar_string(target, strings):
457457
row_MUT_identity = trim_id(row_mut["Identity"])
458458
IDsplit = row_MUT_identity.split("_")
459459
SV = False
460-
if row_mut["affinity"] < 500:
460+
if row_mut["affinity"] < float(args.kD_cutoff):
461461
peplen = len(row_mut["peptide"])
462462
matchfound = False
463+
frameshift= False
463464
if IDsplit[1][0] == "S" and IDsplit[1][1] != "p":
464465
# Skip silent mutations. Silent mutations can be either S or SY; these include intron mutations. Splices can be Sp
465466
continue
@@ -476,7 +477,7 @@ def find_most_similar_string(target, strings):
476477
+ "_"
477478
+ str(row_mut["pos"])
478479
)
479-
noposID = (
480+
no_positon_ID = (
480481
IDsplit[0]
481482
+ "_"
482483
+ IDsplit[1][0:8]
@@ -500,46 +501,40 @@ def find_most_similar_string(target, strings):
500501
+ "_"
501502
+ str(row_mut["pos"])
502503
)
503-
noposID = (
504+
no_positon_ID = (
504505
row_MUT_identity[:-2]
505506
+ "_"
506507
+ str(peplen)
507508
+ "_"
508509
+ row_mut["MHC"].split("-")[1].replace(":", "").replace("*", "")
509510
)
510511
if (
511-
WTid in WTdict
512-
and ("M" == IDsplit[1][0] and "Sp" not in row_MUT_identity)
512+
("M" == IDsplit[1][0] and "Sp" not in row_MUT_identity)
513513
or SV == False
514514
):
515515
# match
516-
matchfound = True
517-
best_pepmatch = WTdict[WTid]["peptide"]
518-
frameshift = False
519-
520-
else:
521516
if (
522-
"-" in row_MUT_identity
523-
or "+" in row_MUT_identity
524-
and WTid in WTdict
525-
or SV == False
526-
):
527-
# Means there is a frame shift and we don't need to do a analysis of 5' end and 3' end as 3' end is no longer recognizeable/comparable to the WT sequence at all
528-
# We can just move the windows along together. There will likely be little to no match with the WT peptides.
517+
(WTid in WTdict)
518+
and IDsplit[1][0] != "I"
519+
):
520+
# This block takes care of missense mutations caused by polymorphisms
529521
matchfound = True
530522
best_pepmatch = WTdict[WTid]["peptide"]
531-
frameshift = False
523+
532524
else:
533-
# Here we take care of frameshifted peptides
534-
frameshift = True
525+
# Here we take care of INDELS and everything else
526+
527+
if ("-" in IDsplit[1] or "+" in IDsplit[1]):
528+
frameshift = True
529+
535530
(
536531
best_pepmatch,
537532
best_pepmatch2,
538533
first_AA_same,
539534
first_AA_same_score,
540535
match_score,
541536
) = find_most_similar_string(
542-
row_mut["peptide"], list(WTdict[noposID]["peptides"].keys())
537+
row_mut["peptide"], list(WTdict[no_positon_ID]["peptides"].keys())
543538
)
544539
if (
545540
best_pepmatch == row_mut["peptide"]
@@ -556,14 +551,14 @@ def find_most_similar_string(target, strings):
556551
best_pepmatch[-1] != row_mut["peptide"][-1]
557552
and best_pepmatch2[-1] == row_mut["peptide"][-1]
558553
):
559-
# We should preferentially match the first AA if we can. I have found that the pairwise alignment isnt always the best at this.
554+
# We should preferentially match the first AA if we can. Sometimes the pairwise alignment isn't the best at this, so we do a little check here.
560555
# It will also do this when the last AA of the best match doesn't match but the last AA of the second best match does
561556
best_pepmatch = best_pepmatch2
562557

563-
WTid = WTdict[noposID]["peptides"][best_pepmatch]
558+
WTid = WTdict[no_positon_ID]["peptides"][best_pepmatch]
564559
matchfound = True
565560

566-
if matchfound == True:
561+
if matchfound == True and best_pepmatch != row_mut["peptide"]:
567562
mut_pos = (
568563
find_first_difference_index(
569564
row_mut["peptide"], best_pepmatch # WTdict[WTid]["peptide"]
@@ -934,6 +929,10 @@ def parse_args():
934929
"-v", "--version", action="version", version="%(prog)s {}".format(VERSION)
935930
)
936931

932+
parser.add_argument(
933+
"--kD_cutoff", default=500, help="Cutoff value for the kD, default is 500",
934+
)
935+
937936
return parser.parse_args()
938937

939938

@@ -948,6 +947,7 @@ def parse_args():
948947
print("Cohort:", args.cohort)
949948
print("HLA Genes File:", args.HLA_genes)
950949
print("netMHCpan Files:", args.netMHCpan_MUT_input, args.netMHCpan_WT_input)
950+
print("kD Cutoff Value:", args.kD_cutoff)
951951
if args.patient_data_file:
952952
print("patient_data_file File:", args.patient_data_file)
953953

0 commit comments

Comments
 (0)