2020Program info can be found in the docstring of the main function.
2121Details can also be obtained by running the script with -h .
2222"""
23- from __future__ import print_function
23+
2424
2525import argparse
2626import errno
3333import time
3434from collections import defaultdict , Counter
3535from multiprocessing import cpu_count
36- from urlparse import urlparse
36+ from urllib . parse import urlparse
3737
3838from pysam import Samfile
3939
@@ -78,7 +78,7 @@ def parse_config_file(job, config_file):
7878 # along with its parameters.
7979 for groupname , group_params in tool_specific_param_generator (job , conf ):
8080 if groupname == 'patient' :
81- if 'patient_id' not in group_params .keys ():
81+ if 'patient_id' not in list ( group_params .keys () ):
8282 raise ParameterError ('A patient group is missing the patient_id flag.' )
8383 sample_set [group_params ['patient_id' ]] = group_params
8484 elif groupname == 'Universal_Options' :
@@ -104,7 +104,7 @@ def parse_config_file(job, config_file):
104104 raise ParameterError (' The following tools have no arguments in the config file : \n ' +
105105 '\n ' .join (missing_tools ))
106106 # Start a job for each sample in the sample set
107- for patient_id in sample_set .keys ():
107+ for patient_id in list ( sample_set .keys () ):
108108 job .addFollowOnJobFn (pipeline_launchpad , sample_set [patient_id ], univ_options , tool_options )
109109 return None
110110
@@ -248,7 +248,7 @@ def delete_fastqs(job, fastqs):
248248 +- 'normal_dna': [<JSid for 1.fastq> , <JSid for 2.fastq>]
249249 """
250250 for fq_type in ['tumor_rna' , 'tumor_dna' , 'normal_dna' ]:
251- for i in xrange (0 ,2 ):
251+ for i in range (0 ,2 ):
252252 job .fileStore .deleteGlobalFile (fastqs [fq_type ][i ])
253253 return None
254254
@@ -727,7 +727,7 @@ def spawn_radia(job, rna_bam, tumor_bam, normal_bam, univ_options, radia_options
727727 'normal_dnai' : normal_bam ['normal_dna_fix_pg_sorted.bam.bai' ]}
728728 # Make a dict object to hold the return values for each of the chromosome jobs. Then run radia
729729 # on each chromosome.
730- chromosomes = ['' .join (['chr' , str (x )]) for x in range (1 , 23 ) + ['X' , 'Y' ]]
730+ chromosomes = ['' .join (['chr' , str (x )]) for x in list ( range (1 , 23 ) ) + ['X' , 'Y' ]]
731731 perchrom_radia = defaultdict ()
732732 for chrom in chromosomes :
733733 perchrom_radia [chrom ] = job .addChildJobFn (run_radia , bams , univ_options , radia_options ,
@@ -755,11 +755,11 @@ def merge_radia(job, perchrom_rvs):
755755 work_dir = job .fileStore .getLocalTempDir ()
756756 # We need to squash the input dict of dicts to a single dict such that it can be passed to
757757 # get_files_from_filestore
758- input_files = {filename : jsid for perchrom_files in perchrom_rvs .values ()
759- for filename , jsid in perchrom_files .items ()}
758+ input_files = {filename : jsid for perchrom_files in list ( perchrom_rvs .values () )
759+ for filename , jsid in list ( perchrom_files .items () )}
760760 input_files = get_files_from_filestore (job , input_files , work_dir ,
761761 docker = False )
762- chromosomes = ['' .join (['chr' , str (x )]) for x in range (1 , 23 ) + ['X' , 'Y' ]]
762+ chromosomes = ['' .join (['chr' , str (x )]) for x in list ( range (1 , 23 ) ) + ['X' , 'Y' ]]
763763 with open ('/' .join ([work_dir , 'radia_calls.vcf' ]), 'w' ) as radfile , \
764764 open ('/' .join ([work_dir , 'radia_filter_passing_calls.vcf' ]), 'w' ) as radpassfile :
765765 for chrom in chromosomes :
@@ -961,7 +961,7 @@ def spawn_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options):
961961 job .fileStore .logToMaster ('Running spawn_mutect on %s' % univ_options ['patient' ])
962962 # Make a dict object to hold the return values for each of the chromosome
963963 # jobs. Then run mutect on each chromosome.
964- chromosomes = ['' .join (['chr' , str (x )]) for x in range (1 , 23 ) + ['X' , 'Y' ]]
964+ chromosomes = ['' .join (['chr' , str (x )]) for x in list ( range (1 , 23 ) ) + ['X' , 'Y' ]]
965965 perchrom_mutect = defaultdict ()
966966 for chrom in chromosomes :
967967 perchrom_mutect [chrom ] = job .addChildJobFn (run_mutect , tumor_bam , normal_bam , univ_options ,
@@ -987,10 +987,10 @@ def merge_mutect(job, perchrom_rvs):
987987 work_dir = job .fileStore .getLocalTempDir ()
988988 # We need to squash the input dict of dicts to a single dict such that it can be passed to
989989 # get_files_from_filestore
990- input_files = {filename : jsid for perchrom_files in perchrom_rvs .values ()
991- for filename , jsid in perchrom_files .items ()}
990+ input_files = {filename : jsid for perchrom_files in list ( perchrom_rvs .values () )
991+ for filename , jsid in list ( perchrom_files .items () )}
992992 input_files = get_files_from_filestore (job , input_files , work_dir , docker = False )
993- chromosomes = ['' .join (['chr' , str (x )]) for x in range (1 , 23 ) + ['X' , 'Y' ]]
993+ chromosomes = ['' .join (['chr' , str (x )]) for x in list ( range (1 , 23 ) ) + ['X' , 'Y' ]]
994994 with open ('/' .join ([work_dir , 'mutect_calls.vcf' ]), 'w' ) as mutvcf , \
995995 open ('/' .join ([work_dir , 'mutect_calls.out' ]), 'w' ) as mutout , \
996996 open ('/' .join ([work_dir , 'mutect_passing_calls.vcf' ]), 'w' ) as mutpassvcf :
@@ -1139,7 +1139,7 @@ def run_mutation_aggregator(job, fusion_output, radia_output, mutect_output, ind
11391139 input_files .pop ('fusion.vcf' )
11401140 # read files into memory
11411141 vcf_file = defaultdict ()
1142- mutcallers = input_files .keys ()
1142+ mutcallers = list ( input_files .keys () )
11431143 with open ('' .join ([work_dir , '/' , univ_options ['patient' ], '_merged_mutations.vcf' ]),
11441144 'w' ) as merged_mut_file :
11451145 for mut_caller in mutcallers :
@@ -1571,8 +1571,8 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files):
15711571 mhci_files = get_files_from_filestore (job , mhci_preds , work_dir )
15721572 # First split mhcii_preds into prediction files and predictors and maintain keys so we can later
15731573 # reference them in pairs
1574- mhcii_predictors = {x : y [1 ] for x , y in mhcii_preds .items ()}
1575- mhcii_files = {x : y [0 ] for x , y in mhcii_preds .items ()}
1574+ mhcii_predictors = {x : y [1 ] for x , y in list ( mhcii_preds .items () )}
1575+ mhcii_files = {x : y [0 ] for x , y in list ( mhcii_preds .items () )}
15761576 mhcii_files = get_files_from_filestore (job , mhcii_files , work_dir )
15771577 # Get peptide files
15781578 pept_files = get_files_from_filestore (job , pept_files , work_dir )
@@ -1584,7 +1584,7 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files):
15841584 pepmap = json .load (mapfile )
15851585 # Incorporate peptide names into the merged calls
15861586 with open ('/' .join ([work_dir , 'mhci_merged_files.list' ]), 'w' ) as mhci_resfile :
1587- for mhcifile in mhci_files .values ():
1587+ for mhcifile in list ( mhci_files .values () ):
15881588 with open (mhcifile , 'r' ) as mf :
15891589 for line in mf :
15901590 # Skip header lines
@@ -1605,7 +1605,7 @@ def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files):
16051605 # Incorporate peptide names into the merged calls
16061606 with open ('/' .join ([work_dir , 'mhcii_merged_files.list' ]), 'w' ) as \
16071607 mhcii_resfile :
1608- for mhciifile in mhcii_files .keys ():
1608+ for mhciifile in list ( mhcii_files .keys () ):
16091609 core_col = None # Variable to hold the column number with the core
16101610 if mhcii_predictors [mhciifile ] == 'Consensus' :
16111611 with open (mhcii_files [mhciifile ], 'r' ) as mf :
@@ -1814,7 +1814,7 @@ def prepare_samples(job, fastqs, univ_options):
18141814 'normal_dna_fastq_prefix' }
18151815 if set (fastqs .keys ()).difference (allowed_samples ) != {'patient_id' }:
18161816 raise ParameterError ('Sample with the following parameters has an error:\n ' +
1817- '\n ' .join (fastqs .values ()))
1817+ '\n ' .join (list ( fastqs .values () )))
18181818 # For each sample type, check if the prefix is an S3 link or a regular file
18191819 # Download S3 files.
18201820 for sample_type in ['tumor_dna' , 'tumor_rna' , 'normal_dna' ]:
@@ -1877,7 +1877,7 @@ def get_files_from_filestore(job, files, work_dir, cache=True, docker=False):
18771877 work_dir is the location where the file should be stored
18781878 cache indicates whether caching should be used
18791879 """
1880- for name in files .keys ():
1880+ for name in list ( files .keys () ):
18811881 outfile = job .fileStore .readGlobalFile (files [name ], '/' .join ([work_dir , name ]), cache = cache )
18821882 # If the file pointed to a tarball, extract it to WORK_DIR
18831883 if tarfile .is_tarfile (outfile ) and file_xext (outfile ).startswith ('.tar' ):
@@ -1924,15 +1924,15 @@ def most_probable_alleles(allele_list):
19241924 except KeyError :
19251925 all_alleles [allele ] = [float (pvalue )]
19261926 # If there are 2 or fewer alleles, report all
1927- if len (all_alleles .keys ()) <= 2 :
1928- return all_alleles .keys ()
1927+ if len (list ( all_alleles .keys () )) <= 2 :
1928+ return list ( all_alleles .keys () )
19291929 # Else, get the two with most evidence. Evidence is gauged by
19301930 # a) How many files (of the 3) thought that Allele was present
19311931 # b) In a tie, who has a lower avg p value
19321932 # In the lambda function, if 2 alleles have the same number of calls, the sum of the p values is
19331933 # a measure of the avg because avg = sum / n and n is equal in both of them.
19341934 else :
1935- return sorted (all_alleles .keys (), key = lambda x : \
1935+ return sorted (list ( all_alleles .keys () ), key = lambda x : \
19361936 (- len (all_alleles [x ]), sum (all_alleles [x ])))[0 :2 ]
19371937
19381938
@@ -2111,7 +2111,7 @@ def print_mhc_peptide(neoepitope_info, peptides, pepmap, outfile):
21112111
21122112 """
21132113 allele , pept , pred , core = neoepitope_info
2114- peptide_names = [x for x , y in peptides .items () if pept in y ]
2114+ peptide_names = [x for x , y in list ( peptides .items () ) if pept in y ]
21152115 # For each peptide, append the ensembl gene
21162116 for peptide_name in peptide_names :
21172117 print (allele , pept , peptide_name , core , '0' , pred , pepmap [peptide_name ], sep = '\t ' ,
@@ -2514,7 +2514,7 @@ def strip_xext(filepath):
25142514 :return str filepath: Path to the file with the compression extension stripped off.
25152515 """
25162516 ext_size = len (file_xext (filepath ).split ('.' )) - 1
2517- for i in xrange (0 , ext_size ):
2517+ for i in range (0 , ext_size ):
25182518 filepath = os .path .splitext (filepath )[0 ]
25192519 return filepath
25202520
0 commit comments