2727
2828
2929def add_check_recipe (p ):
30- """Argument method used to add check-recipes as a module arugment/function """
30+ """Argument method used to add check-recipe as a module argument/function """
3131 import argparse
3232
3333 c = p .add_parser (
@@ -159,7 +159,7 @@ def _build(path, recipe, debug=False):
159159 [d ], ggd_jdict
160160 ) ## .uninstall method to remove extra ggd files
161161 print (
162- "\n :ggd:check-recipe: Review the STDOUT and STDERR, correct the errors, and re-run $ggd check-recipes \n "
162+ "\n :ggd:check-recipe: Review the STDOUT and STDERR, correct the errors, and re-run $ggd check-recipe \n "
163163 )
164164 ## Exit
165165 sys .exit (5 )
@@ -203,16 +203,13 @@ def _install(bz2, recipe_name, debug=False):
203203 3) If the installation fails the program exits. ggd data handling is initiated to remove any new/updated files from the installation process
204204 """
205205 import traceback
206- from .utils import get_required_conda_version
206+ from .utils import get_conda_package_list , get_required_conda_version
207207
208208 conda_version = get_required_conda_version ()
209209 conda_install = "conda=" + conda_version
210210
211211 ## See if it is already installed
212- pkg_out = sp .check_output (["conda list {}" .format (recipe_name )], shell = True ).decode (
213- "utf8"
214- )
215- if recipe_name in pkg_out : ## If already installed
212+ if recipe_name in get_conda_package_list (conda_root (),include_local = True ).keys ():
216213 return False
217214
218215 ## Set CONDA_SOURCE_PREFIX environment variable
@@ -302,7 +299,7 @@ def _install(bz2, recipe_name, debug=False):
302299 print (e )
303300
304301 print (
305- "\n :ggd:check-recipe: Review the STDOUT and STDERR, correct the errors, and re-run $ggd check-recipes \n "
302+ "\n :ggd:check-recipe: Review the STDOUT and STDERR, correct the errors, and re-run $ggd check-recipe \n "
306303 )
307304 ## Exit
308305 sys .exit (1 )
@@ -437,7 +434,20 @@ def check_recipe(parser, args):
437434
438435 ## Check if previous package is already installed or it is a new installation
439436 if new_installed :
440- check_files (
437+
438+ ## Check that the file has a header
439+ if not check_header (install_path ):
440+ print ("\n :ggd:check-recipe: !!ERROR!!" )
441+ print (
442+ "\n \t !!!!!!!!!!!!!!!!!!!!!!!\n \t ! FAILED recipe check !\n \t !!!!!!!!!!!!!!!!!!!!!!!\n "
443+ )
444+ print (
445+ "\n \t !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n \t ! Recipe NOT ready for Pull Requests !\n \t !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n "
446+ )
447+ remove_package_after_install (bz2 , recipe_name , 1 )
448+
449+ ## Get the extra files
450+ extra = check_files (
441451 install_path ,
442452 species ,
443453 build ,
@@ -449,7 +459,7 @@ def check_recipe(parser, args):
449459
450460 ## Add final files and md5sum
451461 if args .dont_add_md5sum_for_checksum == False :
452- recipe = add_final_files (install_path , recipe , args .recipe_path )
462+ recipe = add_final_files (install_path , recipe , args .recipe_path , extra )
453463 add_to_checksum_md5sums (
454464 install_path , recipe , op .join (args .recipe_path , "checksums_file.txt" )
455465 )
@@ -598,7 +608,7 @@ def add_to_checksum_md5sums(installed_dir_path, yaml_file, recipe_checksum_file_
598608 return True
599609
600610
601- def add_final_files (installed_dir_path , yaml_dict , recipe_path ):
611+ def add_final_files (installed_dir_path , yaml_dict , recipe_path , extra_files ):
602612 """Method to add the final data files to the meta.yaml file of a recipe
603613
604614 add_final_files
@@ -618,6 +628,7 @@ def add_final_files(installed_dir_path, yaml_dict, recipe_path):
618628 1) installed_dir_path: The directory path to the installed data files
619629 2) yaml_dict: A dictionary of the meta.yaml file for the recipe
620630 3) recipe_path: The directory path to the recipe being checked
631+ 4) extra_files: The name of the extra files found from check_files method
621632
622633 Returns:
623634 ++++++++
@@ -659,6 +670,7 @@ def add_final_files(installed_dir_path, yaml_dict, recipe_path):
659670 "mzml" ,
660671 "cvs" ,
661672 "tsv" ,
673+ "txt" ,
662674 "bim" ,
663675 "fam" ,
664676 "ped" ,
@@ -689,6 +701,11 @@ def add_final_files(installed_dir_path, yaml_dict, recipe_path):
689701 yaml_dict ["about" ]["tags" ]["final-files" ]
690702 )
691703
704+ ## Add extra files if they exists
705+ if extra_files :
706+ print (":ggd:check-recipe: Attempting to add the extra files not already added in the mat.yaml file\n " )
707+ yaml_dict ["extra" ]["extra-files" ] = extra_files
708+
692709 ## Rewrite yaml file with new tags and new final files
693710 with open (os .path .join (recipe_path , "meta.yaml" ), "w" ) as newFile :
694711 for key in sorted (yaml_dict .keys ()):
@@ -804,12 +821,173 @@ def remove_package_after_install(bz2, recipe_name, exit_num):
804821 print (e )
805822
806823 print (
807- "\n :ggd:check-recipe: Review the STDOUT and STDERR, correct the errors, and re-run $ggd check-recipes \n "
824+ "\n :ggd:check-recipe: Review the STDOUT and STDERR, correct the errors, and re-run $ggd check-recipe \n "
808825 )
809826 ## Exit
810827 sys .exit (exit_num )
811828
812829
def check_header(install_path):
    """Method to check whether the final genomic files have a header or not

    check_header
    ============
    This method goes through each regular file found directly in the install
    directory (sub-directories are ignored) and checks whether the file has a header.

    sam/bam/cram, vcf/bcf, gtf/gff/gff3, bed/bedGraph, csv, txt files require a header and
    if no header is provided check-recipe will fail.

    Other files that don't have a header will be given a warning. GGD expects most files to
    have a header. Some files are okay not to have headers, but if a header can be added it
    should be.

    Index/archive files (tbi, csi, bai, crai, fai, tar, bz2, bw) and fasta/fastq files are
    skipped.

    File types are recognized from the dot-separated components of the file name
    (e.g. "x.bed.gz" -> {"bed", "gz"}), so a name such as "x.fam" is NOT treated as a
    ".fa" file and "x.samples.txt" is NOT treated as a ".sam" file.

    For each file, the file header and first 5 lines of the file body will be provided to stdout.

    Parameters:
    -----------
    1) install_path: The path to the directory where the files have been installed into.

    Returns:
    +++++++
    True or False.
     - True if a header exist or if only a warning was given
     - False if a header does not exists and is required, or if a file could not be read
    """
    import gzip

    ## File name components that identify each class of file
    skipped_last_exts = set(["tbi", "csi", "bai", "crai", "fai", "tar", "bz2", "bw"])
    fastx_exts = set(["fasta", "fa", "fastq", "fq"])
    alignment_exts = set(["sam", "bam", "cram"])
    variant_exts = set(["vcf", "bcf"])
    header_required_exts = set(["gtf", "gff", "gff3", "bed", "bedGraph", "csv", "txt"])

    print(":ggd:check-recipe: Checking that the final files have headers if appropriate\n ")

    for file_name in os.listdir(install_path):

        f_path = os.path.join(install_path, file_name)

        ## Only regular files are checked
        if not os.path.isfile(f_path):
            continue

        name_parts = file_name.strip().split(".")

        ## Check for an index file (decided by the last extension only)
        if name_parts[-1] in skipped_last_exts:
            continue

        ## Every dot-separated component after the base name, so compressed
        ##  files such as "x.vcf.gz" are still recognized by their real type
        extensions = set(name_parts[1:])

        ## Skip fasta or fastq files
        if extensions & fastx_exts:
            continue

        ## Check for sam/bam/cram files
        if extensions & alignment_exts:
            import pysam

            samfile = None
            try:
                samfile = pysam.AlignmentFile(f_path, check_sq=False)
                header = samfile.header
                if any(header.lengths):
                    print(":ggd:check-recipe: Header found in file {name}\n ".format(name=file_name))
                    print("Head of file:")
                    print("---------------------------")
                    print(str(header).strip())
                    for i, read in enumerate(samfile):
                        print(read)
                        if i >= 4:
                            break
                    print("---------------------------\n ")

                else:
                    print(":ggd:check-recipe: !!ERROR!! No header found for file {name}\n ".format(name=file_name))
                    print(":ggd:check-recipe: !!ERROR!! A header is required for sam/bam/cram files\n ")
                    return False

            ## Any failure to open/parse the file is reported as a missing header
            except Exception as e:
                print(str(e))
                print(":ggd:check-recipe: !!ERROR!! No header found for file {name}\n ".format(name=file_name))
                print(":ggd:check-recipe: !!ERROR!! A header is required for sam/bam/cram files\n ")
                return False

            finally:
                ## Always release the underlying file handle
                if samfile is not None:
                    samfile.close()

        ## Check vcf/bcf files
        elif extensions & variant_exts:
            from cyvcf2 import VCF

            vcffile = None
            try:
                vcffile = VCF(f_path)
                header = str(vcffile.raw_header)

                if header:
                    print(":ggd:check-recipe: Header found in file {name}\n ".format(name=file_name))
                    print("Head of file:")
                    print("---------------------------")
                    print(str(header).strip())
                    for i, var in enumerate(vcffile):
                        print(var)
                        if i >= 4:
                            break
                    print("---------------------------\n ")

                else:
                    print(":ggd:check-recipe: !!ERROR!! No header found for file {name}\n ".format(name=file_name))
                    print(":ggd:check-recipe: !!ERROR!! A header is required for vcf/bcf files\n ")
                    return False

            except IOError as e:
                print(str(e))
                print(":ggd:check-recipe: !!ERROR!! No header found for file {name}\n ".format(name=file_name))
                print(":ggd:check-recipe: !!ERROR!! A header is required for vcf/bcf files\n ")
                return False

            finally:
                ## Always release the underlying file handle
                if vcffile is not None:
                    vcffile.close()

        ## Check other files
        else:
            opener = gzip.open if f_path.endswith(".gz") else open
            header = []
            body = []
            try:
                ## The context manager guarantees the handle is closed
                with opener(f_path) as file_handler:
                    try:
                        for line in file_handler:

                            if not isinstance(line, str):
                                ## Binary (gzip) lines need decoding
                                line = line.strip().decode("utf-8")
                            else:
                                ## Drop the line terminator so the preview is not double spaced
                                line = line.rstrip("\r\n")

                            if line.startswith("#"):
                                header.append(line)

                            else:
                                body.append(line)
                                ## Only the first 5 body lines are needed
                                if len(body) > 4:
                                    break

                    except UnicodeDecodeError:
                        ## Best effort: continue with whatever was read before the failure
                        print(":ggd:check-recipe: Cannot decode file contents into unicode.\n ")

            except IOError as e:
                print(":ggd:check-recipe: !!ERROR!!")
                print(str(e))
                return False

            if header:
                print(":ggd:check-recipe: Header found in file {name}\n ".format(name=file_name))
                print("Head of file:")
                print("---------------------------")
                print("\n ".join(header))
                print("\n ".join(body))
                print("---------------------------\n ")
            elif extensions & header_required_exts:
                print(":ggd:check-recipe: !!ERROR!! No header found for file {name}\n ".format(name=file_name))
                print(":ggd:check-recipe: !!ERROR!! A header is required for this type of file\n ")
                print("First 5 lines of file body:")
                print("---------------------------")
                print("\n ".join(body))
                print("---------------------------\n ")
                return False
            else:
                print(":ggd:check-recipe: !!WARNING!! No header found for file {name}\n ".format(name=file_name))
                print("First 5 lines of file body:")
                print("---------------------------")
                print("\n ".join(body))
                print("---------------------------\n ")
                print(":ggd:check-recipe: !!WARNING!! GGD requires that any file that can have a header should. Please either add a header or if the file cannot have a header move forward.\n ")
                print(":ggd:check-recipe: !!WARNING!! IF you move forwared without adding a header when one should be added, this recipe will be rejected until a header is added.\n ")

    return True
989+
990+
813991def check_files (
814992 install_path , species , build , recipe_name , extra_files , before_files , bz2
815993):
@@ -827,8 +1005,9 @@ def check_files(
8271005 print (":ggd:check-recipe: modified files:\n \t :: %s\n \n " % "\n \t :: " .join (files ))
8281006
8291007 tbis = [x for x in files if x .endswith (".tbi" )] # all tbi files
1008+ tbis = [x for x in files if x .endswith ((".tbi" ,".csi" ))] # all tbi files
8301009
831- nons = [x for x in files if not x .endswith (".tbi" )] # all non tbi files
1010+ nons = [x for x in files if not x .endswith (( ".tbi" , ".csi" ) )] # all non tbi files
8321011
8331012 tbxs = [x [:- 4 ] for x in tbis if x [:- 4 ] in nons ] # names of files tabixed
8341013
@@ -868,6 +1047,8 @@ def check_files(
8681047 missing = []
8691048 not_tabixed = []
8701049 not_faidxed = []
1050+ add_extra = False
1051+ add_extra_files = []
8711052 for n in nons :
8721053 print (":ggd:check-recipe: > checking %s" % n )
8731054 if n .endswith (
@@ -903,16 +1084,21 @@ def check_files(
9031084 elif op .basename (n ) not in extra_files and not any (
9041085 fnmatch (op .basename (n ), e ) for e in extra_files
9051086 ):
906- missing .append (
907- ":ggd:check-recipe: !!ERROR!!: %s(%s) unknown file and not in the extra/extra-files section of the yaml\n "
908- % (P , n )
909- )
1087+ print (
1088+ "\n :ggd:check-recipe: !!WARNING!!: %s(%s) unknown file and not in the extra/extra-files section of the yaml\n "
1089+ % (P , n )
1090+ )
1091+ add_extra_files .append (op .basename (n ))
1092+ add_extra = True
9101093
9111094 if missing or not_tabixed or not_faidxed :
9121095 print ("\n " .join (missing + not_tabixed + not_faidxed ), file = sys .stderr )
9131096 remove_package_after_install (bz2 , recipe_name , 2 )
9141097
915- return True
1098+ if add_extra :
1099+ return add_extra_files
1100+ else :
1101+ return []
9161102
9171103
9181104def check_yaml (recipe ):
0 commit comments