Skip to content

Commit 08fe6a0

Browse files
authored
Cli update (#34)
* Update extra file error to a warning and attempt to fix the problem * Update stdout for user * Update check-recipe - Improve conda list pkg searching - Add method to check for header in genomic files - Add .csi support * Update check-recipe test - New tests for check-header function - Update helper function for header tests - New test for add_files to add extra files to the yaml - New test for check-files to account for extra files - test for .csi files * Update utils with new methods - Method to check the conda pkg dir for installed ggd metadata. If missing, will replace * Update utils tests - New test for checking conda pkg dir for ggd installed metadata - Update conda version test for the newest version of conda 4.8.2 * Bump ggd version to 0.1.2 * Update install test to use better conda info searching * Update and add new requirements - Newest version of conda = 4.8.2 - Newest version of conda-build = 3.8.12 - Add pysam - Add cyvcf2 * Update check-recipe test - Support for header check - Support for extra file handling * Update post link file exists handling * Update post link file exists handling * Update Error Handling
1 parent 1913276 commit 08fe6a0

File tree

8 files changed

+752
-45
lines changed

8 files changed

+752
-45
lines changed

ggd/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
__version__ = "0.1.1"
1+
__version__ = "0.1.2"
2+

ggd/check_recipe.py

Lines changed: 204 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727

2828

2929
def add_check_recipe(p):
30-
"""Argument method used to add check-recipes as a module arugment/function """
30+
"""Argument method used to add check-recipe as a module arugment/function """
3131
import argparse
3232

3333
c = p.add_parser(
@@ -159,7 +159,7 @@ def _build(path, recipe, debug=False):
159159
[d], ggd_jdict
160160
) ## .uninstall method to remove extra ggd files
161161
print(
162-
"\n:ggd:check-recipe: Review the STDOUT and STDERR, correct the errors, and re-run $ggd check-recipes\n"
162+
"\n:ggd:check-recipe: Review the STDOUT and STDERR, correct the errors, and re-run $ggd check-recipe\n"
163163
)
164164
## Exit
165165
sys.exit(5)
@@ -203,16 +203,13 @@ def _install(bz2, recipe_name, debug=False):
203203
3) If the installation fails progam exits. ggd data handeling is initated to remove any new/updated files from the installation process
204204
"""
205205
import traceback
206-
from .utils import get_required_conda_version
206+
from .utils import get_conda_package_list, get_required_conda_version
207207

208208
conda_version = get_required_conda_version()
209209
conda_install = "conda=" + conda_version
210210

211211
## See if it is already installed
212-
pkg_out = sp.check_output(["conda list {}".format(recipe_name)], shell=True).decode(
213-
"utf8"
214-
)
215-
if recipe_name in pkg_out: ## If already installed
212+
if recipe_name in get_conda_package_list(conda_root(),include_local=True).keys():
216213
return False
217214

218215
## Set CONDA_SOURCE_PREFIX environment variable
@@ -302,7 +299,7 @@ def _install(bz2, recipe_name, debug=False):
302299
print(e)
303300

304301
print(
305-
"\n:ggd:check-recipe: Review the STDOUT and STDERR, correct the errors, and re-run $ggd check-recipes\n"
302+
"\n:ggd:check-recipe: Review the STDOUT and STDERR, correct the errors, and re-run $ggd check-recipe\n"
306303
)
307304
## Exit
308305
sys.exit(1)
@@ -437,7 +434,20 @@ def check_recipe(parser, args):
437434

438435
## Check if previous package is already installed or it is a new installation
439436
if new_installed:
440-
check_files(
437+
438+
## Check that the file has a header
439+
if not check_header(install_path):
440+
print("\n:ggd:check-recipe: !!ERROR!!")
441+
print(
442+
"\n\t!!!!!!!!!!!!!!!!!!!!!!!\n\t! FAILED recipe check !\n\t!!!!!!!!!!!!!!!!!!!!!!!\n"
443+
)
444+
print(
445+
"\n\t!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\t! Recipe NOT ready for Pull Requests !\n\t!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
446+
)
447+
remove_package_after_install(bz2, recipe_name, 1)
448+
449+
## Get the extra files
450+
extra = check_files(
441451
install_path,
442452
species,
443453
build,
@@ -449,7 +459,7 @@ def check_recipe(parser, args):
449459

450460
## Add final files and md5sum
451461
if args.dont_add_md5sum_for_checksum == False:
452-
recipe = add_final_files(install_path, recipe, args.recipe_path)
462+
recipe = add_final_files(install_path, recipe, args.recipe_path, extra)
453463
add_to_checksum_md5sums(
454464
install_path, recipe, op.join(args.recipe_path, "checksums_file.txt")
455465
)
@@ -598,7 +608,7 @@ def add_to_checksum_md5sums(installed_dir_path, yaml_file, recipe_checksum_file_
598608
return True
599609

600610

601-
def add_final_files(installed_dir_path, yaml_dict, recipe_path):
611+
def add_final_files(installed_dir_path, yaml_dict, recipe_path, extra_files):
602612
"""Method to add the final data files to the meta.yaml file of a recipe
603613
604614
add_final_files
@@ -618,6 +628,7 @@ def add_final_files(installed_dir_path, yaml_dict, recipe_path):
618628
1) installed_dir_path: The directory path to the installed data files
619629
2) yaml_dict: A dictionary of the meta.yaml file for the recipe
620630
3) recipe_path: The directory path to the recipe being checked
631+
4) extra_files: The name of the extra files found from check_files method
621632
622633
Returns:
623634
++++++++
@@ -659,6 +670,7 @@ def add_final_files(installed_dir_path, yaml_dict, recipe_path):
659670
"mzml",
660671
"cvs",
661672
"tsv",
673+
"txt",
662674
"bim",
663675
"fam",
664676
"ped",
@@ -689,6 +701,11 @@ def add_final_files(installed_dir_path, yaml_dict, recipe_path):
689701
yaml_dict["about"]["tags"]["final-files"]
690702
)
691703

704+
## Add extra files if they exists
705+
if extra_files:
706+
print(":ggd:check-recipe: Attempting to add the extra files not already added in the mat.yaml file\n")
707+
yaml_dict["extra"]["extra-files"] = extra_files
708+
692709
## Rewrite yaml file with new tags and new final files
693710
with open(os.path.join(recipe_path, "meta.yaml"), "w") as newFile:
694711
for key in sorted(yaml_dict.keys()):
@@ -804,12 +821,173 @@ def remove_package_after_install(bz2, recipe_name, exit_num):
804821
print(e)
805822

806823
print(
807-
"\n:ggd:check-recipe: Review the STDOUT and STDERR, correct the errors, and re-run $ggd check-recipes\n"
824+
"\n:ggd:check-recipe: Review the STDOUT and STDERR, correct the errors, and re-run $ggd check-recipe\n"
808825
)
809826
## Exit
810827
sys.exit(exit_num)
811828

812829

830+
def check_header(install_path):
831+
"""Method to check the final genomic headers have a header or not
832+
833+
check_header
834+
============
835+
This method is going to go through each of the files that were created by the recipe,
836+
and it will check if the those files have a header or not.
837+
838+
sam/bam/cram, vcf/bcf, gtf/gff/gff3, bed/bedGraph, csv, txt files require a header and if no header is provided
839+
check-recipe will fail.
840+
841+
Other files that don't have header will be given a warning. GGD expects most files to have
842+
a header. Some files are okay not to have headers, but if a header can be added it should be.
843+
844+
For each file, the file header and first 5 lines of the file body will be provided to stdout.
845+
846+
Parameters:
847+
-----------
848+
1) install_path: The path to the directory where the files have been installed into.
849+
850+
Returns:
851+
+++++++
852+
True or False.
853+
- True if a header exist or if only a warning was given
854+
- False if a header does not exists and is required
855+
856+
"""
857+
858+
print(":ggd:check-recipe: Checking that the final files have headers if appropriate\n")
859+
860+
installed_files = os.listdir(install_path)
861+
862+
for file_name in [x for x in installed_files if os.path.isfile(os.path.join(install_path,x))]:
863+
864+
f_path = os.path.join(install_path, file_name)
865+
866+
## Check for an index file
867+
if file_name.strip().split(".")[-1] in set(["tbi","bai","crai","fai","tar","bz2","bw"]):
868+
continue
869+
870+
## Skip fasta or fastq files
871+
if any(x in file_name for x in [".fasta",".fa",".fastq",".fq"]):
872+
continue
873+
874+
## Check for sam/bam/cram files
875+
if any(x in file_name for x in [".sam",".bam",".cram"]):
876+
import pysam
877+
878+
try:
879+
samfile = pysam.AlignmentFile(f_path, check_sq=False)
880+
header = samfile.header
881+
if any(header.lengths):
882+
print(":ggd:check-recipe: Header found in file {name}\n".format(name=file_name))
883+
print("Head of file:")
884+
print("---------------------------")
885+
print(str(header).strip())
886+
for i,read in enumerate(samfile):
887+
print(read)
888+
if i >= 4:
889+
break
890+
print("---------------------------\n")
891+
892+
else:
893+
print(":ggd:check-recipe: !!ERROR!! No header found for file {name}\n".format(name=file_name))
894+
print(":ggd:check-recipe: !!ERROR!! A header is required for sam/bam/cram files\n")
895+
return(False)
896+
897+
except (ValueError, IOError, Exception) as e:
898+
print(str(e))
899+
print(":ggd:check-recipe: !!ERROR!! No header found for file {name}\n".format(name=file_name))
900+
print(":ggd:check-recipe: !!ERROR!! A header is required for sam/bam/cram files\n")
901+
return(False)
902+
903+
904+
## Check vcf/bcf files
905+
elif any(x in file_name for x in [".vcf",".bcf"]):
906+
from cyvcf2 import VCF
907+
try:
908+
vcffile = VCF(f_path)
909+
header = str(vcffile.raw_header)
910+
911+
if header:
912+
print(":ggd:check-recipe: Header found in file {name}\n".format(name=file_name))
913+
print("Head of file:")
914+
print("---------------------------")
915+
print(str(header).strip())
916+
for i,var in enumerate(vcffile):
917+
print(var)
918+
if i >= 4:
919+
break
920+
print("---------------------------\n")
921+
922+
else:
923+
print(":ggd:check-recipe: !!ERROR!! No header found for file {name}\n".format(name=file_name))
924+
print(":ggd:check-recipe: !!ERROR!! A header is required for vcf/bcf files\n")
925+
return(False)
926+
927+
except IOError as e:
928+
print(str(e))
929+
print(":ggd:check-recipe: !!ERROR!! No header found for file {name}\n".format(name=file_name))
930+
print(":ggd:check-recipe: !!ERROR!! A header is required for vcf/bcf files\n")
931+
return(False)
932+
933+
## Check other files
934+
else:
935+
import gzip
936+
try:
937+
file_handler = gzip.open(f_path) if f_path.endswith(".gz") else open(f_path)
938+
header = []
939+
body = []
940+
try:
941+
for line in file_handler:
942+
943+
if type(line) != str:
944+
line = line.strip().decode("utf-8")
945+
946+
if len(line) > 0 and str(line)[0] == "#":
947+
header.append(str(line))
948+
949+
else:
950+
body.append(str(line))
951+
if len(body) > 4:
952+
break
953+
954+
except UnicodeDecodeError:
955+
print(":ggd:check-recipe: Cannot decode file contents into unicode.\n")
956+
pass
957+
958+
959+
if header:
960+
print(":ggd:check-recipe: Header found in file {name}\n".format(name=file_name))
961+
print("Head of file:")
962+
print("---------------------------")
963+
print("\n".join(header))
964+
print("\n".join(body))
965+
print("---------------------------\n")
966+
elif any(x in file_name for x in [".gtf", ".gff", ".gff3", ".bed", ".bedGraph", ".csv", ".txt"]):
967+
print(":ggd:check-recipe: !!ERROR!! No header found for file {name}\n".format(name=file_name))
968+
print(":ggd:check-recipe: !!ERROR!! A header is required for this type of file\n")
969+
print("First 5 lines of file body:")
970+
print("---------------------------")
971+
print("\n".join(body))
972+
print("---------------------------\n")
973+
return(False)
974+
else:
975+
print(":ggd:check-recipe: !!WARNING!! No header found for file {name}\n".format(name=file_name))
976+
print("First 5 lines of file body:")
977+
print("---------------------------")
978+
print("\n".join(body))
979+
print("---------------------------\n")
980+
print(":ggd:check-recipe: !!WARNING!! GGD requires that any file that can have a header should. Please either add a header or if the file cannot have a header move forward.\n")
981+
print(":ggd:check-recipe: !!WARNING!! IF you move forwared without adding a header when one should be added, this recipe will be rejected until a header is added.\n")
982+
983+
except IOError as e:
984+
print(":ggd:check-recipe: !!ERROR!!")
985+
print(str(e))
986+
return False
987+
988+
return(True)
989+
990+
813991
def check_files(
814992
install_path, species, build, recipe_name, extra_files, before_files, bz2
815993
):
@@ -827,8 +1005,9 @@ def check_files(
8271005
print(":ggd:check-recipe: modified files:\n\t :: %s\n\n" % "\n\t :: ".join(files))
8281006

8291007
tbis = [x for x in files if x.endswith(".tbi")] # all tbi files
1008+
tbis = [x for x in files if x.endswith((".tbi",".csi"))] # all tbi files
8301009

831-
nons = [x for x in files if not x.endswith(".tbi")] # all non tbi files
1010+
nons = [x for x in files if not x.endswith((".tbi",".csi"))] # all non tbi files
8321011

8331012
tbxs = [x[:-4] for x in tbis if x[:-4] in nons] # names of files tabixed
8341013

@@ -868,6 +1047,8 @@ def check_files(
8681047
missing = []
8691048
not_tabixed = []
8701049
not_faidxed = []
1050+
add_extra = False
1051+
add_extra_files = []
8711052
for n in nons:
8721053
print(":ggd:check-recipe: > checking %s" % n)
8731054
if n.endswith(
@@ -903,16 +1084,21 @@ def check_files(
9031084
elif op.basename(n) not in extra_files and not any(
9041085
fnmatch(op.basename(n), e) for e in extra_files
9051086
):
906-
missing.append(
907-
":ggd:check-recipe: !!ERROR!!: %s(%s) unknown file and not in the extra/extra-files section of the yaml\n"
908-
% (P, n)
909-
)
1087+
print(
1088+
"\n:ggd:check-recipe: !!WARNING!!: %s(%s) unknown file and not in the extra/extra-files section of the yaml\n"
1089+
% (P, n)
1090+
)
1091+
add_extra_files.append(op.basename(n))
1092+
add_extra = True
9101093

9111094
if missing or not_tabixed or not_faidxed:
9121095
print("\n".join(missing + not_tabixed + not_faidxed), file=sys.stderr)
9131096
remove_package_after_install(bz2, recipe_name, 2)
9141097

915-
return True
1098+
if add_extra:
1099+
return add_extra_files
1100+
else:
1101+
return []
9161102

9171103

9181104
def check_yaml(recipe):

ggd/make_bash.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,7 @@ def make_bash(parser, args):
163163
assert (
164164
args.summary.strip() != ""
165165
), "Please provide a thorough summary of the data package"
166-
print(name)
167-
print(
168-
"{0}-{1}-{2}-v{3}".format(
166+
print("\n:ggd:make-recipe: Name of recipe: {0}-{1}-{2}-v{3}\n".format(
169167
args.genome_build.lower(),
170168
args.name.lower(),
171169
data_provider.lower(),

0 commit comments

Comments
 (0)