Skip to content

Commit 005f6cb

Browse files
authored
Merge pull request #144 from dvitale199/output_file_lines_patch
if statements to check if outputs from hap and hwe variant pruning cr…
2 parents 1df0b70 + b759079 commit 005f6cb

File tree

1 file changed

+30
-15
lines changed

1 file changed

+30
-15
lines changed

genotools/qc.py

+30-15
Original file line numberDiff line numberDiff line change
@@ -807,21 +807,30 @@ def run_haplotype_prune(self, p_threshold=1e-4):
807807
listOfFiles = [f'{hap_tmp}.log']
808808
concat_logs(step, out_path, listOfFiles)
809809

810-
# read .missing.hap file and grab flanking snps for P <= 0.0001. write flanking snps to file to exclude w bash
811-
mis_hap = pd.read_csv(f'{hap_tmp}.missing.hap', sep='\s+')
812-
mis_hap_snps = list(mis_hap[mis_hap.P <= p_threshold].loc[:,'FLANKING'].str.split('|'))
813-
snp_ls_df = pd.DataFrame({'snp':[rsid for ls in mis_hap_snps for rsid in ls]})
814-
snp_ls_df['snp'].to_csv(f'{hap_tmp}.exclude',sep='\t', header=False, index=False)
810+
if os.path.isfile(f'{hap_tmp}.missing.hap'):
815811

816-
plink_cmd2 = f"{plink2_exec} --bfile {geno_path} --exclude {hap_tmp}.exclude --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out {out_path}"
817-
shell_do(plink_cmd2)
812+
# read .missing.hap file and grab flanking snps for P <= 0.0001. write flanking snps to file to exclude w bash
813+
mis_hap = pd.read_csv(f'{hap_tmp}.missing.hap', sep='\s+')
814+
mis_hap_snps = list(mis_hap[mis_hap.P <= p_threshold].loc[:,'FLANKING'].str.split('|'))
815+
snp_ls_df = pd.DataFrame({'snp':[rsid for ls in mis_hap_snps for rsid in ls]})
816+
snp_ls_df['snp'].to_csv(f'{hap_tmp}.exclude',sep='\t', header=False, index=False)
818817

819-
listOfFiles = [f'{out_path}.log']
820-
concat_logs(step, out_path, listOfFiles)
818+
plink_cmd2 = f"{plink2_exec} --bfile {geno_path} --exclude {hap_tmp}.exclude --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out {out_path}"
819+
shell_do(plink_cmd2)
821820

822-
# hap pruned count
823-
hap_snp_count = count_file_lines(f'{out_path}.pvar') - 1
824-
hap_rm_count = initial_snp_count - hap_snp_count
821+
listOfFiles = [f'{out_path}.log']
822+
concat_logs(step, out_path, listOfFiles)
823+
824+
# hap pruned count
825+
hap_snp_count = count_file_lines(f'{out_path}.pvar') - 1
826+
hap_rm_count = initial_snp_count - hap_snp_count
827+
828+
else:
829+
print(f'Haplotype pruning failed!')
830+
print(f'Check {hap_tmp}.log for more information')
831+
process_complete = False
832+
hap_snp_count = count_file_lines(f'{geno_path}.bim')
833+
hap_rm_count = 0
825834

826835
outfiles_dict = {
827836
'plink_out': out_path
@@ -913,9 +922,15 @@ def run_hwe_prune(self, hwe_threshold=1e-4, filter_controls=False):
913922
listOfFiles = [f'{hwe_tmp}.log', f'{out_path}.log']
914923
concat_logs(step, out_path, listOfFiles)
915924

916-
# hwe pruned count
917-
final_snp_count = count_file_lines(f'{out_path}.pvar') - 1
918-
hwe_rm_count = initial_snp_count - final_snp_count
925+
if os.path.isfile(f'{out_path}.pvar'):
926+
# hwe pruned count
927+
final_snp_count = count_file_lines(f'{out_path}.pvar') - 1
928+
hwe_rm_count = initial_snp_count - final_snp_count
929+
else:
930+
print(f'HWE pruning failed!')
931+
print(f'Check {hwe_tmp}.log or {out_path}.log for more information')
932+
process_complete = False
933+
hwe_rm_count = 0
919934

920935
outfiles_dict = {
921936
'plink_out': out_path

0 commit comments

Comments
 (0)