@@ -807,21 +807,30 @@ def run_haplotype_prune(self, p_threshold=1e-4):
807
807
listOfFiles = [f'{ hap_tmp } .log' ]
808
808
concat_logs (step , out_path , listOfFiles )
809
809
810
- # read .missing.hap file and grab flanking snps for P <= 0.0001. write flanking snps to file to exclude w bash
811
- mis_hap = pd .read_csv (f'{ hap_tmp } .missing.hap' , sep = '\s+' )
812
- mis_hap_snps = list (mis_hap [mis_hap .P <= p_threshold ].loc [:,'FLANKING' ].str .split ('|' ))
813
- snp_ls_df = pd .DataFrame ({'snp' :[rsid for ls in mis_hap_snps for rsid in ls ]})
814
- snp_ls_df ['snp' ].to_csv (f'{ hap_tmp } .exclude' ,sep = '\t ' , header = False , index = False )
810
+ if os .path .isfile (f'{ hap_tmp } .missing.hap' ):
815
811
816
- plink_cmd2 = f"{ plink2_exec } --bfile { geno_path } --exclude { hap_tmp } .exclude --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out { out_path } "
817
- shell_do (plink_cmd2 )
812
+ # read .missing.hap file and grab flanking snps for P <= 0.0001. write flanking snps to file to exclude w bash
813
+ mis_hap = pd .read_csv (f'{ hap_tmp } .missing.hap' , sep = '\s+' )
814
+ mis_hap_snps = list (mis_hap [mis_hap .P <= p_threshold ].loc [:,'FLANKING' ].str .split ('|' ))
815
+ snp_ls_df = pd .DataFrame ({'snp' :[rsid for ls in mis_hap_snps for rsid in ls ]})
816
+ snp_ls_df ['snp' ].to_csv (f'{ hap_tmp } .exclude' ,sep = '\t ' , header = False , index = False )
818
817
819
- listOfFiles = [ f' { out_path } .log' ]
820
- concat_logs ( step , out_path , listOfFiles )
818
+ plink_cmd2 = f" { plink2_exec } --bfile { geno_path } --exclude { hap_tmp } .exclude --make-pgen psam-cols=fid,parents,sex,pheno1,phenos --out { out_path } "
819
+ shell_do ( plink_cmd2 )
821
820
822
- # hap pruned count
823
- hap_snp_count = count_file_lines (f'{ out_path } .pvar' ) - 1
824
- hap_rm_count = initial_snp_count - hap_snp_count
821
+ listOfFiles = [f'{ out_path } .log' ]
822
+ concat_logs (step , out_path , listOfFiles )
823
+
824
+ # hap pruned count
825
+ hap_snp_count = count_file_lines (f'{ out_path } .pvar' ) - 1
826
+ hap_rm_count = initial_snp_count - hap_snp_count
827
+
828
+ else :
829
+ print (f'Haplotype pruning failed!' )
830
+ print (f'Check { hap_tmp } .log for more information' )
831
+ process_complete = False
832
+ hap_snp_count = count_file_lines (f'{ geno_path } .bim' )
833
+ hap_rm_count = 0
825
834
826
835
outfiles_dict = {
827
836
'plink_out' : out_path
@@ -913,9 +922,15 @@ def run_hwe_prune(self, hwe_threshold=1e-4, filter_controls=False):
913
922
listOfFiles = [f'{ hwe_tmp } .log' , f'{ out_path } .log' ]
914
923
concat_logs (step , out_path , listOfFiles )
915
924
916
- # hwe pruned count
917
- final_snp_count = count_file_lines (f'{ out_path } .pvar' ) - 1
918
- hwe_rm_count = initial_snp_count - final_snp_count
925
+ if os .path .isfile (f'{ out_path } .pvar' ):
926
+ # hwe pruned count
927
+ final_snp_count = count_file_lines (f'{ out_path } .pvar' ) - 1
928
+ hwe_rm_count = initial_snp_count - final_snp_count
929
+ else :
930
+ print (f'HWE pruning failed!' )
931
+ print (f'Check { hwe_tmp } .log or { out_path } .log for more information' )
932
+ process_complete = False
933
+ hwe_rm_count = 0
919
934
920
935
outfiles_dict = {
921
936
'plink_out' : out_path
0 commit comments