import os

from pysam import (tabix_compress,
                   tabix_index,
                   bcftools)
def norm_vcf(vcf_path: str,
             ref_fa_path: str) -> str:
    """Compress, index and normalize a VCF file with ``bcftools norm``.

    The input VCF is compressed to ``<vcf_path>.gz`` with ``tabix_compress``
    and indexed with ``tabix_index`` (``vcf`` preset). ``bcftools norm`` is
    then run on the compressed copy with ``--multiallelics -any``, ``-c sw``
    and the given reference, writing uncompressed VCF (``-Ov``) to the
    output path.

    Args:
        vcf_path: Path to the input VCF file.
        ref_fa_path: Path to the reference FASTA passed to ``bcftools norm``
            via ``-f``.

    Returns:
        Path of the normalized VCF: ``vcf_path`` with everything from its
        last ``.vcf`` onward replaced by ``_norm.vcf``.
    """
    vcf_gz_path = f'{vcf_path}.gz'
    # Split only on the LAST '.vcf' so that a '.vcf' occurring earlier in the
    # path (e.g. in a directory name) does not truncate the output location.
    norm_vcf_path = f'{vcf_path.rsplit(".vcf", 1)[0]}_norm.vcf'

    tabix_compress(filename_in=vcf_path,
                   filename_out=vcf_gz_path)
    tabix_index(filename=vcf_gz_path,
                preset='vcf')

    # Diagnostic output: path of the compressed VCF and whether it exists.
    print(vcf_gz_path)
    print(os.path.isfile(vcf_gz_path))  # expected: True
    print('')

    # pysam captures the command's stdout by default, and to do so it
    # redirects bcftools output to a temporary file of its own, which
    # overrides the '-o' given here and leaves norm_vcf_path unwritten.
    # Disabling stdout capture lets '-o' take effect.
    bcftools.norm('-Ov', '-o', norm_vcf_path,
                  '--multiallelics', '-any',
                  '-c', 'sw',
                  '-f', ref_fa_path,
                  vcf_gz_path,
                  catch_stdout=False)

    # Diagnostic output: path of the normalized VCF and whether it exists.
    print(norm_vcf_path)
    print(os.path.isfile(norm_vcf_path))  # expected: True
    print('')

    return norm_vcf_path
/home/pbykadorov/family/metrics/ajefcrqiaqumjjmdi504/000000000500_enzyme_T20.MGI.cutadapt.bwa.MarkDuplicates.DownsampleSam-x30.DeepVariant.norm.vcf.gz
True
/home/pbykadorov/family/metrics/ajefcrqiaqumjjmdi504/000000000500_enzyme_T20.MGI.cutadapt.bwa.MarkDuplicates.DownsampleSam-x30.DeepVariant.norm_norm.vcf
False #Expected True: the normalized VCF file should exist at this point