-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcyvcf2_test.py
43 lines (27 loc) · 1.05 KB
/
cyvcf2_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from cyvcf2 import VCF
# Walk-through of the cyvcf2 reading API: iterate every record of a VCF/BCF,
# touch the per-variant and per-sample accessors, then run a region query.
# The bare attribute expressions below are evaluated only to show what is
# available; their values are intentionally discarded.
reader = VCF('some.vcf.gz')  # or VCF('some.bcf')

# Number of samples declared in the header. The script previously used
# `n_samples` in the shape assertion without defining it (NameError).
n_samples = len(reader.samples)

for variant in reader:
    variant.REF, variant.ALT  # e.g. REF='A', ALT=['C', 'T']

    variant.CHROM, variant.start, variant.end, variant.ID, \
        variant.FILTER, variant.QUAL

    # numpy arrays of specific things we pull from the sample fields.
    # gt_types is array of 0,1,2,3==HOM_REF, HET, UNKNOWN, HOM_ALT
    variant.gt_types, variant.gt_ref_depths, variant.gt_alt_depths  # numpy arrays
    variant.gt_phases, variant.gt_quals, variant.gt_bases  # numpy arrays

    ## INFO field.
    ## extract from the INFO field by its name:
    variant.INFO.get('DP')  # int
    variant.INFO.get('FS')  # float
    # NOTE(review): AC is an allele count, normally declared Integer in the
    # header (earlier comment said float) -- confirm against the input file.
    variant.INFO.get('AC')

    # convert back to a string.
    str(variant)

    ## sample info...

    # Get a numpy array of the depth per sample:
    dp = variant.format('DP')
    # or of any other format field:
    sb = variant.format('SB')
    # One row per sample, 4 values per sample.
    # NOTE(review): assumes SB is declared with Number=4 and is present on
    # every record -- confirm for the input file.
    assert sb.shape == (n_samples, 4)

# to do a region-query:
vcf = VCF('some.vcf.gz')
for v in vcf('11:435345-556565'):
    # Skip variants whose allele frequency exceeds 0.1; print the rest.
    # NOTE(review): assumes AF is present and scalar (single ALT allele);
    # a missing key or a multi-allelic tuple would raise here -- confirm.
    if v.INFO["AF"] > 0.1:
        continue
    print(str(v))