11import pandas as pd
2- import h5py
32import allel
43import numpy as np
54import logging
98# FUNCTIONS
109def load_hdf5_data (hdf5_fn , chrom , s1 , s2 , gdistkey = None ):
1110
11+ import hdf5
12+
1213 samples1 = get_sample_ids (s1 )
1314 samples2 = get_sample_ids (s2 )
1415
@@ -30,6 +31,32 @@ def load_hdf5_data(hdf5_fn, chrom, s1, s2, gdistkey=None):
3031 return g .take (idx1 , axis = 1 ), g .take (idx2 , axis = 1 ), pos , gdist
3132
3233
def load_zarr_data(zarr_fn, chrom, s1, s2, gdistkey=None):
    """Load genotypes for two sample sets from one group of a zarr store.

    Parameters
    ----------
    zarr_fn
        Path (or store) passed to ``zarr.open_group``; opened read-only.
    chrom
        Key of the group inside the store to read from.
    s1, s2
        Passed to ``get_sample_ids`` to obtain the sample identifiers of the
        two sample sets.
    gdistkey
        Optional key under ``variants`` holding genetic distances. When
        ``None`` no genetic distances are loaded.

    Returns
    -------
    tuple
        ``(g1, g2, pos, gdist)`` where ``g1``/``g2`` are the genotype array
        taken along axis 1 at the indices of the first/second sample set,
        ``pos`` is an ``allel.SortedIndex`` built from ``variants/POS`` and
        ``gdist`` is the array stored under ``variants/<gdistkey>`` or
        ``None``.

    Raises
    ------
    ValueError
        If a requested sample identifier is not present in ``samples``.
    """

    import zarr

    samples1 = get_sample_ids(s1)
    samples2 = get_sample_ids(s2)

    zfh = zarr.open_group(zarr_fn, mode="r")[chrom]

    # Sample names can be stored as bytes or as str depending on how the
    # store was written; normalise to str so lookups work in both cases.
    sample_name = [
        sid.decode() if isinstance(sid, bytes) else sid
        for sid in zfh["samples"][:].tolist()
    ]

    idx1 = np.array([sample_name.index(sid) for sid in samples1])
    idx2 = np.array([sample_name.index(sid) for sid in samples2])

    g = allel.GenotypeChunkedArray(zfh["calldata"]["genotype"])

    pos = allel.SortedIndex(zfh["variants"]["POS"][:])

    # Read genetic distances from the zarr group itself; the previous code
    # referenced an undefined HDF5 handle here and raised NameError.
    if gdistkey is not None:
        gdist = zfh["variants"][gdistkey][:]
    else:
        gdist = None

    return g.take(idx1, axis=1), g.take(idx2, axis=1), pos, gdist
58+
59+
3360def load_text_format_data (mapfn , pop_a_fn , pop_b_fn ):
3461
3562 tbl = pd .read_csv (mapfn , sep = " " ,
0 commit comments