@@ -49,6 +49,10 @@ def load_zarr_data(zarr_fn, chrom, s1, s2, gdistkey=None):
4949 g = allel .GenotypeChunkedArray (zfh ["calldata" ]["genotype" ])
5050
5151 pos = allel .SortedIndex (zfh ["variants" ]["POS" ][:])
52+
53+
54+
55+
5256 if gdistkey is not None :
5357 gdist = h5fh ["variants" ][gdistkey ][:]
5458 else :
@@ -59,17 +63,31 @@ def load_zarr_data(zarr_fn, chrom, s1, s2, gdistkey=None):
5963
def load_text_format_data(mapfn, pop_a_fn, pop_b_fn):
    """Load a SNP map file and two text genotype matrices.

    Parameters
    ----------
    mapfn : path to a headerless, delimited text file with the six columns
        ID, CHROM, GDist, POS, REF, ALT. Tab-delimited is tried first and
        single-space-delimited second.
    pop_a_fn, pop_b_fn : paths to whitespace-delimited integer matrices
        readable by ``np.loadtxt``. Each row is reshaped to
        ``(n_samples, 2)``, i.e. two consecutive columns per sample.

    Returns
    -------
    (geno1, geno2, pos, gdist) : the two ``allel.GenotypeChunkedArray``
        objects, an ``allel.SortedIndex`` of the POS column and the GDist
        column.

    Raises
    ------
    ValueError
        If the POS column contains NaN values, or if the variant table cannot
        be indexed by POS even after sorting.
    """
    columns = ["ID", "CHROM", "GDist", "POS", "REF", "ALT"]

    tbl = pd.read_csv(mapfn, sep="\t", header=None, engine="c")

    try:
        # Assigning six names fails with ValueError when the tab parse did
        # not yield six columns (e.g. the file is actually space delimited).
        tbl.columns = columns
    except ValueError:
        logger.info("File not tab delimited as expected- trying with spaces")
        tbl = pd.read_csv(
            mapfn, sep=" ", header=None, engine="c", names=columns)

    # Row permutation applied to the map file, if it had to be sorted; the
    # same permutation must be applied to the genotype rows below.
    row_order = None

    try:
        vartbl = allel.VariantChunkedTable(tbl.to_records(), index="POS")
    except ValueError:
        logger.warning("Possible SNPs file is not sorted. Attempting to sort. This is likely to be inefficient")
        # Track original row positions through the sort so the genotype
        # matrices can be reordered to stay aligned with the variants.
        row_order = (
            tbl.assign(_row=np.arange(len(tbl)))
            .sort_values(["CHROM", "POS"])["_row"]
            .to_numpy()
        )
        tbl = tbl.iloc[row_order]
        vartbl = allel.VariantChunkedTable(tbl.to_records(), index="POS")

    # ndmin=2 keeps a single-row file two-dimensional so shape[0] is the
    # number of rows rather than the number of columns.
    d1 = np.loadtxt(pop_a_fn, dtype="int8", ndmin=2)
    d2 = np.loadtxt(pop_b_fn, dtype="int8", ndmin=2)

    if row_order is not None:
        d1 = d1[row_order]
        d2 = d2[row_order]

    geno1 = allel.GenotypeChunkedArray(d1.reshape((d1.shape[0], -1, 2)))
    geno2 = allel.GenotypeChunkedArray(d2.reshape((d2.shape[0], -1, 2)))

    pos = allel.SortedIndex(vartbl.POS[:])
    # Explicit raise instead of assert: asserts are stripped under ``-O``.
    if np.isnan(pos).sum() != 0:
        raise ValueError("nans values are not supported")

    return geno1, geno2, pos, vartbl.GDist[:]
7492
7593
0 commit comments