Fix issue described in #44 and bump version

Nick Harding · Nick Harding · commit dc470638161a · 2019-12-19T11:40:28.000Z
diff --git a/bin/xpclr b/bin/xpclr
@@ -41,13 +41,13 @@ def main():
 
     # data inputs for text format
     psr.add_argument('--map', required=False, default=None, action='store',
-                     help='input map file as per XPCLR specs')
+                     help='If using XPCLR-style text format. Input map file as per XPCLR specs (tab separated)')
 
     psr.add_argument('--popA', required=False, default=None, action='store',
-                     help='filepath to population A genotypes')
+                     help='If using XPCLR-style text format. Filepath to population A genotypes (space separated)')
 
     psr.add_argument('--popB', required=False, default=None, action='store',
-                     help='filepath to population A genotypes')
+                     help='If using XPCLR-style text format. Filepath to population B genotypes (space separated)')
 
     # parameters
     # chrom
@@ -174,6 +174,7 @@ def main():
     # determine windows
     if args.stop is None:
         args.stop = positions[-1]
+
     spacing = np.arange(args.start, args.stop, args.step)
     scan_windows = np.vstack([spacing, spacing - 1 + args.size]).T
 
diff --git a/setup.py b/setup.py
@@ -22,7 +22,7 @@ def get_version(source='xpclr/__init__.py'):
 
 MAINTAINER = 'Nicholas Harding',
 
-MAINTAINER_EMAIL = 'njh@well.ox.ac.uk',
+MAINTAINER_EMAIL = 'nicholas.harding@bdi.ox.ac.uk',
 
 URL = 'https://github.com/hardingnj/xpclr'
 
diff --git a/xpclr/__init__.py b/xpclr/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "1.1.1"
+__version__ = "1.1.2"
 
 from xpclr import methods
 from xpclr import util
diff --git a/xpclr/util.py b/xpclr/util.py
@@ -49,6 +49,10 @@ def load_zarr_data(zarr_fn, chrom, s1, s2, gdistkey=None):
     g = allel.GenotypeChunkedArray(zfh["calldata"]["genotype"])
 
     pos = allel.SortedIndex(zfh["variants"]["POS"][:])
+
+
+
+
     if gdistkey is not None:
         gdist = h5fh["variants"][gdistkey][:]
     else:
@@ -59,17 +63,31 @@ def load_zarr_data(zarr_fn, chrom, s1, s2, gdistkey=None):
 
 def load_text_format_data(mapfn, pop_a_fn, pop_b_fn):
 
-    tbl = pd.read_csv(mapfn, sep=" ",
-                      names=["ID", "CHROM", "GDist", "POS", "REF", "ALT"])
+    tbl = pd.read_csv(mapfn, sep="\t", header=None, engine="c")
+
+    try:
+        tbl.columns = ["ID", "CHROM", "GDist", "POS", "REF", "ALT"]
+    except ValueError:
+        logger.info("File not tab delimited as expected- trying with spaces")
+        tbl = pd.read_csv(
+            mapfn, sep=" ", header=None, engine="c", names=["ID", "CHROM", "GDist", "POS", "REF", "ALT"])
 
-    vartbl = allel.VariantChunkedTable(tbl.to_records(), index="POS")
+    try:
+        vartbl = allel.VariantChunkedTable(tbl.to_records(), index="POS")
+    except ValueError:
+        tbl = tbl.sort_values(["CHROM", "POS"])
+        logger.warning("Possible SNPs file is not sorted. Attempting to sort. This is likely to be inefficient")
+        vartbl = allel.VariantChunkedTable(tbl.to_records(), index="POS")
 
     d1 = np.loadtxt(pop_a_fn, dtype="int8")
     geno1 = allel.GenotypeChunkedArray(d1.reshape((d1.shape[0], -1, 2)))
 
     d2 = np.loadtxt(pop_b_fn, dtype="int8")
     geno2 = allel.GenotypeChunkedArray(d2.reshape((d2.shape[0], -1, 2)))
 
+    pos = allel.SortedIndex(vartbl.POS[:])
+    assert np.isnan(pos).sum() == 0, "nans values are not supported"
+
     return geno1, geno2, allel.SortedIndex(vartbl.POS[:]), vartbl.GDist[:]