Skip to content

Commit dc47063

Browse files
author
Nick Harding
committed
Fix issue described in 44 and bump version
1 parent df2a359 commit dc47063

File tree

4 files changed

+27
-8
lines changed

4 files changed

+27
-8
lines changed

bin/xpclr

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -41,13 +41,13 @@ def main():
4141

4242
# data inputs for text format
4343
psr.add_argument('--map', required=False, default=None, action='store',
44-
help='input map file as per XPCLR specs')
44+
help='If using XPCLR-style text format. Input map file as per XPCLR specs (tab separated)')
4545

4646
psr.add_argument('--popA', required=False, default=None, action='store',
47-
help='filepath to population A genotypes')
47+
help='If using XPCLR-style text format. Filepath to population A genotypes (space separated)')
4848

4949
psr.add_argument('--popB', required=False, default=None, action='store',
50-
help='filepath to population A genotypes')
50+
help='If using XPCLR-style text format. Filepath to population B genotypes (space separated)')
5151

5252
# parameters
5353
# chrom
@@ -174,6 +174,7 @@ def main():
174174
# determine windows
175175
if args.stop is None:
176176
args.stop = positions[-1]
177+
177178
spacing = np.arange(args.start, args.stop, args.step)
178179
scan_windows = np.vstack([spacing, spacing - 1 + args.size]).T
179180

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ def get_version(source='xpclr/__init__.py'):
2222

2323
MAINTAINER = 'Nicholas Harding',
2424

25-
MAINTAINER_EMAIL = 'njh@well.ox.ac.uk',
25+
MAINTAINER_EMAIL = 'nicholas.harding@bdi.ox.ac.uk',
2626

2727
URL = 'https://github.com/hardingnj/xpclr'
2828

xpclr/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
__version__ = "1.1.1"
1+
__version__ = "1.1.2"
22

33
from xpclr import methods
44
from xpclr import util

xpclr/util.py

Lines changed: 21 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,10 @@ def load_zarr_data(zarr_fn, chrom, s1, s2, gdistkey=None):
4949
g = allel.GenotypeChunkedArray(zfh["calldata"]["genotype"])
5050

5151
pos = allel.SortedIndex(zfh["variants"]["POS"][:])
52+
53+
54+
55+
5256
if gdistkey is not None:
5357
gdist = h5fh["variants"][gdistkey][:]
5458
else:
@@ -59,17 +63,31 @@ def load_zarr_data(zarr_fn, chrom, s1, s2, gdistkey=None):
5963

6064
def load_text_format_data(mapfn, pop_a_fn, pop_b_fn):
6165

62-
tbl = pd.read_csv(mapfn, sep=" ",
63-
names=["ID", "CHROM", "GDist", "POS", "REF", "ALT"])
66+
tbl = pd.read_csv(mapfn, sep="\t", header=None, engine="c")
67+
68+
try:
69+
tbl.columns = ["ID", "CHROM", "GDist", "POS", "REF", "ALT"]
70+
except ValueError:
71+
logger.info("File not tab delimited as expected- trying with spaces")
72+
tbl = pd.read_csv(
73+
mapfn, sep=" ", header=None, engine="c", names=["ID", "CHROM", "GDist", "POS", "REF", "ALT"])
6474

65-
vartbl = allel.VariantChunkedTable(tbl.to_records(), index="POS")
75+
try:
76+
vartbl = allel.VariantChunkedTable(tbl.to_records(), index="POS")
77+
except ValueError:
78+
tbl = tbl.sort_values(["CHROM", "POS"])
79+
logger.warning("Possible SNPs file is not sorted. Attempting to sort. This is likely to be inefficient")
80+
vartbl = allel.VariantChunkedTable(tbl.to_records(), index="POS")
6681

6782
d1 = np.loadtxt(pop_a_fn, dtype="int8")
6883
geno1 = allel.GenotypeChunkedArray(d1.reshape((d1.shape[0], -1, 2)))
6984

7085
d2 = np.loadtxt(pop_b_fn, dtype="int8")
7186
geno2 = allel.GenotypeChunkedArray(d2.reshape((d2.shape[0], -1, 2)))
7287

88+
pos = allel.SortedIndex(vartbl.POS[:])
89+
assert np.isnan(pos).sum() == 0, "nans values are not supported"
90+
7391
return geno1, geno2, allel.SortedIndex(vartbl.POS[:]), vartbl.GDist[:]
7492

7593

0 commit comments

Comments
 (0)