Skip to content

Commit cd060ab

Browse files
committed
autoinfer regionparser
1 parent c413891 commit cd060ab

1 file changed

Lines changed: 6 additions & 2 deletions

File tree

scprinter/utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ def regionparser(
359359
regions: str | Path | pd.DataFrame | pyranges.PyRanges | list[str],
360360
printer=None,
361361
width: int | None = None,
362-
header: bool = False,
362+
header: bool | None = None,
363363
):
364364
"""
365365
This function parses the regions specification and returns a dataframe with the first three columns ['Chromosome', 'Start', 'End']
@@ -381,7 +381,7 @@ def regionparser(
381381
If None, the width will be the same as the input regions, and would be 1000bp when regions are specified by gene names.
382382
header: bool | None
383383
Only used when the regions are given as a file path. If True, the first row of the file is treated as the header. If False, the first row is treated as data.
384-
This is useful when the regions are specified by a file that has a header.
384+
This is useful when the regions are specified by a file that has a header. If None, the value is inferred from the filename: True when it ends with '.bed' or '.bed.gz', otherwise False.
385385
Returns
386386
-------
387387
regions: pd.DataFrame
@@ -408,6 +408,9 @@ def regionparser(
408408
elif type(regions) is pd.core.series.Series:
409409
regions = pd.DataFrame(regions.values[None])
410410
elif type(regions) is str:
411+
if (regions.endswith(".bed")) or (regions.endswith(".bed.gz")):
412+
header = True if header is None else header
413+
411414
if ":" in regions and "-" in regions:
412415
# regions = pd.DataFrame([re.split(':|-', regions)], columns=['Chromosome', 'Start', 'End'])
413416
regions = pd.DataFrame([re.split(":|-", regions)])
@@ -424,6 +427,7 @@ def regionparser(
424427
regions = pd.DataFrame({"Chromosome": chrom, "Start": start})
425428
regions["End"] = regions["Start"] + int(printer.gene_region_width / 2)
426429
regions["Start"] -= int(printer.gene_region_width / 2)
430+
# regions_pr = dftopyranges(regions)
427431
else:
428432
# regions_pr = pyranges.readers.read_bed(regions)
429433
regions = pd.read_csv(regions, sep="\t", header=0 if header else None)

0 commit comments

Comments
 (0)