Skip to content

Commit 8375fdc

Browse files
authored
Merge pull request #16 from PopovIILab/dev
v1.0.1
2 parents 12295dd + 4dc34dc commit 8375fdc

21 files changed

Lines changed: 231 additions & 131 deletions

codecov.yml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
coverage:
2+
patch:
3+
target: 78%
4+
informational: true

krakenparser/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
1+
from .kpplot.clustermap import clustermap
12
from .kpplot.stackedbar import stacked_barplot
23
from .kpplot.streamgraph import streamgraph
3-
from .kpplot.clustermap import clustermap
44

55
__all__ = [
66
"stacked_barplot",

krakenparser/counts/convert2csv.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import argparse
44
import logging
55
from pathlib import Path
6+
67
import pandas as pd
78

89
_log = logging.getLogger(__name__)

krakenparser/counts/processing_script.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,16 @@
11
#!/usr/bin/env python
22

3+
import argparse
34
import os
4-
import sys
55
import tempfile
6-
import argparse
76
from pathlib import Path
87

98

109
def modify_taxa_names(line):
1110
prefixes = ["s__", "g__", "f__", "o__", "c__", "p__"]
1211
for prefix in prefixes:
1312
if line.startswith(prefix):
14-
parts = line[len(prefix):].split("\t")
13+
parts = line[len(prefix) :].split("\t")
1514
parts[0] = parts[0].replace("_", " ")
1615
return "\t".join(parts)
1716
return line

krakenparser/counts/split_mpa.py

Lines changed: 17 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -7,28 +7,27 @@
77
import argparse
88
import logging
99
import re
10-
import sys
1110
from pathlib import Path
1211

1312
_log = logging.getLogger(__name__)
1413

1514

1615
_RANKS = [
1716
("species", "s__", []),
18-
("genus", "g__", ["s__"]),
19-
("family", "f__", ["s__", "g__"]),
20-
("order", "o__", ["s__", "g__", "f__"]),
21-
("class", "c__", ["s__", "g__", "f__", "o__"]),
22-
("phylum", "p__", ["s__", "g__", "f__", "o__", "c__"]),
17+
("genus", "g__", ["s__"]),
18+
("family", "f__", ["s__", "g__"]),
19+
("order", "o__", ["s__", "g__", "f__"]),
20+
("class", "c__", ["s__", "g__", "f__", "o__"]),
21+
("phylum", "p__", ["s__", "g__", "f__", "o__", "c__"]),
2322
]
2423

2524
_HUMAN_TAXA = {
2625
"species": "s__Homo_sapiens",
27-
"genus": "g__Homo",
28-
"family": "f__Hominidae",
29-
"order": "o__Primates",
30-
"class": "c__Mammalia",
31-
"phylum": "p__Chordata",
26+
"genus": "g__Homo",
27+
"family": "f__Hominidae",
28+
"order": "o__Primates",
29+
"class": "c__Mammalia",
30+
"phylum": "p__Chordata",
3231
}
3332

3433
_ACCESSION_RE = re.compile(r"(SRS|SRR|SRX|ERS|ERR|ERX|DRS|DRR|DRX)\d*-")
@@ -41,7 +40,7 @@ def _strip_path_prefix(line: str) -> str:
4140
return line
4241
path, rest = line[:tab], line[tab:]
4342
pipe = path.rfind("|")
44-
segment = path[pipe + 1:] if pipe != -1 else path
43+
segment = path[pipe + 1 :] if pipe != -1 else path
4544
return _ACCESSION_RE.sub("", segment + rest)
4645

4746

@@ -105,7 +104,12 @@ def main() -> None:
105104
help="Do not filter human-related taxa (default: filtered)",
106105
)
107106
args = parser.parse_args()
108-
split_mpa(args.input, args.output, viruses_only=args.viruses_only, keep_human=args.keep_human)
107+
split_mpa(
108+
args.input,
109+
args.output,
110+
viruses_only=args.viruses_only,
111+
keep_human=args.keep_human,
112+
)
109113

110114

111115
if __name__ == "__main__":

krakenparser/kpplot/base.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
1+
from typing import Optional
2+
13
import matplotlib.pyplot as plt
24
import pandas as pd
3-
from typing import Optional
45

56

67
class KpPlotBase:
@@ -44,9 +45,7 @@ def aggregate_by_metadata(
4445
raise ValueError("metadata must contain 'Sample_id' column")
4546
if metadata_group not in metadata.columns:
4647
raise ValueError(f"'{metadata_group}' column not found in metadata")
47-
df = df.merge(
48-
metadata[["Sample_id", metadata_group]], on="Sample_id", how="left"
49-
)
48+
df = df.merge(metadata[["Sample_id", metadata_group]], on="Sample_id", how="left")
5049
df = (
5150
df.groupby([metadata_group, "taxon"], as_index=False)["rel_abund_perc"]
5251
.mean()

krakenparser/kpplot/clustermap.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,9 @@
1-
import pandas as pd
1+
from typing import List, Optional, Tuple
2+
23
import matplotlib.pyplot as plt
4+
import pandas as pd
35
import seaborn as sns
4-
from typing import Optional, Tuple, Union, List
6+
57
from .base import KpPlotBase, aggregate_by_metadata
68

79

krakenparser/kpplot/stackedbar.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,10 @@
1-
import pandas as pd
1+
from typing import List, Optional, Tuple, Union
2+
23
import matplotlib.pyplot as plt
3-
import seaborn as sns
44
import numpy as np
5-
from typing import Optional, Tuple, Union, List
5+
import pandas as pd
6+
import seaborn as sns
7+
68
from .base import KpPlotBase, aggregate_by_metadata
79

810

krakenparser/kpplot/streamgraph.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
import pandas as pd
1+
from typing import List, Optional, Tuple, Union
2+
23
import matplotlib.pyplot as plt
3-
import seaborn as sns
44
import numpy as np
5-
from typing import Optional, Tuple, Union, List
5+
import pandas as pd
6+
import seaborn as sns
7+
68
from .base import KpPlotBase, aggregate_by_metadata
79

810

krakenparser/krakenparser.py

Lines changed: 22 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,11 @@
11
import argparse
22
import logging
33
import subprocess
4-
from pathlib import Path
54
import sys
6-
from importlib.metadata import version as _pkg_version, PackageNotFoundError as _PNF
5+
from importlib.metadata import PackageNotFoundError as _PNF
6+
from importlib.metadata import version as _pkg_version
7+
from pathlib import Path
8+
79
try:
810
__version__ = _pkg_version("krakenparser")
911
except _PNF:
@@ -78,23 +80,28 @@ def main():
7880

7981
# Map flags to (script_path, base_args_to_prepend)
8082
command_map = {
81-
"complete": (package_dir / "pipeline.py", []),
82-
"kreport2mpa": (package_dir / "mpa" / "transform2mpa.py", []),
83-
"combine_mpa": (package_dir / "mpa" / "mpa_table.py", []),
84-
"deconstruct": (package_dir / "counts" / "split_mpa.py", []),
85-
"deconstruct_viruses":(package_dir / "counts" / "split_mpa.py", ["--viruses-only"]),
86-
"process": (package_dir / "counts" / "processing_script.py", []),
87-
"txt2csv": (package_dir / "counts" / "convert2csv.py", []),
88-
"relabund": (package_dir / "stats" / "relabund.py", []),
89-
"diversity": (package_dir / "stats" / "diversity.py", []),
83+
"complete": (package_dir / "pipeline.py", []),
84+
"kreport2mpa": (package_dir / "mpa" / "transform2mpa.py", []),
85+
"combine_mpa": (package_dir / "mpa" / "mpa_table.py", []),
86+
"deconstruct": (package_dir / "counts" / "split_mpa.py", []),
87+
"deconstruct_viruses": (
88+
package_dir / "counts" / "split_mpa.py",
89+
["--viruses-only"],
90+
),
91+
"process": (package_dir / "counts" / "processing_script.py", []),
92+
"txt2csv": (package_dir / "counts" / "convert2csv.py", []),
93+
"relabund": (package_dir / "stats" / "relabund.py", []),
94+
"diversity": (package_dir / "stats" / "diversity.py", []),
9095
}
9196

9297
if "-h" in sys.argv or "--help" in sys.argv:
9398
if not any(getattr(args, key) for key in command_map):
9499
parser.print_help()
95100
return
96101

97-
def _build_cmd(script: Path, base_args: list[str], user_args: list[str]) -> list[str]:
102+
def _build_cmd(
103+
script: Path, base_args: list[str], user_args: list[str]
104+
) -> list[str]:
98105
if script.suffix == ".py":
99106
# Run as module (-m) so the krakenparser package stays importable.
100107
# Derive dotted module name from path relative to the package root.
@@ -113,7 +120,9 @@ def _build_cmd(script: Path, base_args: list[str], user_args: list[str]) -> list
113120
# Default to full pipeline when -i/--input is given without a subcommand
114121
if "-i" in extra_args or "--input" in extra_args:
115122
complete_script, complete_base = command_map["complete"]
116-
subprocess.run(_build_cmd(complete_script, complete_base, extra_args), check=True)
123+
subprocess.run(
124+
_build_cmd(complete_script, complete_base, extra_args), check=True
125+
)
117126
return
118127

119128
parser.print_help()

0 commit comments

Comments
 (0)