Skip to content

Commit d2ecffa

Browse files
authored
Merge pull request #41 from microBioRust/main
PR for microbiorust-py pytests and __init__.py setup improvements
2 parents f93013e + bf1729b commit d2ecffa

3 files changed

Lines changed: 151 additions & 1 deletion

File tree

microbiorust-py/microbiorust/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from . import microbiorust as _base
88

99
# list of importable submodules
10-
submodules = ["gbk", "embl", "align", "seqmetrics"]
10+
submodules = ["gbk", "embl", "align", "seqmetrics", "blast"]
1111

1212
for sub_name in submodules:
1313
#get the submodule from the base binary
Binary file not shown.

microbiorust-py/tests/test_mbr.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
import pytest
2+
import os
3+
import microbiorust
4+
import textwrap
5+
from pathlib import Path
6+
7+
# --- FIXTURES: Generating Mock Data ---
8+
9+
@pytest.fixture
def mock_gbk(tmp_path):
    """Write a small two-gene GenBank record into ``tmp_path``.

    The record is an Escherichia coli K-12 MG1655 excerpt (locus
    ``source_1``, 910 bp) with two complement-strand CDS features,
    ``b3304`` (rplR) and ``b3305`` (rplF).

    Returns:
        The path of the written ``test.gbk`` file, as a string.
    """
    # NOTE(review): the record text below is kept exactly as it appears in
    # this view of the file; confirm GenBank column spacing against the
    # repository copy before relying on it.
    record = textwrap.dedent("""
LOCUS source_1 910 bp DNA linear CON 01-NOV-2024
DEFINITION Escherichia coli K-12 substr. MG1655.
ACCESSION source_1
VERSION source_1
KEYWORDS .
SOURCE Escherichia coli K-12 substr. MG1655
ORGANISM Escherichia coli K-12 substr. MG1655
FEATURES Location/Qualifiers
source 1..910
/organism="K-12 substr. MG1655"
/mol_type="DNA"
gene complement(1..354)
/locus_tag="b3304"
CDS complement(1..354)
/locus_tag="b3304"
/codon_start=1
/gene="rplR"
/translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGSLVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQALDAAREAGLQ"
/product="50S ribosomal subunit protein L18"
gene complement(364..897)
/locus_tag="b3305"
CDS complement(364..897)
/locus_tag="b3305"
/codon_start=1
/gene="rplF"
/translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKHNTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLSGFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYADVVRTKEAKK"
/product="50S ribosomal subunit protein L6"
ORIGIN
1 TTAGAACTGA AGGCCAGCTT CACGGGCAGC ATCTGCCAGT GCCTGGACAC GACCATGATA
61 TTGGAACCCG GAACGGTCAA AGGATACATC TTTGATGCCT TTTTCCAGAG CGCGTTCAGC
121 GACAGCTTTA CCCACAGCTG CAGCCGCGTC TTTGTTACCG GTGTACTTCA GTTGTTCAGC
181 GATAGCTTTT TCTACAGTAG AAGCAGCTAC CAGAACTTCA GAACCGTTCG GTGCAATTAC
241 CTGTGCGTAA ATGTGACGCG GGGTACGATG TACCACCAGG CGAGTTGCGC CCAGCTCCTG
301 GAGCTTGCGG CGTGCGCGGG TCGCACGACG GATACGAGCA GATTTCTTAT CCATAGTGTT
361 ACCTTACTTC TTCTTAGCCT CTTTGGTACG CACGACTTCG TCGGCGTAAC GAACACCCTT
421 GCCTTTATAA GGCTCAGGAC GACGGTAGGC GCGCAGATCC GCTGCAACCT GGCCGATCAC
481 CTGCTTATCA GCGCCTTTCA GCACGATTTC AGTCTGAGTC GGACATTCAG CAGTGATACC
541 CGCAGGCAGC TGATGGTCAA CAGGATGAGA GAAACCCAGA GACAGGTTAA TCACATTGCC
601 TTTAACCGCT GCACGGTAAC CTACACCAAC CAGCTGCAGC TTCTTAGTGA AGCCTTCGGT
661 AACACCGATA ACCATTGAGT TCAGCAGGGC ACGCGCGGTA CCAGCCTGTG CCCAACCGTC
721 TGCGTAACCA TCACGCGGAC CGAAGGTCAG GGTATTATCT GCATGTTTAA CTTCAACAGC
781 ATCGTTGAGA GTACGAGTCA GCTCGCCGTT TTTACCTTTG ATCGTAATAA CCTGACCGTT
841 GATTTTTACG TCAACGCCGG CAGGAACAAC GACCGGTGCT TTAGCAACAC GAGACA
//
""")
    gbk_file = tmp_path / "test.gbk"
    gbk_file.write_text(record)
    return str(gbk_file)
61+
62+
@pytest.fixture
def mock_msa(tmp_path):
    """Write a three-sequence gapped FASTA alignment into ``tmp_path``.

    Seq1 and Seq2 carry a two-column gap (``--``) where Seq3 has ``GG``.

    Returns:
        The path of the written ``align.fasta`` file, as a string.
    """
    records = [
        ">Seq1\nATGC--AT\n",
        ">Seq2\nATGC--TT\n",
        ">Seq3\nATGCGGTT\n",
    ]
    msa_file = tmp_path / "align.fasta"
    msa_file.write_text("".join(records))
    return str(msa_file)
73+
74+
@pytest.fixture
def mock_blast_tab(tmp_path):
    """Write a one-hit, tab-separated BLAST result file into ``tmp_path``.

    Returns:
        The path of the written ``results.tab`` file, as a string.
    """
    # Column order:
    # qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore
    fields = [
        "seqA", "seqB", "99.0", "100", "1", "0",
        "1", "100", "1", "100", "1e-10", "200.0",
    ]
    hit_line = "\t".join(fields) + "\n"
    tab_file = tmp_path / "results.tab"
    tab_file.write_text(hit_line)
    return str(tab_file)
82+
83+
# Tests for the file parsers (GenBank only so far; no EMBL test in this file)
84+
85+
def test_gbk_conversions(mock_gbk):
    """Check protein extraction, protein counting and nucleotide extraction."""
    # Protein extraction: the first CDS locus tag must appear in the output.
    proteins = microbiorust.gbk_to_faa(mock_gbk)
    assert os.path.exists(mock_gbk)
    assert any("b3304" in entry for entry in proteins)

    # The mock record carries two CDS features.
    n_proteins = microbiorust.gbk_to_faa_count(mock_gbk)
    assert n_proteins == 2

    # Nucleotide extraction: compare case-insensitively against the first
    # ten bases of the ORIGIN block.
    nucleotides = microbiorust.gbk_to_fna(mock_gbk)
    assert any("ttagaactga" in entry.lower() for entry in nucleotides)
100+
101+
def test_gbk_to_gff(mock_gbk):
    """Convert the mock record to GFF and inspect the file it leaves behind."""
    microbiorust.gbk_to_gff(mock_gbk, dna=True)

    # The output is looked for at "<input path>.gff" and read back.
    gff_path = f"{mock_gbk}.gff"
    assert os.path.exists(gff_path)

    with open(gff_path, "r") as handle:
        gff_text = handle.read()
    assert "source_source_1_1" in gff_text
108+
109+
#tests for multiple sequence alignment
110+
111+
def test_subset_msa(mock_msa):
    """Subset the mock alignment by a row range and a column range."""
    row_range = (0, 2)  # intended to select Seq1 & Seq2
    col_range = (0, 4)  # intended to select the leading "ATGC" columns
    subset = microbiorust.subset_msa_alignment(mock_msa, row_range, col_range)
    print("subset", subset)

    # NOTE(review): the earlier comment on this assertion read
    # "2 headers + 2 sequences", yet exactly two entries are expected --
    # confirm the return shape of subset_msa_alignment.
    assert len(subset) == 2
    assert ">Seq1" in subset[0]
    assert "ATGC" in subset[1]
118+
119+
def test_purge_gaps(mock_msa, tmp_path):
    """Run gap purging at a 0.5 threshold and check an output file appears."""
    purged_file = tmp_path / "purged.fasta"
    out_path = str(purged_file)

    # Intent: at threshold 0.5 the '--' columns shared by Seq1 and Seq2 are
    # removed and the result is written to out_path. Only the existence of
    # the output file is asserted here.
    microbiorust.purge_gaps(mock_msa, out_path, 0.5)
    assert os.path.exists(out_path)
124+
125+
def test_get_consensus(mock_msa):
    """Check the consensus begins with the columns all three sequences share."""
    # Every sequence in the mock alignment starts with 'ATGC', so the
    # consensus is expected to start with it too.
    consensus_seq = microbiorust.get_consensus(mock_msa)
    assert consensus_seq.startswith("ATGC")
129+
130+
#tests for Sequence Metrics
131+
132+
def test_hydrophobicity():
    """Check hydrophobicity returns a non-empty collection of floats."""
    peptide = "MALWMRLLPLLALLALWGPDPAAAFVN"
    profile = microbiorust.hydrophobicity(peptide, window_size=3)

    assert len(profile) > 0
    assert all(isinstance(value, float) for value in profile)
137+
138+
def test_amino_counts():
    """Check per-residue counts for a short peptide."""
    tally = microbiorust.amino_counts("MATAG")

    # "MATAG" holds one methionine and two alanines.
    assert tally["M"] == 1
    assert tally["A"] == 2
143+
144+
#test for Async Tabular Parser
145+
146+
def test_parse_tabular(mock_blast_tab):
    """Parse the single-hit mock BLAST table and check key fields."""
    hits = microbiorust.parse_tabular(mock_blast_tab)

    # The fixture writes exactly one row.
    assert len(hits) == 1
    assert hits[0]["qseqid"] == "seqA"
    assert hits[0]["bitscore"] == 200.0

0 commit comments

Comments
 (0)