Skip to content

Commit bd187dd

Browse files
authored
version 0.1.4 final (#49)
* adding line for command to run python tests
* Deleted test gbk which was invalid
* adding to .gitignore
* change of path
* editing .gitignore
* fix file test paths after directory move
* adding an example
* removing test generated file, tidying imports for example
* checking packages in exclude
* adding new microBioRust version to cargo.toml
* improving gitignore
* adding to exclude list
* adding to exclude for cargo
* adding to exclude for cargo
* adding to exclude for cargo
* adding LICENSE specifically for microbiorust-py
1 parent f754120 commit bd187dd

27 files changed

Lines changed: 279 additions & 51 deletions

.gitignore

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,17 @@
11
# Generated by Cargo
22
# will have compiled files and executables
3-
debug/
43
target/
4+
dist/
5+
*.egg-info/
6+
__pycache__/
7+
*.pyc
8+
debug/
9+
10+
*.so
11+
*.abi3.so
12+
*.dylib
13+
*.pyd
14+
515
expand.rs
616
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
717
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
@@ -25,3 +35,4 @@ env/
2535
venv/
2636
site/
2737
static/
38+
*.gff

microBioRust/Cargo.toml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22
name = "microBioRust"
3-
version = "0.1.3"
4-
edition = "2021"
3+
version = "0.1.4"
4+
edition = "2024"
55
license = "MIT"
66
keywords = ["bioinformatics", "micro", "bio", "genomics", "sequence-analysis"]
77
description = "Microbiology friendly bioinformatics Rust functions"
@@ -14,7 +14,7 @@ categories = [
1414
]
1515
readme = "README.md"
1616
authors = ["Lisa Crossman and microBioRust community"]
17-
exclude = [".git", ".gitignore", ".dribble.example.embl", "tests/data/"]
17+
exclude = [".git", ".gitignore", ".dribble.example.embl", "tests/", "src/bin","config.toml","K12_ribo.gbk","Rhiz3841.gbk.gb","Rhiz3841.gbk.gb_out.faa","rust_via_python_countgbk2faa.py","rust_via_python_gbk2faa.py","asv.conf.json","benchmarks/"]
1818
repository = "https://github.com/microBioRust/microBioRust"
1919
documentation = "https://microbiorust.github.io/docs/"
2020

@@ -27,7 +27,11 @@ path = "src/lib.rs"
2727

2828
[[example]]
2929
name = "blast-example"
30-
path = "examples/src/blast_parse.rs"
30+
path = "examples/blast_parse.rs"
31+
32+
[[example]]
33+
name = "convert-to-faa"
34+
path = "examples/convert_to_faa.rs"
3135

3236
[dependencies]
3337
clap = { version = "4.5.19", features = ["derive"] }
Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,15 @@
1-
use anyhow::{Context, Result};
2-
use async_compression::tokio::bufread::GzipDecoder as AsyncGzDecoder;
1+
use anyhow::Result;
32
use clap::Parser;
4-
use quick_xml::events::Event;
5-
use quick_xml::reader::Reader;
6-
use quick_xml::escape::unescape;
7-
use serde::Serialize;
8-
use serde_json::ser::Serializer as JsonSerializer;
93
use microBioRust::blast::*;
10-
use std::io::Cursor;
11-
use tokio::io::{self, AsyncBufRead, AsyncBufReadExt, AsyncRead, AsyncWriteExt, BufReader};
4+
use tokio::io::AsyncWriteExt;
125

136
#[derive(Parser, Debug)]
14-
#[command(name = "blast-parsers", author, version, about = "async microBioRust BLAST parsers: for outfmt6 (single line tabular) and outfmt5 (xml)")]
7+
#[command(
8+
name = "blast-parsers",
9+
author,
10+
version,
11+
about = "async microBioRust BLAST parsers: for outfmt6 (single line tabular) and outfmt5 (xml)"
12+
)]
1513
struct Cli {
1614
///Use .gz for gzip-compressed files.
1715
#[arg(short, long, default_value = "-")]
@@ -44,7 +42,11 @@ async fn main() -> Result<()> {
4442
buf.push(b'\n');
4543
tokio::io::stdout().write_all(&buf).await?;
4644
} else {
47-
println!("query {:?} hits {}", iter_rec.query_def, iter_rec.hits.len());
45+
println!(
46+
"query {:?} hits {}",
47+
iter_rec.query_def,
48+
iter_rec.hits.len()
49+
);
4850
}
4951
}
5052
Err(e) => eprintln!("xml parse error: {}", e),
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
use clap::Parser;
2+
use std::{
3+
fs::File,
4+
io::{Write, BufWriter},
5+
};
6+
use microBioRust::{
7+
genbank,
8+
};
9+
10+
#[derive(Parser, Debug)]
11+
#[clap(author, version, about)]
12+
struct Arguments {
13+
#[clap(short, long)]
14+
filename: String,
15+
#[clap(short, long)]
16+
output: String,
17+
}
18+
19+
fn main() -> Result<(), anyhow::Error> {
20+
let args = Arguments::parse();
21+
let records = genbank!(&args.filename);
22+
let file = File::create(&args.output)?;
23+
let mut writer = BufWriter::new(file);
24+
for record in records {
25+
for (k, _v) in &record.cds.attributes {
26+
if let Some(seq) = record.seq_features.get_sequence_faa(k) {
27+
writeln!(writer, ">{}|{}\n{}", &record.id, &k, seq)?;
28+
}
29+
}
30+
}
31+
writer.flush()?;
32+
Ok(())
33+
}
34+

microBioRust/src/embl.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -263,14 +263,14 @@
263263
//!```
264264
//!
265265
266-
use anyhow::{anyhow, Context};
266+
use anyhow::{Context, anyhow};
267267
use bio::alphabets::dna::revcomp;
268268
use chrono::prelude::*;
269269
use lazy_static::lazy_static;
270270
use paste::paste;
271271
use protein_translate::translate;
272-
use serde::Serialize;
273272
use regex::Regex;
273+
use serde::Serialize;
274274
use std::{
275275
collections::{BTreeMap, HashSet},
276276
convert::{AsRef, TryInto},
@@ -600,8 +600,8 @@ where
600600
//println!("designated codon start {:?} {:?}", &codon_start, &locus_tag);
601601
}
602602
if self.line_buffer.contains("/gene=") {
603-
let gen: Vec<&str> = self.line_buffer.split('\"').collect();
604-
gene = gen[1].to_string();
603+
let genes: Vec<&str> = self.line_buffer.split('\"').collect();
604+
gene = genes[1].to_string();
605605
//println!("gene designated {:?} {:?}", &gene, &locus_tag);
606606
}
607607
if self.line_buffer.contains("/product") {
@@ -1542,7 +1542,7 @@ mod tests {
15421542
#[allow(unused_assignments)]
15431543
#[allow(unused_imports)]
15441544
fn test_read_file() {
1545-
let content = std::fs::read_to_string("example.embl").expect("error reading file");
1545+
let content = std::fs::read_to_string("tests/example.embl").expect("error reading file");
15461546
assert!(content.contains("ID"));
15471547
assert!(content.len() > 0);
15481548
}
@@ -1553,7 +1553,7 @@ mod tests {
15531553
#[allow(unused_assignments)]
15541554
#[allow(unused_imports)]
15551555
fn test_parse_embl() {
1556-
let file_embl = "example.embl";
1556+
let file_embl = "tests/example.embl";
15571557
let records = embl!(&file_embl);
15581558
assert!(records.len() > 0);
15591559
}
@@ -1564,7 +1564,7 @@ mod tests {
15641564
#[allow(unused_assignments)]
15651565
#[allow(unused_imports)]
15661566
fn test_parse_source_attributes() {
1567-
let file_embl = "example.embl";
1567+
let file_embl = "tests/example.embl";
15681568
let records = embl!(&file_embl);
15691569
if let Some(record) = records.first() {
15701570
if let Some((key, val)) = record.source_map.source_attributes.first_key_value() {
@@ -1579,7 +1579,7 @@ mod tests {
15791579
#[allow(unused_assignments)]
15801580
#[allow(unused_imports)]
15811581
fn test_parse_cds_attributes() {
1582-
let file_embl = "example.embl";
1582+
let file_embl = "tests/example.embl";
15831583
let records = embl!(&file_embl);
15841584
if let Some(record) = records.first() {
15851585
if let Some((locus_tag, vals)) = record.cds.attributes.first_key_value() {
@@ -1598,7 +1598,7 @@ mod tests {
15981598
#[allow(unused_assignments)]
15991599
#[allow(unused_imports)]
16001600
fn test_parse_sequence_attributes() {
1601-
let file_embl = "example.embl";
1601+
let file_embl = "tests/example.embl";
16021602
let records = embl!(&file_embl);
16031603
if let Some(record) = records.first() {
16041604
if let Some((key, vals)) = record.cds.attributes.first_key_value() {

microBioRust/src/gbk.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@
126126
//! let mut read_counter: u32 = 0;
127127
//! let mut seq_region: BTreeMap<String, (u32,u32)> = BTreeMap::new();
128128
//! let mut record_vec: Vec<Record> = Vec::new();
129-
//! loop {
129+
//! loop {
130130
//! match records.next() {
131131
//! Some(Ok(mut record)) => {
132132
//! println!("next record");
@@ -275,7 +275,7 @@
275275
//!```
276276
//!
277277
278-
use anyhow::{anyhow, Context};
278+
use anyhow::{Context, anyhow};
279279
use bio::alphabets::dna::revcomp;
280280
use chrono::prelude::*;
281281
use itertools::Itertools;
@@ -681,8 +681,8 @@ where
681681
//println!("designated codon start {:?} {:?}", &codon_start, &locus_tag);
682682
}
683683
if self.line_buffer.contains("/gene=") {
684-
let gen: Vec<&str> = self.line_buffer.split('\"').collect();
685-
gene = gen[1].to_string();
684+
let genes: Vec<&str> = self.line_buffer.split('\"').collect();
685+
gene = genes[1].to_string();
686686
//println!("gene designated {:?} {:?}", &gene, &locus_tag);
687687
}
688688
if self.line_buffer.contains("/product") {
@@ -1732,7 +1732,7 @@ mod tests {
17321732
#[allow(unused_assignments)]
17331733
#[allow(unused_imports)]
17341734
fn test_read_file() {
1735-
let content = std::fs::read_to_string("K12_ribo.gbk").expect("error reading file");
1735+
let content = std::fs::read_to_string("tests/K12_ribo.gbk").expect("error reading file");
17361736
assert!(content.contains("LOCUS"));
17371737
assert!(content.len() > 0);
17381738
}
@@ -1743,7 +1743,7 @@ mod tests {
17431743
#[allow(unused_assignments)]
17441744
#[allow(unused_imports)]
17451745
fn test_parse_gbk() {
1746-
let file_gbk = "K12_ribo.gbk";
1746+
let file_gbk = "tests/K12_ribo.gbk";
17471747
let records = genbank!(&file_gbk);
17481748
assert!(records.len() > 0);
17491749
}
@@ -1754,7 +1754,7 @@ mod tests {
17541754
#[allow(unused_assignments)]
17551755
#[allow(unused_imports)]
17561756
fn test_parse_source_attributes() {
1757-
let file_gbk = "K12_ribo.gbk";
1757+
let file_gbk = "tests/K12_ribo.gbk";
17581758
let records = genbank!(&file_gbk);
17591759
if let Some(record) = records.first() {
17601760
if let Some((key, val)) = record.source_map.source_attributes.first_key_value() {
@@ -1769,7 +1769,7 @@ mod tests {
17691769
#[allow(unused_assignments)]
17701770
#[allow(unused_imports)]
17711771
fn test_parse_cds_attributes() {
1772-
let file_gbk = "K12_ribo.gbk";
1772+
let file_gbk = "tests/K12_ribo.gbk";
17731773
let records = genbank!(&file_gbk);
17741774
if let Some(record) = records.first() {
17751775
if let Some((locus_tag, vals)) = record.cds.attributes.first_key_value() {
@@ -1788,7 +1788,7 @@ mod tests {
17881788
#[allow(unused_assignments)]
17891789
#[allow(unused_imports)]
17901790
fn test_parse_sequence_attributes() {
1791-
let file_gbk = "K12_ribo.gbk";
1791+
let file_gbk = "tests/K12_ribo.gbk";
17921792
let records = genbank!(&file_gbk);
17931793
if let Some(record) = records.first() {
17941794
if let Some((key, vals)) = record.cds.attributes.first_key_value() {
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
>seq1
2+
ATGC-ATGCATGCATGC
3+
>seq2
4+
ATGCAATGCTTGCATGC
5+
>seq3
6+
TTGCAATCCATGCAAGC

0 commit comments

Comments
 (0)