Skip to content

Commit 3c0e799

Browse files
authored
Merge pull request #48 from databio/dev
Release `v0.1.0` - Fender stratocaster
2 parents 4162f72 + e3cac35 commit 3c0e799

File tree

130 files changed

+7930
-36
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

130 files changed

+7930
-36
lines changed

.github/workflows/CI.yml

+11-11
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
- name: Build wheels
2828
uses: PyO3/maturin-action@v1
2929
with:
30-
working-directory: ./bindings
30+
working-directory: ./bindings/python
3131
target: ${{ matrix.target }}
3232
args: --release --out dist --find-interpreter
3333
sccache: 'true'
@@ -36,7 +36,7 @@ jobs:
3636
uses: actions/upload-artifact@v3
3737
with:
3838
name: wheels
39-
path: ./bindings/dist
39+
path: ./bindings/python/dist
4040

4141
windows:
4242
runs-on: windows-latest
@@ -55,12 +55,12 @@ jobs:
5555
target: ${{ matrix.target }}
5656
args: --release --out dist --find-interpreter
5757
sccache: 'true'
58-
working-directory: ./bindings
58+
working-directory: ./bindings/python
5959
- name: Upload wheels
6060
uses: actions/upload-artifact@v3
6161
with:
6262
name: wheels
63-
path: ./bindings/dist
63+
path: ./bindings/python/dist
6464

6565
macos:
6666
runs-on: macos-latest
@@ -78,12 +78,12 @@ jobs:
7878
target: ${{ matrix.target }}
7979
args: --release --out dist --find-interpreter
8080
sccache: 'true'
81-
working-directory: ./bindings
81+
working-directory: ./bindings/python
8282
- name: Upload wheels
8383
uses: actions/upload-artifact@v3
8484
with:
8585
name: wheels
86-
path: ./bindings/dist
86+
path: ./bindings/python/dist
8787

8888
sdist:
8989
runs-on: ubuntu-latest
@@ -94,12 +94,12 @@ jobs:
9494
with:
9595
command: sdist
9696
args: --out dist
97-
working-directory: ./bindings
97+
working-directory: ./bindings/python
9898
- name: Upload sdist
9999
uses: actions/upload-artifact@v3
100100
with:
101101
name: wheels
102-
path: ./bindings/dist
102+
path: ./bindings/python/dist
103103

104104
release:
105105
name: Release
@@ -113,13 +113,13 @@ jobs:
113113
- uses: actions/download-artifact@v3
114114
with:
115115
name: wheels
116-
path: ./bindings/dist
116+
path: ./bindings/python/dist
117117
- name: List contents
118118
run: |
119119
echo "Contents of dist/"
120-
ls -l ./bindings/dist/
120+
ls -l ./bindings/python/dist/
121121
- name: Publish to PyPI
122122
uses: PyO3/maturin-action@v1
123123
with:
124124
command: upload
125-
args: --non-interactive --skip-existing ./bindings/dist/*
125+
args: --non-interactive --skip-existing ./bindings/python/dist/*

.github/workflows/R-CMD-check.yaml

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
on:
2+
push:
3+
branches:
4+
- master
5+
pull_request:
6+
branches:
7+
- master
8+
9+
name: R-CMD-check
10+
11+
jobs:
12+
R-CMD-check:
13+
runs-on: ${{ matrix.config.os }}
14+
name: ${{ matrix.config.os }} (R-${{ matrix.config.r }} rust-${{ matrix.config.rust-version }})
15+
strategy:
16+
fail-fast: false
17+
matrix:
18+
config:
19+
# - {os: windows-latest, r: 'release', rust-version: 'stable-msvc', rust-target: 'x86_64-pc-windows-gnu'}
20+
- {os: macOS-latest, r: 'release', rust-version: 'stable'}
21+
- {os: ubuntu-latest, r: 'release', rust-version: 'stable'}
22+
- {os: ubuntu-latest, r: 'devel', rust-version: 'stable'}
23+
env:
24+
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
25+
steps:
26+
- uses: actions/checkout@v2
27+
- uses: dtolnay/rust-toolchain@master
28+
with:
29+
toolchain: ${{ matrix.config.rust-version }}
30+
targets: ${{ matrix.config.rust-target }}
31+
- uses: r-lib/actions/setup-pandoc@v2
32+
- uses: r-lib/actions/setup-r@v2
33+
with:
34+
r-version: ${{ matrix.config.r }}
35+
use-public-rspm: true
36+
- uses: r-lib/actions/setup-r-dependencies@v2
37+
with:
38+
extra-packages: rcmdcheck
39+
working-directory: ${{ github.workspace }}/bindings/r
40+
- uses: r-lib/actions/check-r-package@v2
41+
with:
42+
working-directory: ${{ github.workspace }}/bindings/r

.gitignore

+10-2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,14 @@ Cargo.lock
1414
*.pdb
1515

1616
.venv
17-
17+
/.idea/genimtools.iml
18+
/.idea/modules.xml
19+
/.idea/.gitignore
20+
/.idea/vcs.xml
1821
# this is for "act"
19-
bin/
22+
bin/
23+
/.idea/gtars.iml
24+
/gtars/tests/data/test1.bw
25+
26+
.DS_Store
27+
.Rhistory

.vscode/settings.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"rust-analyzer.linkedProjects": [
33
"./gtars/Cargo.toml",
4-
"./bindings/Cargo.toml",
4+
"./bindings/python/Cargo.toml",
5+
"./bindings/r/src/rust/Cargo.toml",
56
]
67
}
File renamed without changes.

bindings/Cargo.toml bindings/python/Cargo.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "gtars-py"
3-
version = "0.0.15"
3+
version = "0.1.0"
44
edition = "2021"
55

66
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -10,7 +10,7 @@ crate-type = ["cdylib"]
1010

1111
[dependencies]
1212
anyhow = "1.0.82"
13-
gtars = { path = "../gtars" }
13+
gtars = { path = "../../gtars" }
1414
pyo3 = { version = "0.21", features=["anyhow", "extension-module"] }
1515
numpy = "0.21"
1616
# pyo3-tch = { git = "https://github.com/LaurentMazare/tch-rs" }
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

bindings/python/src/igd/mod.rs

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
use pyo3::prelude::*;
2+
3+
use gtars::igd::search::igd_search;
4+
5+
#[pyclass(name="IGD")]
6+
pub struct IGD;
7+
8+
#[pymethods]
9+
impl IGD {
10+
11+
#[classmethod]
12+
pub fn search(database_path: String, query_file_path: String) {
13+
14+
igd_search(&database_path, &query_file_path).unwrap();
15+
16+
17+
}
18+
}

bindings/src/lib.rs bindings/python/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ mod ailist;
55
mod models;
66
mod tokenizers;
77
mod utils;
8+
mod igd;
89

910
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
1011

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

bindings/src/utils/mod.rs bindings/python/src/utils/mod.rs

+8
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,17 @@ pub fn read_tokens_from_gtok(filename: &str) -> PyResult<Vec<u32>> {
6565
Ok(tokens)
6666
}
6767

68+
#[pyfunction]
69+
pub fn read_tokens_from_gtok_as_strings(filename: &str) -> PyResult<Vec<String>> {
70+
let tokens = gtars::io::read_tokens_from_gtok(filename)?;
71+
let tokens = tokens.iter().map(|t| t.to_string()).collect();
72+
Ok(tokens)
73+
}
74+
6875
/// Module initializer for the `utils` Python submodule: registers
/// `write_tokens_to_gtok`, `read_tokens_from_gtok` and
/// `read_tokens_from_gtok_as_strings` on `m`, returning early with the error
/// if any registration fails.
#[pymodule]
6976
pub fn utils(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
7077
m.add_wrapped(wrap_pyfunction!(write_tokens_to_gtok))?;
7178
m.add_wrapped(wrap_pyfunction!(read_tokens_from_gtok))?;
79+
m.add_wrapped(wrap_pyfunction!(read_tokens_from_gtok_as_strings))?;
7280
Ok(())
7381
}

bindings/r/.RData

2.88 KB
Binary file not shown.

bindings/r/.Rbuildignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
^src/\.cargo$
2+
^LICENSE\.md$

bindings/r/DESCRIPTION

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
Package: gtars
2+
Title: Performance critical genomic interval analysis using Rust, in R
3+
Version: 0.0.0.9000
4+
Authors@R:
5+
person("Nathan", "LeRoy", , "[email protected]", role = c("aut", "cre"),
6+
comment = c(ORCID = "0000-0002-7354-7213"))
7+
Description: Performance-critical tools to manipulate, analyze, and process genomic interval data. Primarily focused on building tools for geniml - our genomic machine learning python package.
8+
License: MIT + file LICENSE
9+
Encoding: UTF-8
10+
Roxygen: list(markdown = TRUE)
11+
RoxygenNote: 7.3.2
12+
Config/rextendr/version: 0.3.1.9001
13+
SystemRequirements: Cargo (Rust's package manager), rustc

bindings/r/LICENSE

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
YEAR: 2024
2+
COPYRIGHT HOLDER: gtars authors

bindings/r/LICENSE.md

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# MIT License
2+
3+
Copyright (c) 2024 gtars authors
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

bindings/r/NAMESPACE

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Generated by roxygen2: do not edit by hand
2+
3+
export(r_igd_create)
4+
export(r_igd_search)
5+
export(read_tokens_from_gtok)
6+
export(write_tokens_to_gtok)
7+
useDynLib(gtars, .registration = TRUE)

bindings/r/R/extendr-wrappers.R

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Generated by extendr: Do not edit by hand
2+
3+
# nolint start
4+
5+
#
6+
# This file was created with the following call:
7+
# .Call("wrap__make_gtars_wrappers", use_symbols = TRUE, package_name = "gtars")
8+
9+
#' @usage NULL
10+
#' @useDynLib gtars, .registration = TRUE
11+
NULL
12+
13+
`__init__` <- function() invisible(.Call(wrap____init__))
14+
15+
#' Read tokens from a gtok file
16+
#' @export
17+
#' @param filename A string representing the path to the gtok file.
18+
read_tokens_from_gtok <- function(filename) .Call(wrap__r_read_tokens_from_gtok, filename)
19+
20+
#' Write tokens to a gtok file
21+
#' @export
22+
#' @param filename A string representing the path to the gtok file.
23+
#' @param tokens The tokens to write.
24+
write_tokens_to_gtok <- function(filename, tokens) invisible(.Call(wrap__r_write_tokens_to_gtok, filename, tokens))
25+
26+
#' Create an IGD database from a directory of bed files
27+
#' @param output_path String path where the IGD database will be saved
28+
#' @param filelist String path to either a text file containing paths to bed files, or a directory containing bed files
29+
#' @param db_name String name for the database (will be used in output filenames)
30+
rextendr_igd_create <- function(output_path, filelist, db_name) .Call(wrap__rextendr_igd_create, output_path, filelist, db_name)
31+
32+
#' Search igd with a bed file
33+
#' @param database_path A string representing the path to the database igd file.
34+
#' @param query_path A string representing the path to the query bed file.
35+
rextendr_igd_search <- function(database_path, query_path) .Call(wrap__rextendr_igd_search, database_path, query_path)
36+
37+
38+
# nolint end

bindings/r/R/igd.R

+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#' @useDynLib gtars, .registration = TRUE
2+
NULL
3+
4+
#' @title Create IGD Database
#'
#' @description Creates an IGD (Indexed Genomic Data) database from a collection of BED files.
#'
#' @param output_path Character string specifying the directory where the IGD database will be saved
#' @param filelist Character string specifying either:
#'   - Path to a text file containing paths to BED files (one per line)
#'   - Path to a directory containing BED files
#'   - "-" or "stdin" to read paths from standard input
#' @param db_name Character string specifying the name for the database (will be used in output filenames).
#'   Defaults to "igd_database"
#'
#' @return NULL invisibly on success. An error is raised if any argument is
#'   not a single, non-missing character string.
#'
#' @examples
#' \dontrun{
#' # Create database with default name
#' r_igd_create("path/to/output", "path/to/bed/files")
#'
#' # Create database with a custom name
#' r_igd_create("path/to/output", "path/to/bed/files", db_name = "my_database")
#' }
#'
#' @export
r_igd_create <- function(output_path, filelist, db_name = "igd_database") {
  # Validate every argument before it crosses into native code: all three are
  # handed to the Rust routine, so each must be exactly one non-NA string.
  if (!is.character(output_path) || length(output_path) != 1 ||
        is.na(output_path)) {
    stop("output_path must be a single character string", call. = FALSE)
  }
  if (!is.character(filelist) || length(filelist) != 1 || is.na(filelist)) {
    stop("filelist must be a single character string", call. = FALSE)
  }
  if (!is.character(db_name) || length(db_name) != 1 || is.na(db_name)) {
    stop("db_name must be a single character string", call. = FALSE)
  }

  # Call Rust function; its return value is intentionally not surfaced.
  .Call(wrap__rextendr_igd_create, output_path, filelist, db_name)

  invisible(NULL)
}
39+
40+
41+
#' @title Search IGD Database
#'
#' @description Searches an IGD database for region overlaps with an input BED file
#'
#' @param database_path path to .igd database
#' @param query_path path to .bed file
#'
#' @return A data frame of overlap hits, returned invisibly. The native search
#'   yields tab-separated lines; the first line supplies the column names and
#'   each remaining line becomes one row. All columns are character. If there
#'   are no hit lines the data frame has zero rows.
#'
#' @examples
#' \dontrun{
#' hits <- r_igd_search("path/to/database.igd", "path/to/query.bed")
#' print(hits)
#' }
#'
#' @export
r_igd_search <- function(database_path, query_path) {
  # Input validation: both paths go straight to native code, so each must be
  # exactly one non-NA string.
  if (!is.character(database_path) || length(database_path) != 1 ||
        is.na(database_path)) {
    stop("database_path must be a single character string", call. = FALSE)
  }
  if (!is.character(query_path) || length(query_path) != 1 ||
        is.na(query_path)) {
    stop("query_path must be a single character string", call. = FALSE)
  }

  # Call Rust function
  chr_vector <- .Call(wrap__rextendr_igd_search, database_path, query_path)

  # Nothing at all came back (not even a header): return an empty data frame
  # rather than asking matrix() for a negative number of rows.
  if (length(chr_vector) == 0) {
    return(invisible(data.frame()))
  }

  split_result <- strsplit(chr_vector, split = "\t", fixed = TRUE)
  header <- split_result[[1]]
  hits <- split_result[-1]

  # A ragged result would be silently reshaped into misaligned columns.
  if (any(lengths(hits) != length(header))) {
    stop(
      "IGD search returned rows whose field count does not match the header",
      call. = FALSE
    )
  }

  # unlist(list()) is NULL, which matrix() rejects; use an empty character
  # vector so a header-only result becomes a zero-row data frame.
  cells <- if (length(hits) > 0) unlist(hits) else character(0)
  df <- data.frame(
    matrix(cells, nrow = length(hits), ncol = length(header), byrow = TRUE)
  )
  colnames(df) <- header

  invisible(df)
}

0 commit comments

Comments
 (0)