Skip to content

Commit 0b6544a

Browse files
authored
Merge pull request #403 from immunomind/dev
Dev
2 parents ac2c840 + 93b6e81 commit 0b6544a

27 files changed

+405
-827
lines changed

DESCRIPTION

+4-3
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,11 @@
11
Package: immunarch
22
Type: Package
33
Title: Bioinformatics Analysis of T-Cell and B-Cell Immune Repertoires
4-
Version: 0.9.0
4+
Version: 0.9.1
55
Authors@R: c(
66
person("Vadim I.", "Nazarov", , "[email protected]", c("aut", "cre")),
77
person("Vasily O.", "Tsvetkov", , role = "aut"),
8+
person("Siarhei", "Fiadziushchanka", , role = "aut"),
89
person("Eugene", "Rumynskiy", , role = "aut"),
910
person("Aleksandr A.", "Popov", , role = "aut"),
1011
person("Ivan", "Balashov", , role = "aut"),
@@ -23,7 +24,7 @@ Description: A comprehensive framework for bioinformatics exploratory analysis o
2324
and gene segments, repertoire diversity analysis, annotation of clonotypes using external immune receptor
2425
databases and clonotype tracking in vaccination and cancer studies. A successor to our
2526
previously published 'tcR' immunoinformatics package (Nazarov 2015) <doi:10.1186/s12859-015-0613-1>.
26-
License: AGPL-3
27+
License: Apache License (== 2.0)
2728
URL: https://immunarch.com/, https://github.com/immunomind/immunarch
2829
BugReports: https://github.com/immunomind/immunarch/issues
2930
Imports:
@@ -84,6 +85,6 @@ Suggests:
8485
rmarkdown
8586
VignetteBuilder: knitr
8687
Encoding: UTF-8
87-
RoxygenNote: 7.2.2
88+
RoxygenNote: 7.3.1
8889
LazyData: true
8990
LazyDataCompression: xz

LICENSE

+201-661
Large diffs are not rendered by default.

NAMESPACE

+9
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,13 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
S3method(cosine_sim,default)
4+
S3method(cosine_sim,numeric)
5+
S3method(jaccard_index,character)
6+
S3method(jaccard_index,default)
7+
S3method(overlap_coef,character)
8+
S3method(overlap_coef,default)
9+
S3method(tversky_index,character)
10+
S3method(tversky_index,default)
311
S3method(vis,clonal_family)
412
S3method(vis,clonal_family_tree)
513
S3method(vis,immunr_chao1)
@@ -150,6 +158,7 @@ importFrom(dplyr,n)
150158
importFrom(dplyr,one_of)
151159
importFrom(dplyr,pull)
152160
importFrom(dplyr,rename)
161+
importFrom(dplyr,row_number)
153162
importFrom(dplyr,rowwise)
154163
importFrom(dplyr,select)
155164
importFrom(dplyr,select_)

R/RcppExports.R

+3-2
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,10 @@
22
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
33

44
fill_vec <- function(read_vec, read_indices) {
5-
.Call(`_immunarch_fill_vec`, read_vec, read_indices)
5+
.Call(`_immunarch_fill_vec`, read_vec, read_indices)
66
}
77

88
fill_reads <- function(new_reads, new_counts) {
9-
.Call(`_immunarch_fill_reads`, new_reads, new_counts)
9+
.Call(`_immunarch_fill_reads`, new_reads, new_counts)
1010
}
11+

R/explore.R

-2
Original file line number | Diff line number | Diff line change
@@ -145,5 +145,3 @@ repExplore <- function(.data, .method = c("volume", "count", "len", "clones"), .
145145

146146
res
147147
}
148-
149-
rep.ex <- repExplore

R/immunarch-remaster.R

+25
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,25 @@
1+
# .check_immundata <- function (.object) {
2+
# if (!is.instance(.object, "ImmunData")) {
3+
# stop("Error: the input object is not of class ImmunData. Immunarch works on ImmunData only. Helpful manual: ...")
4+
# }
5+
# }
6+
#
7+
# .repertoire_overlap <- function (.data, .method, .verbose = TRUE, ...) {
8+
# check_immundata(.data)
9+
# }
10+
#
11+
# .gene_usage <- function (.data, .gene, .type, .use_counts, .norm, .gene_vec) {
12+
#
13+
# }
14+
#
15+
# .repertoire_diversity <- function (.data, .method, .verbose = TRUE, ...) {
16+
#
17+
# }
18+
#
19+
# .track_clonotypes <- function () {
20+
#
21+
# }
22+
#
23+
# .public_repertoire <- function () {
24+
#
25+
# }

R/io-parsers.R

+19-7
Original file line number | Diff line number | Diff line change
@@ -153,8 +153,10 @@ parse_repertoire <- function(.filename, .mode, .nuc.seq, .aa.seq, .count,
153153
.vend, .dstart, .dend, .jstart,
154154
.total.insertions, .vd.insertions, .dj.insertions
155155
)
156-
if (!is.na(.add[1])) {
156+
if (!has_no_data(.add)) {
157157
vec_names <- c(vec_names, .add)
158+
# add missing columns
159+
df %<>% add_empty_columns(.add[!(.add %in% colnames(df))])
158160
}
159161

160162
df <- df[, vec_names]
@@ -400,7 +402,7 @@ parse_mitcr <- function(.filename, .mode) {
400402
}
401403

402404
parse_mixcr <- function(.filename, .mode, .count = c("clonecount", "readcount")) {
403-
.filename <- .filename
405+
.filename %<>% .as_tsv()
404406
.id <- "cloneid"
405407
.count %<>% tolower()
406408
.sep <- "\t"
@@ -727,6 +729,11 @@ parse_mixcr <- function(.filename, .mode, .count = c("clonecount", "readcount"))
727729
}
728730
}
729731

732+
# fill cloneid column if it does not exist
733+
if (!(.id %in% colnames(df))) {
734+
df %<>% mutate("{.id}" := row_number())
735+
}
736+
730737
df <- df[, make.names(df_columns)]
731738
colnames(df) <- df_column_names
732739

@@ -962,13 +969,18 @@ parse_airr <- function(.filename, .mode) {
962969
.as_tsv() %>%
963970
airr::read_rearrangement()
964971

972+
bcr_pipeline_columns <- c(
973+
"cdr1", "cdr2", "cdr1_aa", "cdr2_aa", "fwr1", "fwr2", "fwr3", "fwr4",
974+
"fwr1_aa", "fwr2_aa", "fwr3_aa", "fwr4_aa"
975+
)
965976
df %<>%
966-
select_(
977+
add_empty_columns(bcr_pipeline_columns[!(bcr_pipeline_columns %in% colnames(df))]) %>%
978+
select(
967979
"sequence", "v_call", "d_call", "j_call", "junction", "junction_aa",
968-
~contains("v_germline_end"), ~contains("d_germline_start"),
969-
~contains("d_germline_end"), ~contains("j_germline_start"),
970-
~contains("np1_length"), ~contains("np2_length"),
971-
~contains("duplicate_count"),
980+
contains("v_germline_end"), contains("d_germline_start"),
981+
contains("d_germline_end"), contains("j_germline_start"),
982+
contains("np1_length"), contains("np2_length"),
983+
contains("duplicate_count"),
972984
"cdr1", "cdr2", "cdr1_aa", "cdr2_aa", "fwr1", "fwr2", "fwr3", "fwr4",
973985
"fwr1_aa", "fwr2_aa", "fwr3_aa", "fwr4_aa"
974986
)

R/io-utility.R

+5-2
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,9 @@
11
.remove.ext <- function(.str) {
2-
# gsub(pattern = '.*/|[.].*$', replacement = '', x = .str)
3-
gsub(pattern = ".*/|[.](txt|tsv|csv)$|([.](txt|tsv|csv))?[.](gz|bzip|bzip2|bz2)$", replacement = "", x = .str)
2+
.str %<>% str_replace(".*/", "") %>%
3+
str_replace(".*\\\\", "") %>%
4+
str_replace("(\\.gz|\\.bzip|\\.bzip2|\\.bz2)$", "") %>%
5+
str_replace("(\\.txt|\\.tsv|\\.csv)$", "")
6+
return(.str)
47
}
58

69

R/io.R

+3-3
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ if (getRversion() >= "2.15.1") {
2020
#' @importFrom jsonlite read_json
2121
#' @importFrom stringr str_split str_detect str_replace_all str_trim
2222
#' @importFrom methods as
23-
#' @importFrom dplyr contains first select_ group_by_at one_of
23+
#' @importFrom dplyr contains first select_ group_by_at one_of row_number
2424
#' @importFrom utils read.table
2525
#' @importFrom data.table setDF
2626
#'
@@ -291,13 +291,13 @@ repLoad <- function(.path, .mode = "paired", .coding = TRUE, ...) {
291291
missed_in_metadata <- setdiff(.metadata$Sample, .rep_names)
292292
if (length(missed_in_folders) || length(missed_in_metadata)) {
293293
if (length(missed_in_metadata)) {
294-
message(" -- Samples found in the metadata, but not in the folder:\n ", missed_in_metadata)
294+
message(" -- Samples found in the metadata, but not in the folder:\n ", toString(missed_in_metadata))
295295
message(" Did you correctly specify all the sample names in the metadata file?")
296296

297297
error_flag <- TRUE
298298
}
299299
if (length(missed_in_folders)) {
300-
message(" -- Samples found in the folder, but not in the metadata:\n ", missed_in_folders)
300+
message(" -- Samples found in the folder, but not in the metadata:\n ", toString(missed_in_folders))
301301
message(" Did you add all the necessary samples to the metadata file with correct names?")
302302
message(" Creating dummy sample records in the metadata for now...")
303303

R/overlap.R

+8
Original file line number | Diff line number | Diff line change
@@ -196,12 +196,14 @@ overlap_coef <- function(.x, .y) {
196196
UseMethod("overlap_coef")
197197
}
198198

199+
#' @export
199200
overlap_coef.default <- function(.x, .y) {
200201
.x <- collect(.x, n = Inf)
201202
.y <- collect(.y, n = Inf)
202203
nrow(dplyr::intersect(.x, .y)) / min(nrow(.x), nrow(.y))
203204
}
204205

206+
#' @export
205207
overlap_coef.character <- function(.x, .y) {
206208
length(dplyr::intersect(.x, .y)) / min(length(.x), length(.y))
207209
}
@@ -211,13 +213,15 @@ jaccard_index <- function(.x, .y) {
211213
UseMethod("jaccard_index")
212214
}
213215

216+
#' @export
214217
jaccard_index.default <- function(.x, .y) {
215218
.x <- collect(.x, n = Inf)
216219
.y <- collect(.y, n = Inf)
217220
intersection <- nrow(dplyr::intersect(.x, .y))
218221
intersection / (nrow(.x) + nrow(.y) - intersection)
219222
}
220223

224+
#' @export
221225
jaccard_index.character <- function(.x, .y) {
222226
intersection <- length(dplyr::intersect(.x, .y))
223227
intersection / (length(.x) + length(.y) - intersection)
@@ -227,13 +231,15 @@ tversky_index <- function(.x, .y, .a = .5, .b = .5) {
227231
UseMethod("tversky_index")
228232
}
229233

234+
#' @export
230235
tversky_index.default <- function(.x, .y, .a = .5, .b = .5) {
231236
.x <- collect(.x, n = Inf)
232237
.y <- collect(.y, n = Inf)
233238
intersection <- nrow(dplyr::intersect(.x, .y))
234239
intersection / (.a * nrow(dplyr::setdiff(.x, .y)) + .b * nrow(dplyr::setdiff(.y, .x)) + intersection)
235240
}
236241

242+
#' @export
237243
tversky_index.character <- function(.x, .y, .a = .5, .b = .5) {
238244
intersection <- length(dplyr::intersect(.x, .y))
239245
intersection / (.a * length(dplyr::setdiff(.x, .y)) + .b * length(dplyr::setdiff(.y, .x)) + intersection)
@@ -243,6 +249,7 @@ cosine_sim <- function(.x, .y, .quant) {
243249
UseMethod("cosine_sim")
244250
}
245251

252+
#' @export
246253
cosine_sim.default <- function(.x, .y, .quant) {
247254
.x <- collect(.x, n = Inf)
248255
.y <- collect(.y, n = Inf)
@@ -258,6 +265,7 @@ cosine_sim.default <- function(.x, .y, .quant) {
258265
sum(first_col * second_col) / (sqrt(sum(first_col * first_col)) * sqrt(sum(second_col * second_col)))
259266
}
260267

268+
#' @export
261269
cosine_sim.numeric <- function(.x, .y, .quant) {
262270
df <- rbind(.x, .y)
263271
sum(.x * .y) / (sqrt(rowSums(df^2))[1] * sqrt(rowSums(df^2))[2])[[1]]

R/sampling.R

+1-1
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
#'
3737
#' Note: each connection must represent a separate repertoire.
3838
#'
39-
#' @param .method Character. Name of a sampling method. See "Description" for more details. Default value is "downsample"
39+
#' @param .method Character. Name of a sampling method. See "Details" for more details. Default value is "downsample"
4040
#' that downsamples the repertoires to the number of clones (i.e., reads / UMIs) that the smallest repertoire has, if user
4141
#' doesn't set any value to the ".n" argument.
4242
#'

R/seqCluster.R

+5-4
Original file line number | Diff line number | Diff line change
@@ -143,10 +143,11 @@ seqCluster <- function(.data, .dist, .perc_similarity, .nt_similarity, .fixed_th
143143
if (!all(is.na(grouping_cols))) {
144144
result_multi %<>% map2_df(., pmap(group_values, data.frame)[!singleseq_flag], ~ cbind(.x, .y))
145145
res <- rbind(result_single, result_multi)
146-
res[grouping_cols] <- str_split(str_split(res[["Cluster"]],
147-
pattern = "_", simplify = TRUE
148-
)[, 1],
149-
pattern = "/", simplify = TRUE
146+
res[grouping_cols] <- str_split(
147+
str_split(res[["Cluster"]],
148+
pattern = "_", simplify = TRUE
149+
)[, 1],
150+
pattern = "/", simplify = TRUE
150151
)[, seq_along(grouping_cols)]
151152
} else {
152153
result_multi %<>% map_df(., ~.x)

R/shiny.R

-1
Original file line number | Diff line number | Diff line change
@@ -343,7 +343,6 @@ fixVis <- function(.plot = NA) {
343343
#
344344
server <- function(input, output, session) {
345345
create_plot <- function(input) {
346-
347346
# TODO: make automatic detection of available themes from ggplot2 and other packages
348347
choose_theme <- function(theme_label) {
349348
switch(theme_label,

R/tools.R

+11
Original file line number | Diff line number | Diff line change
@@ -596,6 +596,17 @@ add_column_with_first_gene <- function(.data, .original_colname, .target_colname
596596
return(.data)
597597
}
598598

599+
# add columns filled with NA
600+
add_empty_columns <- function(.data, .colnames) {
601+
if (length(.colnames) > 0) {
602+
new_columns <- rep(list(NA), length(.colnames))
603+
names(new_columns) <- .colnames
604+
return(do.call(cbind, c(list(.data), new_columns)))
605+
} else {
606+
return(.data)
607+
}
608+
}
609+
599610
# used to add sample name to error/warning messages when sample name is available
600611
optional_sample <- function(prefix, sample_name, suffix) {
601612
if (is.na(sample_name) || (sample_name == "")) {

R/vis.R

+5-13
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ if (getRversion() >= "2.15.1") {
77
"Overlap", "head", "Mean", "MeanVal", "MinVal", "MaxVal",
88
"Q1", "Q2", "Type", "Length", "Gene", "Freq", "Sequence",
99
"AA", "Clones", "Source.gr", "Target.gr", "Samples", "Samples.y",
10-
"CDR3.aa", "p.adj", "group1", "group2", "y.coord", "..p.adj..", ".SD",
10+
"CDR3.aa", "p.adj", "group1", "group2", "y.coord", ".SD",
1111
"name", "label", "."
1212
))
1313
}
@@ -47,15 +47,11 @@ if (getRversion() >= "2.15.1") {
4747

4848

4949
.tweak_fill <- function(.n) {
50-
palette_name <- ""
5150
if (.n == 1) {
5251
palette_name <- "Set2"
5352
} else if (.n == 2) {
5453
palette_name <- "Set1"
55-
}
56-
# else if (.n < 4) { palette_name = "YlGnBu" }
57-
# else if (.n < 6) { palette_name = "RdBu" }
58-
else if (.n < 12) {
54+
} else if (.n < 12) {
5955
palette_name <- "Spectral"
6056
} else {
6157
return(scale_fill_hue())
@@ -65,15 +61,11 @@ if (getRversion() >= "2.15.1") {
6561
}
6662

6763
.tweak_col <- function(.n) {
68-
palette_name <- ""
6964
if (.n == 1) {
7065
palette_name <- "Set2"
7166
} else if (.n == 2) {
7267
palette_name <- "Set1"
73-
}
74-
# else if (.n < 4) { palette_name = "YlGnBu" }
75-
# else if (.n < 6) { palette_name = "RdBu" }
76-
else if (.n < 12) {
68+
} else if (.n < 12) {
7769
palette_name <- "Spectral"
7870
} else {
7971
return(scale_colour_hue())
@@ -1469,7 +1461,7 @@ vis_box <- function(.data, .by = NA, .meta = NA, .melt = TRUE,
14691461
# print(p_df)
14701462

14711463
p <- p +
1472-
stat_compare_means(aes(label = ..p.adj..),
1464+
stat_compare_means(aes(label = after_stat(p.adj)),
14731465
bracket.size = .5, size = .signif.label.size,
14741466
label.y = max(.data$Value, na.rm = TRUE) * 1.07
14751467
)
@@ -2188,7 +2180,7 @@ vis_bar <- function(.data, .by = NA, .meta = NA, .errorbars = c(0.025, 0.975), .
21882180
# print(p_df)
21892181

21902182
p <- p +
2191-
stat_compare_means(aes(label = ..p.adj..),
2183+
stat_compare_means(aes(label = after_stat(p.adj)),
21922184
bracket.size = .5, size = .signif.label.size,
21932185
label.y = max(.data$Value, na.rm = TRUE) * 1.07
21942186
)

0 commit comments

Comments
 (0)