immunomind
diff --git a/‎DESCRIPTION
+4-3 b/‎DESCRIPTION
+4-3
diff --git a/‎LICENSE
+201-661 b/‎LICENSE
+201-661
diff --git a/‎NAMESPACE
+9 b/‎NAMESPACE
+9
diff --git a/‎R/RcppExports.R
+3-2 b/‎R/RcppExports.R
+3-2
diff --git a/‎R/explore.R
-2 b/‎R/explore.R
-2
diff --git a/‎R/immunarch-remaster.R
+25 b/‎R/immunarch-remaster.R
+25
diff --git a/‎R/io-parsers.R
+19-7 b/‎R/io-parsers.R
+19-7
diff --git a/‎R/io-utility.R
+5-2 b/‎R/io-utility.R
+5-2
diff --git a/‎R/io.R
+3-3 b/‎R/io.R
+3-3
diff --git a/‎R/overlap.R
+8 b/‎R/overlap.R
+8
diff --git a/‎R/sampling.R
+1-1 b/‎R/sampling.R
+1-1
diff --git a/‎R/seqCluster.R
+5-4 b/‎R/seqCluster.R
+5-4
diff --git a/‎R/shiny.R
-1 b/‎R/shiny.R
-1
diff --git a/‎R/tools.R
+11 b/‎R/tools.R
+11
diff --git a/‎R/vis.R
+5-13 b/‎R/vis.R
+5-13
@@ -1,10 +1,11 @@
 Package: immunarch
 Type: Package
 Title: Bioinformatics Analysis of T-Cell and B-Cell Immune Repertoires
-Version: 0.9.0
+Version: 0.9.1
 Authors@R: c(
     person("Vadim I.", "Nazarov", , "[email protected]", c("aut", "cre")),
     person("Vasily O.", "Tsvetkov", , role = "aut"),
+    person("Siarhei", "Fiadziushchanka", , role = "aut"),
     person("Eugene", "Rumynskiy", , role = "aut"),
     person("Aleksandr A.", "Popov", , role = "aut"),
     person("Ivan", "Balashov", , role = "aut"),
@@ -23,7 +24,7 @@ Description: A comprehensive framework for bioinformatics exploratory analysis o
     and gene segments, repertoire diversity analysis, annotation of clonotypes using external immune receptor
     databases and clonotype tracking in vaccination and cancer studies. A successor to our
     previously published 'tcR' immunoinformatics package (Nazarov 2015) <doi:10.1186/s12859-015-0613-1>.
-License: AGPL-3
+License: Apache License (== 2.0)
 URL: https://immunarch.com/, https://github.com/immunomind/immunarch
 BugReports: https://github.com/immunomind/immunarch/issues
 Imports:
@@ -84,6 +85,6 @@ Suggests:
     rmarkdown
 VignetteBuilder: knitr
 Encoding: UTF-8
-RoxygenNote: 7.2.2
+RoxygenNote: 7.3.1
 LazyData: true
 LazyDataCompression: xz
@@ -1,5 +1,13 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(cosine_sim,default)
+S3method(cosine_sim,numeric)
+S3method(jaccard_index,character)
+S3method(jaccard_index,default)
+S3method(overlap_coef,character)
+S3method(overlap_coef,default)
+S3method(tversky_index,character)
+S3method(tversky_index,default)
 S3method(vis,clonal_family)
 S3method(vis,clonal_family_tree)
 S3method(vis,immunr_chao1)
@@ -150,6 +158,7 @@ importFrom(dplyr,n)
 importFrom(dplyr,one_of)
 importFrom(dplyr,pull)
 importFrom(dplyr,rename)
+importFrom(dplyr,row_number)
 importFrom(dplyr,rowwise)
 importFrom(dplyr,select)
 importFrom(dplyr,select_)
 
@@ -2,9 +2,10 @@
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
 fill_vec <- function(read_vec, read_indices) {
-  .Call(`_immunarch_fill_vec`, read_vec, read_indices)
+    .Call(`_immunarch_fill_vec`, read_vec, read_indices)
 }
 
 fill_reads <- function(new_reads, new_counts) {
-  .Call(`_immunarch_fill_reads`, new_reads, new_counts)
+    .Call(`_immunarch_fill_reads`, new_reads, new_counts)
 }
+
@@ -145,5 +145,3 @@ repExplore <- function(.data, .method = c("volume", "count", "len", "clones"), .
 
   res
 }
-
-rep.ex <- repExplore
@@ -0,0 +1,25 @@
+# .check_immundata <- function (.object) {
+#   if (!is.instance(.object, "ImmunData")) {
+#     stop("Error: the input object is not of class ImmunData. Immunarch works on ImmunData only. Helpful manual: ...")
+#   }
+# }
+#
+# .repertoire_overlap <- function (.data, .method, .verbose = TRUE, ...) {
+#   check_immundata(.data)
+# }
+#
+# .gene_usage <- function (.data, .gene, .type, .use_counts, .norm, .gene_vec) {
+#
+# }
+#
+# .repertoire_diversity <- function (.data, .method, .verbose = TRUE, ...) {
+#
+# }
+#
+# .track_clonotypes <- function () {
+#
+# }
+#
+# .public_repertoire <- function () {
+#
+# }
@@ -153,8 +153,10 @@ parse_repertoire <- function(.filename, .mode, .nuc.seq, .aa.seq, .count,
     .vend, .dstart, .dend, .jstart,
     .total.insertions, .vd.insertions, .dj.insertions
   )
-  if (!is.na(.add[1])) {
+  if (!has_no_data(.add)) {
     vec_names <- c(vec_names, .add)
+    # add missing columns
+    df %<>% add_empty_columns(.add[!(.add %in% colnames(df))])
   }
 
   df <- df[, vec_names]
@@ -400,7 +402,7 @@ parse_mitcr <- function(.filename, .mode) {
 }
 
 parse_mixcr <- function(.filename, .mode, .count = c("clonecount", "readcount")) {
-  .filename <- .filename
+  .filename %<>% .as_tsv()
   .id <- "cloneid"
   .count %<>% tolower()
   .sep <- "\t"
@@ -727,6 +729,11 @@ parse_mixcr <- function(.filename, .mode, .count = c("clonecount", "readcount"))
     }
   }
 
+  # fill the cloneid column if it does not exist
+  if (!(.id %in% colnames(df))) {
+    df %<>% mutate("{.id}" := row_number())
+  }
+
   df <- df[, make.names(df_columns)]
   colnames(df) <- df_column_names
 
@@ -962,13 +969,18 @@ parse_airr <- function(.filename, .mode) {
     .as_tsv() %>%
     airr::read_rearrangement()
 
+  bcr_pipeline_columns <- c(
+    "cdr1", "cdr2", "cdr1_aa", "cdr2_aa", "fwr1", "fwr2", "fwr3", "fwr4",
+    "fwr1_aa", "fwr2_aa", "fwr3_aa", "fwr4_aa"
+  )
   df %<>%
-    select_(
+    add_empty_columns(bcr_pipeline_columns[!(bcr_pipeline_columns %in% colnames(df))]) %>%
+    select(
       "sequence", "v_call", "d_call", "j_call", "junction", "junction_aa",
-      ~contains("v_germline_end"), ~contains("d_germline_start"),
-      ~contains("d_germline_end"), ~contains("j_germline_start"),
-      ~contains("np1_length"), ~contains("np2_length"),
-      ~contains("duplicate_count"),
+      contains("v_germline_end"), contains("d_germline_start"),
+      contains("d_germline_end"), contains("j_germline_start"),
+      contains("np1_length"), contains("np2_length"),
+      contains("duplicate_count"),
       "cdr1", "cdr2", "cdr1_aa", "cdr2_aa", "fwr1", "fwr2", "fwr3", "fwr4",
       "fwr1_aa", "fwr2_aa", "fwr3_aa", "fwr4_aa"
     )
 
@@ -1,6 +1,9 @@
 .remove.ext <- function(.str) {
-  # gsub(pattern = '.*/|[.].*$', replacement = '', x = .str)
-  gsub(pattern = ".*/|[.](txt|tsv|csv)$|([.](txt|tsv|csv))?[.](gz|bzip|bzip2|bz2)$", replacement = "", x = .str)
+  .str %<>% str_replace(".*/", "") %>%
+    str_replace(".*\\\\", "") %>%
+    str_replace("(\\.gz|\\.bzip|\\.bzip2|\\.bz2)$", "") %>%
+    str_replace("(\\.txt|\\.tsv|\\.csv)$", "")
+  return(.str)
 }
 
 
 
@@ -20,7 +20,7 @@ if (getRversion() >= "2.15.1") {
 #' @importFrom jsonlite read_json
 #' @importFrom stringr str_split str_detect str_replace_all str_trim
 #' @importFrom methods as
-#' @importFrom dplyr contains first select_ group_by_at one_of
+#' @importFrom dplyr contains first select_ group_by_at one_of row_number
 #' @importFrom utils read.table
 #' @importFrom data.table setDF
 #'
@@ -291,13 +291,13 @@ repLoad <- function(.path, .mode = "paired", .coding = TRUE, ...) {
     missed_in_metadata <- setdiff(.metadata$Sample, .rep_names)
     if (length(missed_in_folders) || length(missed_in_metadata)) {
       if (length(missed_in_metadata)) {
-        message("  -- Samples found in the metadata, but not in the folder:\n     ", missed_in_metadata)
+        message("  -- Samples found in the metadata, but not in the folder:\n     ", toString(missed_in_metadata))
         message("  Did you correctly specify all the sample names in the metadata file?")
 
         error_flag <- TRUE
       }
       if (length(missed_in_folders)) {
-        message("  -- Samples found in the folder, but not in the metadata:\n     ", missed_in_folders)
+        message("  -- Samples found in the folder, but not in the metadata:\n     ", toString(missed_in_folders))
         message("  Did you add all the necessary samples to the metadata file with correct names?")
         message("  Creating dummy sample records in the metadata for now...")
 
 
@@ -196,12 +196,14 @@ overlap_coef <- function(.x, .y) {
   UseMethod("overlap_coef")
 }
 
+#' @export
 overlap_coef.default <- function(.x, .y) {
   .x <- collect(.x, n = Inf)
   .y <- collect(.y, n = Inf)
   nrow(dplyr::intersect(.x, .y)) / min(nrow(.x), nrow(.y))
 }
 
+#' @export
 overlap_coef.character <- function(.x, .y) {
   length(dplyr::intersect(.x, .y)) / min(length(.x), length(.y))
 }
@@ -211,13 +213,15 @@ jaccard_index <- function(.x, .y) {
   UseMethod("jaccard_index")
 }
 
+#' @export
 jaccard_index.default <- function(.x, .y) {
   .x <- collect(.x, n = Inf)
   .y <- collect(.y, n = Inf)
   intersection <- nrow(dplyr::intersect(.x, .y))
   intersection / (nrow(.x) + nrow(.y) - intersection)
 }
 
+#' @export
 jaccard_index.character <- function(.x, .y) {
   intersection <- length(dplyr::intersect(.x, .y))
   intersection / (length(.x) + length(.y) - intersection)
@@ -227,13 +231,15 @@ tversky_index <- function(.x, .y, .a = .5, .b = .5) {
   UseMethod("tversky_index")
 }
 
+#' @export
 tversky_index.default <- function(.x, .y, .a = .5, .b = .5) {
   .x <- collect(.x, n = Inf)
   .y <- collect(.y, n = Inf)
   intersection <- nrow(dplyr::intersect(.x, .y))
   intersection / (.a * nrow(dplyr::setdiff(.x, .y)) + .b * nrow(dplyr::setdiff(.y, .x)) + intersection)
 }
 
+#' @export
 tversky_index.character <- function(.x, .y, .a = .5, .b = .5) {
   intersection <- length(dplyr::intersect(.x, .y))
   intersection / (.a * length(dplyr::setdiff(.x, .y)) + .b * length(dplyr::setdiff(.y, .x)) + intersection)
@@ -243,6 +249,7 @@ cosine_sim <- function(.x, .y, .quant) {
   UseMethod("cosine_sim")
 }
 
+#' @export
 cosine_sim.default <- function(.x, .y, .quant) {
   .x <- collect(.x, n = Inf)
   .y <- collect(.y, n = Inf)
@@ -258,6 +265,7 @@ cosine_sim.default <- function(.x, .y, .quant) {
   sum(first_col * second_col) / (sqrt(sum(first_col * first_col)) * sqrt(sum(second_col * second_col)))
 }
 
+#' @export
 cosine_sim.numeric <- function(.x, .y, .quant) {
   df <- rbind(.x, .y)
   sum(.x * .y) / (sqrt(rowSums(df^2))[1] * sqrt(rowSums(df^2))[2])[[1]]
 
@@ -36,7 +36,7 @@
 #'
 #' Note: each connection must represent a separate repertoire.
 #'
-#' @param .method Character. Name of a sampling method. See "Description" for more details. Default value is "downsample"
+#' @param .method Character. Name of a sampling method. See "Details" for more details. Default value is "downsample"
 #' that downsamples the repertoires to the number of clones (i.e., reads / UMIs) that the smallest repertoire has, if user
 #' doesn't set any value to the ".n" argument.
 #'
 
@@ -143,10 +143,11 @@ seqCluster <- function(.data, .dist, .perc_similarity, .nt_similarity, .fixed_th
     if (!all(is.na(grouping_cols))) {
       result_multi %<>% map2_df(., pmap(group_values, data.frame)[!singleseq_flag], ~ cbind(.x, .y))
       res <- rbind(result_single, result_multi)
-      res[grouping_cols] <- str_split(str_split(res[["Cluster"]],
-        pattern = "_", simplify = TRUE
-      )[, 1],
-      pattern = "/", simplify = TRUE
+      res[grouping_cols] <- str_split(
+        str_split(res[["Cluster"]],
+          pattern = "_", simplify = TRUE
+        )[, 1],
+        pattern = "/", simplify = TRUE
       )[, seq_along(grouping_cols)]
     } else {
       result_multi %<>% map_df(., ~.x)
 
@@ -343,7 +343,6 @@ fixVis <- function(.plot = NA) {
   #
   server <- function(input, output, session) {
     create_plot <- function(input) {
-
       # TODO: make automatic detection of available themes from ggplot2 and other packages
       choose_theme <- function(theme_label) {
         switch(theme_label,
 
@@ -596,6 +596,17 @@ add_column_with_first_gene <- function(.data, .original_colname, .target_colname
   return(.data)
 }
 
+# Append columns filled with NA to a data frame.
+#
+# .data     - data frame to extend.
+# .colnames - character vector with the names of the columns to add; when it
+#             is empty, .data is returned unchanged.
+#
+# Returns .data with one extra all-NA column per name in .colnames.
+add_empty_columns <- function(.data, .colnames) {
+  if (length(.colnames) > 0) {
+    # size every new column to the number of rows explicitly: a length-one NA
+    # cannot be recycled to zero rows, so cbind() would fail on empty input
+    new_columns <- rep(list(rep(NA, NROW(.data))), length(.colnames))
+    names(new_columns) <- .colnames
+    return(do.call(cbind, c(list(.data), new_columns)))
+  } else {
+    return(.data)
+  }
+}
+
 # used to add sample name to error/warning messages when sample name is available
 optional_sample <- function(prefix, sample_name, suffix) {
   if (is.na(sample_name) || (sample_name == "")) {
 
@@ -7,7 +7,7 @@ if (getRversion() >= "2.15.1") {
     "Overlap", "head", "Mean", "MeanVal", "MinVal", "MaxVal",
     "Q1", "Q2", "Type", "Length", "Gene", "Freq", "Sequence",
     "AA", "Clones", "Source.gr", "Target.gr", "Samples", "Samples.y",
-    "CDR3.aa", "p.adj", "group1", "group2", "y.coord", "..p.adj..", ".SD",
+    "CDR3.aa", "p.adj", "group1", "group2", "y.coord", ".SD",
     "name", "label", "."
   ))
 }
@@ -47,15 +47,11 @@ if (getRversion() >= "2.15.1") {
 
 
 .tweak_fill <- function(.n) {
-  palette_name <- ""
   if (.n == 1) {
     palette_name <- "Set2"
   } else if (.n == 2) {
     palette_name <- "Set1"
-  }
-  # else if (.n < 4) { palette_name = "YlGnBu" }
-  # else if (.n < 6) {  palette_name = "RdBu" }
-  else if (.n < 12) {
+  } else if (.n < 12) {
     palette_name <- "Spectral"
   } else {
     return(scale_fill_hue())
@@ -65,15 +61,11 @@ if (getRversion() >= "2.15.1") {
 }
 
 .tweak_col <- function(.n) {
-  palette_name <- ""
   if (.n == 1) {
     palette_name <- "Set2"
   } else if (.n == 2) {
     palette_name <- "Set1"
-  }
-  # else if (.n < 4) { palette_name = "YlGnBu" }
-  # else if (.n < 6) {  palette_name = "RdBu" }
-  else if (.n < 12) {
+  } else if (.n < 12) {
     palette_name <- "Spectral"
   } else {
     return(scale_colour_hue())
@@ -1469,7 +1461,7 @@ vis_box <- function(.data, .by = NA, .meta = NA, .melt = TRUE,
         # print(p_df)
 
         p <- p +
-          stat_compare_means(aes(label = ..p.adj..),
+          stat_compare_means(aes(label = after_stat(p.adj)),
             bracket.size = .5, size = .signif.label.size,
             label.y = max(.data$Value, na.rm = TRUE) * 1.07
           )
@@ -2188,7 +2180,7 @@ vis_bar <- function(.data, .by = NA, .meta = NA, .errorbars = c(0.025, 0.975), .
         # print(p_df)
 
         p <- p +
-          stat_compare_means(aes(label = ..p.adj..),
+          stat_compare_means(aes(label = after_stat(p.adj)),
             bracket.size = .5, size = .signif.label.size,
             label.y = max(.data$Value, na.rm = TRUE) * 1.07
           )
Original file line number	Diff line number	Diff line change
`@@ -2,9 +2,10 @@`
`2`	`2`	`# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393`
`3`	`3`
`4`	`4`	`fill_vec <- function(read_vec, read_indices) {`
`5`		- .Call(`_immunarch_fill_vec`, read_vec, read_indices)
	`5`	+ .Call(`_immunarch_fill_vec`, read_vec, read_indices)
`6`	`6`	`}`
`7`	`7`
`8`	`8`	`fill_reads <- function(new_reads, new_counts) {`
`9`		- .Call(`_immunarch_fill_reads`, new_reads, new_counts)
	`9`	+ .Call(`_immunarch_fill_reads`, new_reads, new_counts)
`10`	`10`	`}`
	`11`	`+`
Original file line number	Diff line number	Diff line change
`@@ -145,5 +145,3 @@ repExplore <- function(.data, .method = c("volume", "count", "len", "clones"), .`
`145`	`145`
`146`	`146`	`res`
`147`	`147`	`}`
`148`		`-`
`149`		`-rep.ex <- repExplore`
Original file line number	Diff line number	Diff line change
`@@ -343,7 +343,6 @@ fixVis <- function(.plot = NA) {`
`343`	`343`	`#`
`344`	`344`	`server <- function(input, output, session) {`
`345`	`345`	`create_plot <- function(input) {`
`346`		`-`
`347`	`346`	`# TODO: make automatic detection of available themes from ggplot2 and other packages`
`348`	`347`	`choose_theme <- function(theme_label) {`
`349`	`348`	`switch(theme_label,`