Skip to content

Commit afbd98d

Browse files
author
Aleksandr Popov
authored
Merge pull request #270 from immunomind/feature/tree-visualization
Clonal tree visualization
2 parents 02dd729 + 3adbd76 commit afbd98d

18 files changed

+505
-153
lines changed

DESCRIPTION

+2-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ Imports:
6565
glue,
6666
phangorn,
6767
uuid,
68-
stringi
68+
stringi,
69+
ggraph
6970
Depends:
7071
R (>= 4.0.0),
7172
ggplot2 (>= 3.1.0),

NAMESPACE

+8
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
S3method(vis,clonal_family)
4+
S3method(vis,clonal_family_tree)
35
S3method(vis,immunr_chao1)
46
S3method(vis,immunr_clonal_prop)
57
S3method(vis,immunr_dbscan)
@@ -35,6 +37,7 @@ S3method(vis,immunr_spectr)
3537
S3method(vis,immunr_spectr_nogene)
3638
S3method(vis,immunr_top_prop)
3739
S3method(vis,immunr_tsne)
40+
S3method(vis,step_failure_ignored)
3841
export(apply_asymm)
3942
export(apply_symm)
4043
export(bunch_translate)
@@ -168,6 +171,10 @@ importFrom(ggpubr,ggscatter)
168171
importFrom(ggpubr,rotate_x_text)
169172
importFrom(ggpubr,stat_compare_means)
170173
importFrom(ggpubr,theme_pubr)
174+
importFrom(ggraph,geom_edge_diagonal)
175+
importFrom(ggraph,geom_node_point)
176+
importFrom(ggraph,ggraph)
177+
importFrom(ggraph,theme_graph)
171178
importFrom(ggseqlogo,geom_logo)
172179
importFrom(ggseqlogo,theme_logo)
173180
importFrom(glue,glue)
@@ -279,6 +286,7 @@ importFrom(stringr,str_sub)
279286
importFrom(stringr,str_trim)
280287
importFrom(tibble,rownames_to_column)
281288
importFrom(tibble,tibble)
289+
importFrom(tidyr,drop_na)
282290
importFrom(tidyr,unite)
283291
importFrom(tidyr,unnest)
284292
importFrom(tidyselect,starts_with)

R/align_lineage.R

+20-15
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,4 @@
1-
#' This function aligns all sequences (incliding germline) that belong to one clonal lineage and one cluster.
2-
#' After clustering and building the clonal lineage and germline, the next step is to analyze the degree of mutation
3-
#' and maturity of each clonal lineage. This allows for finding high mature cells and cells with a large
4-
#' number of offspring. The phylogenetic analysis will find mutations that increase the affinity of BCR.
5-
#' Making alignment of the sequence is the first step towards sequence analysis including BCR.
1+
#' Aligns all sequences incliding germline within each clonal lineage within each cluster
62
#'
73
#' @concept align_lineage
84
#'
@@ -18,7 +14,12 @@
1814
#' @importFrom doParallel registerDoParallel stopImplicitCluster
1915
#' @importFrom parallel mclapply
2016

21-
#' @description Aligns all sequences incliding germline within each clonal lineage within each cluster
17+
#' @description This function aligns all sequences (incliding germline) that belong to one clonal
18+
#' lineage and one cluster. After clustering and building the clonal lineage and germline, the next
19+
#' step is to analyze the degree of mutation and maturity of each clonal lineage. This allows for
20+
#' finding high mature cells and cells with a large number of offspring. The phylogenetic analysis
21+
#' will find mutations that increase the affinity of BCR. Making alignment of the sequence
22+
#' is the first step towards sequence analysis including BCR.
2223
#'
2324
#' @usage
2425
#'
@@ -62,9 +63,9 @@
6263
#' * Aligned (included if .verbose_output=TRUE): FALSE if this group of sequences was not aligned with lineage
6364
#' (.min_lineage_sequences is below the threshold); TRUE if it was aligned
6465
#' * Alignment: DNAbin object with alignment or DNAbin object with unaligned sequences (if Aligned=FALSE)
65-
#' * V.length (included if .verbose_output=TRUE): shortest length of V gene part outside of CDR3 region in this
66+
#' * V.length: shortest length of V gene part outside of CDR3 region in this
6667
#' group of sequences; longer V genes (including germline) are trimmed to this length before alignment
67-
#' * J.length (included if .verbose_output=TRUE): shortest length of J gene part outside of CDR3 region in this
68+
#' * J.length: shortest length of J gene part outside of CDR3 region in this
6869
#' group of sequences; longer J genes (including germline) are trimmed to this length before alignment
6970
#' * Sequences: nested dataframe containing all sequences for this combination
7071
#' of cluster and germline; it has columns
@@ -79,7 +80,7 @@
7980
#'
8081
#' bcr_data %>%
8182
#' seqCluster(seqDist(bcr_data), .fixed_threshold = 3) %>%
82-
#' repGermline(.threads = 2) %>%
83+
#' repGermline(.threads = 1) %>%
8384
#' repAlignLineage(.min_lineage_sequences = 2, .align_threads = 2, .nofail = TRUE)
8485
#' @export repAlignLineage
8586
repAlignLineage <- function(.data,
@@ -93,7 +94,7 @@ repAlignLineage <- function(.data,
9394
"Please download it from here: http://www.clustal.org/download/current/\n",
9495
"or install it with your system package manager (such as apt or dnf)."
9596
), .nofail)) {
96-
return(NA)
97+
return(get_empty_object_with_class("step_failure_ignored"))
9798
}
9899

99100
doParallel::registerDoParallel(cores = .prepare_threads)
@@ -190,9 +191,8 @@ prepare_results_row <- function(lineage_subset, .min_lineage_sequences, .verbose
190191
sequences[["Clone.ID"]] %<>% as.integer()
191192
sequences[["Clones"]] %<>% as.integer()
192193

193-
germline_parts <- strsplit(germline_seq, "N")[[1]]
194-
germline_v_len <- stringr::str_length(germline_parts[1])
195-
germline_j_len <- stringr::str_length(tail(germline_parts, 1))
194+
germline_v_len <- str_length(germline_v)
195+
germline_j_len <- str_length(germline_j)
196196
v_min_len <- min(lineage_subset[["V.lengths"]], germline_v_len)
197197
j_min_len <- min(lineage_subset[["J.lengths"]], germline_j_len)
198198

@@ -233,14 +233,16 @@ prepare_results_row <- function(lineage_subset, .min_lineage_sequences, .verbose
233233
J.germline.nt = germline_j,
234234
CDR3.germline.length = germline_cdr3_len,
235235
Alignment = alignment,
236+
V.length = v_min_len,
237+
J.length = j_min_len,
236238
Sequences = sequences
237239
))
238240
}
239241
}
240242

241243
# trim V/J tails in sequence to the specified lenghts v_min, j_min
242244
trim_seq <- function(seq, v_len, v_min, j_len, j_min) {
243-
stringr::str_sub(seq, v_len - v_min + 1, -(j_len - j_min + 1))
245+
str_sub(seq, v_len - v_min + 1, -(j_len - j_min + 1))
244246
}
245247

246248
convert_results_to_df <- function(nested_results_list, nested_alignments_list) {
@@ -254,7 +256,10 @@ convert_results_to_df <- function(nested_results_list, nested_alignments_list) {
254256
lapply(rlist::list.remove, c("Alignment", "Sequences")) %>%
255257
purrr::map_dfr(~.) %>%
256258
cbind(alignments, sequences)
257-
df[["CDR3.germline.length"]] %<>% as.integer()
259+
# fix column types after dataframe rebuilding
260+
for (column in c("CDR3.germline.length", "V.length", "J.length")) {
261+
df[[column]] %<>% as.integer()
262+
}
258263
return(df)
259264
}
260265

R/germline.R

+43-33
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,4 @@
1-
#' This function creates germlines for clonal lineages. B cell clonal lineage represents a set of B cells
2-
#' that presumably have a common origin (arising from the same VDJ rearrangement event) and a common ancestor.
3-
#' Each clonal lineage has its own germline sequence that represents the ancestral sequence
4-
#' for each BCR in clonal lineage. In other words, germline sequence is a sequence of B-cells immediately
5-
#' after VDJ recombination, before B-cell maturation and hypermutation process. Germline sequence is useful
6-
#' for assessing the degree of mutation and maturity of the repertoire.
1+
#' Creates germlines for clonal lineages
72
#'
83
#' @concept germline
94
#'
@@ -16,7 +11,13 @@
1611
#' @importFrom parallel parApply detectCores makeCluster clusterExport stopCluster
1712
#' @importFrom ape as.DNAbin clustal
1813
#'
19-
#' @description Creates germlines for clonal lineages
14+
#' @description This function creates germlines for clonal lineages. B cell clonal lineage
15+
#' represents a set of B cells that presumably have a common origin (arising from the same VDJ
16+
#' rearrangement event) and a common ancestor. Each clonal lineage has its own germline sequence
17+
#' that represents the ancestral sequence for each BCR in clonal lineage. In other words,
18+
#' germline sequence is a sequence of B-cells immediately after VDJ recombination, before
19+
#' B-cell maturation and hypermutation process. Germline sequence is useful for assessing
20+
#' the degree of mutation and maturity of the repertoire.
2021
#'
2122
#' @usage
2223
#'
@@ -137,14 +138,16 @@ calculate_germlines_parallel <- function(data, align_j_gene, threads, sample_nam
137138
seq = row[["Sequence"]],
138139
v_ref = row[["V.ref.nt"]],
139140
j_ref = row[["J.ref.nt"]],
140-
v_end = str_length(row[["CDR1.nt"]]) + str_length(row[["CDR2.nt"]])
141-
+ str_length(row[["FR1.nt"]]) + str_length(row[["FR2.nt"]])
142-
+ str_length(row[["FR3.nt"]]),
141+
cdr1_nt = row[["CDR1.nt"]],
142+
cdr2_nt = row[["CDR2.nt"]],
143+
fr1_nt = row[["FR1.nt"]],
144+
fr2_nt = row[["FR2.nt"]],
145+
fr3_nt = row[["FR3.nt"]],
146+
fr4_nt = row[["FR4.nt"]],
143147
cdr3_start = as.integer(row[["CDR3.start"]]),
144148
cdr3_end = as.integer(row[["CDR3.end"]]),
145149
j_start = as.integer(row[["J.start"]]),
146150
j3_del = as.integer(row[["J3.Deletions"]]),
147-
fr4_seq = row[["FR4.nt"]],
148151
align_j_gene = align_j_gene,
149152
sample_name = sample_name
150153
)
@@ -157,43 +160,47 @@ calculate_germlines_parallel <- function(data, align_j_gene, threads, sample_nam
157160
return(data)
158161
}
159162

160-
generate_germline_sequence <- function(seq,
161-
v_ref,
162-
j_ref,
163-
v_end,
164-
cdr3_start,
165-
cdr3_end,
166-
j_start,
167-
j3_del,
168-
fr4_seq,
169-
align_j_gene,
170-
sample_name) {
171-
if (any(is.na(c(seq, v_ref, j_ref, v_end, cdr3_start, cdr3_end, j_start, j3_del, fr4_seq))) ||
163+
generate_germline_sequence <- function(seq, v_ref, j_ref, cdr1_nt, cdr2_nt,
164+
fr1_nt, fr2_nt, fr3_nt, fr4_nt,
165+
cdr3_start, cdr3_end, j_start, j3_del,
166+
align_j_gene, sample_name) {
167+
if (any(is.na(c(
168+
seq, v_ref, j_ref, cdr1_nt, cdr2_nt, fr1_nt, fr2_nt, fr3_nt, fr4_nt,
169+
cdr3_start, cdr3_end, j_start, j3_del
170+
))) ||
172171
(seq == "")) {
173172
# warnings cannot be displayed from parApply; save them and display after finish
174173
warn <- paste0(
175174
"Some of mandatory fields in a row ",
176175
optional_sample("from sample ", sample_name, " "),
177176
"contain unexpected NA or empty strings! Found values:\n",
178-
"Sequence = \"",
177+
"Sequence = ",
179178
seq,
180-
"\",\nV.ref.nt = \"",
179+
",\nV.ref.nt = ",
181180
v_ref,
182-
"\",\nJ.ref.nt = \"",
181+
",\nJ.ref.nt = ",
183182
j_ref,
184-
"\",\nCalculated_V_end = ",
185-
v_end,
186-
", CDR3.start = ",
183+
",\nCDR1.nt = ",
184+
cdr1_nt,
185+
",\nCDR2.nt = ",
186+
cdr2_nt,
187+
",\nFR1.nt = ",
188+
fr1_nt,
189+
",\nFR2.nt = ",
190+
fr2_nt,
191+
",\nFR3.nt = ",
192+
fr3_nt,
193+
",\nFR4.nt = ",
194+
fr4_nt,
195+
",\nCDR3.start = ",
187196
cdr3_start,
188197
", CDR3.end = ",
189198
cdr3_end,
190199
", J.start = ",
191200
j_start,
192201
", J3.Deletions = ",
193202
j3_del,
194-
",\nFR4.nt = \"",
195-
fr4_seq,
196-
"\".\nThe row will be dropped!"
203+
".\nThe row will be dropped!"
197204
)
198205
return(list(
199206
V.germline.nt = NA,
@@ -203,6 +210,8 @@ generate_germline_sequence <- function(seq,
203210
Warning = warn
204211
))
205212
} else {
213+
v_end <- str_length(cdr1_nt) + str_length(cdr2_nt) +
214+
str_length(fr1_nt) + str_length(fr2_nt) + str_length(fr3_nt)
206215
cdr3_length <- cdr3_end - cdr3_start
207216

208217
# trim intersection of V and CDR3 from reference V gene
@@ -212,7 +221,7 @@ generate_germline_sequence <- function(seq,
212221

213222
# trim intersection of J and CDR3 from reference J gene
214223
if (align_j_gene) {
215-
calculated_j_start <- align_and_find_j_start(j_ref, fr4_seq)
224+
calculated_j_start <- align_and_find_j_start(j_ref, fr4_nt)
216225
} else {
217226
calculated_j_start <- max(0, cdr3_end - j_start - j3_del + 1)
218227
}
@@ -402,6 +411,7 @@ align_and_find_j_start <- function(j_ref, fr4_seq, max_len_diff = 10) {
402411
germline_handle_warnings <- function(df) {
403412
warnings <- df$Warning
404413
warnings <- warnings[!is.na(warnings)]
414+
options(warning.length = 5000L)
405415
for (warn in warnings) {
406416
warning(warn)
407417
}

R/io-parsers.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ parse_repertoire <- function(.filename, .mode, .nuc.seq, .aa.seq, .count,
2626
)
2727

2828
# IO_REFACTOR
29-
suppressMessages(df <- readr::read_delim(.filename,
29+
df <- quiet(readr::read_delim(.filename,
3030
col_names = TRUE,
3131
col_types = col.classes, delim = .sep,
3232
quote = "", escape_double = FALSE,

0 commit comments

Comments
 (0)