1
- # ' This function creates germlines for clonal lineages. B cell clonal lineage represents a set of B cells
2
- # ' that presumably have a common origin (arising from the same VDJ rearrangement event) and a common ancestor.
3
- # ' Each clonal lineage has its own germline sequence that represents the ancestral sequence
4
- # ' for each BCR in clonal lineage. In other words, germline sequence is a sequence of B-cells immediately
5
- # ' after VDJ recombination, before B-cell maturation and hypermutation process. Germline sequence is useful
6
- # ' for assessing the degree of mutation and maturity of the repertoire.
1
+ # ' Creates germlines for clonal lineages
7
2
# '
8
3
# ' @concept germline
9
4
# '
16
11
# ' @importFrom parallel parApply detectCores makeCluster clusterExport stopCluster
17
12
# ' @importFrom ape as.DNAbin clustal
18
13
# '
19
- # ' @description Creates germlines for clonal lineages
14
+ # ' @description This function creates germlines for clonal lineages. A B cell clonal lineage
15
+ # ' represents a set of B cells that presumably have a common origin (arising from the same VDJ
16
+ # ' rearrangement event) and a common ancestor. Each clonal lineage has its own germline sequence
17
+ # ' that represents the ancestral sequence for each BCR in the clonal lineage. In other words,
18
+ # ' the germline sequence is the sequence of a B cell immediately after VDJ recombination, before
19
+ # ' B-cell maturation and the hypermutation process. The germline sequence is useful for assessing
20
+ # ' the degree of mutation and maturity of the repertoire.
20
21
# '
21
22
# ' @usage
22
23
# '
@@ -137,14 +138,16 @@ calculate_germlines_parallel <- function(data, align_j_gene, threads, sample_nam
137
138
seq = row [[" Sequence" ]],
138
139
v_ref = row [[" V.ref.nt" ]],
139
140
j_ref = row [[" J.ref.nt" ]],
140
- v_end = str_length(row [[" CDR1.nt" ]]) + str_length(row [[" CDR2.nt" ]])
141
- + str_length(row [[" FR1.nt" ]]) + str_length(row [[" FR2.nt" ]])
142
- + str_length(row [[" FR3.nt" ]]),
141
+ cdr1_nt = row [[" CDR1.nt" ]],
142
+ cdr2_nt = row [[" CDR2.nt" ]],
143
+ fr1_nt = row [[" FR1.nt" ]],
144
+ fr2_nt = row [[" FR2.nt" ]],
145
+ fr3_nt = row [[" FR3.nt" ]],
146
+ fr4_nt = row [[" FR4.nt" ]],
143
147
cdr3_start = as.integer(row [[" CDR3.start" ]]),
144
148
cdr3_end = as.integer(row [[" CDR3.end" ]]),
145
149
j_start = as.integer(row [[" J.start" ]]),
146
150
j3_del = as.integer(row [[" J3.Deletions" ]]),
147
- fr4_seq = row [[" FR4.nt" ]],
148
151
align_j_gene = align_j_gene ,
149
152
sample_name = sample_name
150
153
)
@@ -157,43 +160,47 @@ calculate_germlines_parallel <- function(data, align_j_gene, threads, sample_nam
157
160
return (data )
158
161
}
159
162
160
- generate_germline_sequence <- function (seq ,
161
- v_ref ,
162
- j_ref ,
163
- v_end ,
164
- cdr3_start ,
165
- cdr3_end ,
166
- j_start ,
167
- j3_del ,
168
- fr4_seq ,
169
- align_j_gene ,
170
- sample_name ) {
171
- if (any(is.na(c(seq , v_ref , j_ref , v_end , cdr3_start , cdr3_end , j_start , j3_del , fr4_seq ))) ||
163
+ generate_germline_sequence <- function (seq , v_ref , j_ref , cdr1_nt , cdr2_nt ,
164
+ fr1_nt , fr2_nt , fr3_nt , fr4_nt ,
165
+ cdr3_start , cdr3_end , j_start , j3_del ,
166
+ align_j_gene , sample_name ) {
167
+ if (any(is.na(c(
168
+ seq , v_ref , j_ref , cdr1_nt , cdr2_nt , fr1_nt , fr2_nt , fr3_nt , fr4_nt ,
169
+ cdr3_start , cdr3_end , j_start , j3_del
170
+ ))) ||
172
171
(seq == " " )) {
173
172
# warnings cannot be displayed from parApply; save them and display after finish
174
173
warn <- paste0(
175
174
" Some of mandatory fields in a row " ,
176
175
optional_sample(" from sample " , sample_name , " " ),
177
176
" contain unexpected NA or empty strings! Found values:\n " ,
178
- " Sequence = \" " ,
177
+ " Sequence = " ,
179
178
seq ,
180
- " \" ,\n V.ref.nt = \" " ,
179
+ " ,\n V.ref.nt = " ,
181
180
v_ref ,
182
- " \" ,\n J.ref.nt = \" " ,
181
+ " ,\n J.ref.nt = " ,
183
182
j_ref ,
184
- " \" ,\n Calculated_V_end = " ,
185
- v_end ,
186
- " , CDR3.start = " ,
183
+ " ,\n CDR1.nt = " ,
184
+ cdr1_nt ,
185
+ " ,\n CDR2.nt = " ,
186
+ cdr2_nt ,
187
+ " ,\n FR1.nt = " ,
188
+ fr1_nt ,
189
+ " ,\n FR2.nt = " ,
190
+ fr2_nt ,
191
+ " ,\n FR3.nt = " ,
192
+ fr3_nt ,
193
+ " ,\n FR4.nt = " ,
194
+ fr4_nt ,
195
+ " ,\n CDR3.start = " ,
187
196
cdr3_start ,
188
197
" , CDR3.end = " ,
189
198
cdr3_end ,
190
199
" , J.start = " ,
191
200
j_start ,
192
201
" , J3.Deletions = " ,
193
202
j3_del ,
194
- " ,\n FR4.nt = \" " ,
195
- fr4_seq ,
196
- " \" .\n The row will be dropped!"
203
+ " .\n The row will be dropped!"
197
204
)
198
205
return (list (
199
206
V.germline.nt = NA ,
@@ -203,6 +210,8 @@ generate_germline_sequence <- function(seq,
203
210
Warning = warn
204
211
))
205
212
} else {
213
+ v_end <- str_length(cdr1_nt ) + str_length(cdr2_nt ) +
214
+ str_length(fr1_nt ) + str_length(fr2_nt ) + str_length(fr3_nt )
206
215
cdr3_length <- cdr3_end - cdr3_start
207
216
208
217
# trim intersection of V and CDR3 from reference V gene
@@ -212,7 +221,7 @@ generate_germline_sequence <- function(seq,
212
221
213
222
# trim intersection of J and CDR3 from reference J gene
214
223
if (align_j_gene ) {
215
- calculated_j_start <- align_and_find_j_start(j_ref , fr4_seq )
224
+ calculated_j_start <- align_and_find_j_start(j_ref , fr4_nt )
216
225
} else {
217
226
calculated_j_start <- max(0 , cdr3_end - j_start - j3_del + 1 )
218
227
}
@@ -402,6 +411,7 @@ align_and_find_j_start <- function(j_ref, fr4_seq, max_len_diff = 10) {
402
411
germline_handle_warnings <- function (df ) {
403
412
warnings <- df $ Warning
404
413
warnings <- warnings [! is.na(warnings )]
414
+ options(warning.length = 5000L )
405
415
for (warn in warnings ) {
406
416
warning(warn )
407
417
}
0 commit comments