@@ -834,8 +834,6 @@ parse_tcr <- function(.filename, .mode) {
834
834
}
835
835
836
836
parse_vdjtools <- function (.filename , .mode ) {
837
- skip <- 0
838
-
839
837
# Check for different VDJtools outputs
840
838
f <- file(.filename , " r" )
841
839
l <- readLines(f , 1 )
@@ -964,19 +962,28 @@ parse_airr <- function(.filename, .mode) {
964
962
.as_tsv() %> %
965
963
airr :: read_rearrangement()
966
964
967
- df <- df %> %
968
- select(
969
- sequence , v_call , d_call , j_call , junction , junction_aa ,
970
- contains(" v_germline_end" ), contains(" d_germline_start" ), contains(" d_germline_end" ),
971
- contains(" j_germline_start" ), contains(" np1_length" ), contains(" np2_length" ),
972
- contains(" duplicate_count" )
965
+ df %<> %
966
+ select_(
967
+ " sequence" , " v_call" , " d_call" , " j_call" , " junction" , " junction_aa" ,
968
+ ~ contains(" v_germline_end" ), ~ contains(" d_germline_start" ),
969
+ ~ contains(" d_germline_end" ), ~ contains(" j_germline_start" ),
970
+ ~ contains(" np1_length" ), ~ contains(" np2_length" ),
971
+ ~ contains(" duplicate_count" ),
972
+ " cdr1" , " cdr2" , " cdr1_aa" , " cdr2_aa" , " fwr1" , " fwr2" , " fwr3" , " fwr4" ,
973
+ " fwr1_aa" , " fwr2_aa" , " fwr3_aa" , " fwr4_aa"
973
974
)
974
975
975
976
namekey <- c(
976
977
duplicate_count = IMMCOL $ count , junction = IMMCOL $ cdr3nt , junction_aa = IMMCOL $ cdr3aa ,
977
978
v_call = IMMCOL $ v , d_call = IMMCOL $ d , j_call = IMMCOL $ j , v_germline_end = IMMCOL $ ve ,
978
979
d_germline_start = IMMCOL $ ds , d_germline_end = IMMCOL $ de , j_germline_start = IMMCOL $ js ,
979
- np1_length = " unidins" , np2_length = IMMCOL $ dnj , sequence = IMMCOL $ seq
980
+ np1_length = " unidins" , np2_length = IMMCOL $ dnj , sequence = IMMCOL $ seq ,
981
+ cdr1 = IMMCOL_EXT $ cdr1nt , cdr2 = IMMCOL_EXT $ cdr2nt ,
982
+ cdr1_aa = IMMCOL_EXT $ cdr1aa , cdr2_aa = IMMCOL_EXT $ cdr2aa ,
983
+ fwr1 = IMMCOL_EXT $ fr1nt , fwr2 = IMMCOL_EXT $ fr2nt ,
984
+ fwr3 = IMMCOL_EXT $ fr3nt , fwr4 = IMMCOL_EXT $ fr4nt ,
985
+ fwr1_aa = IMMCOL_EXT $ fr1aa , fwr2_aa = IMMCOL_EXT $ fr2aa ,
986
+ fwr3_aa = IMMCOL_EXT $ fr3aa , fwr4_aa = IMMCOL_EXT $ fr4aa
980
987
)
981
988
982
989
names(df ) <- namekey [names(df )]
@@ -998,13 +1005,15 @@ parse_airr <- function(.filename, .mode) {
998
1005
}
999
1006
}
1000
1007
1001
- for (column in IMMCOL $ order ) {
1008
+ order <- c(IMMCOL $ order , IMMCOL_EXT $ order [IMMCOL_EXT $ order %in% namekey ])
1009
+
1010
+ for (column in order ) {
1002
1011
if (! (column %in% colnames(df ))) {
1003
1012
df [column ] <- NA
1004
1013
}
1005
1014
}
1006
1015
1007
- df <- df [IMMCOL $ order ]
1016
+ df <- df [order ]
1008
1017
total <- sum(df $ Clones )
1009
1018
df [IMMCOL $ prop ] <- df [IMMCOL $ count ] / total
1010
1019
df [IMMCOL $ seq ] <- stringr :: str_remove_all(df [[IMMCOL $ seq ]], " N" )
@@ -1044,21 +1053,50 @@ parse_10x_filt_contigs <- function(.filename, .mode) {
1044
1053
.vgenes = " v_gene" , .jgenes = " j_gene" , .dgenes = " d_gene" ,
1045
1054
.vend = NA , .jstart = NA , .dstart = NA , .dend = NA ,
1046
1055
.vd.insertions = NA , .dj.insertions = NA , .total.insertions = NA ,
1047
- .skip = 0 , .sep = " ," , # .add = c("chain", "raw_clonotype_id", "raw_consensus_id", "barcode", "contig_id")
1048
- .add = c(" chain" , " barcode" , " raw_clonotype_id" , " contig_id" , " c_gene" )
1056
+ .skip = 0 , .sep = " ," ,
1057
+ .add = c(
1058
+ " chain" , " barcode" , " raw_clonotype_id" , " contig_id" , " c_gene" ,
1059
+ " cdr1_nt" , " cdr1" , " cdr2_nt" , " cdr2" ,
1060
+ " fwr1_nt" , " fwr1" , " fwr2_nt" , " fwr2" , " fwr3_nt" , " fwr3" , " fwr4_nt" , " fwr4"
1061
+ )
1049
1062
)
1050
1063
1064
+ setnames(df , " cdr1_nt" , IMMCOL_EXT $ cdr1nt )
1065
+ setnames(df , " cdr2_nt" , IMMCOL_EXT $ cdr2nt )
1066
+ setnames(df , " cdr1" , IMMCOL_EXT $ cdr1aa )
1067
+ setnames(df , " cdr2" , IMMCOL_EXT $ cdr2aa )
1068
+ setnames(df , " fwr1_nt" , IMMCOL_EXT $ fr1nt )
1069
+ setnames(df , " fwr2_nt" , IMMCOL_EXT $ fr2nt )
1070
+ setnames(df , " fwr3_nt" , IMMCOL_EXT $ fr3nt )
1071
+ setnames(df , " fwr4_nt" , IMMCOL_EXT $ fr4nt )
1072
+ setnames(df , " fwr1" , IMMCOL_EXT $ fr1aa )
1073
+ setnames(df , " fwr2" , IMMCOL_EXT $ fr2aa )
1074
+ setnames(df , " fwr3" , IMMCOL_EXT $ fr3aa )
1075
+ setnames(df , " fwr4" , IMMCOL_EXT $ fr4aa )
1076
+
1051
1077
# Process 10xGenomics filtered contigs files - count barcodes, merge consensus ids, clonotype ids and contig ids
1052
1078
df <- df [order(df $ chain ), ]
1053
1079
setDT(df )
1054
1080
1055
1081
if (.mode == " paired" ) {
1056
1082
df %<> %
1057
1083
lazy_dt() %> %
1058
- group_by( barcode , raw_clonotype_id ) %> %
1084
+ group_by_colnames( " barcode" , " raw_clonotype_id" ) %> %
1059
1085
summarise(
1086
+ CDR1.nt = paste0(get(" CDR1.nt" ), collapse = IMMCOL_ADD $ scsep ),
1087
+ CDR1.aa = paste0(get(" CDR1.aa" ), collapse = IMMCOL_ADD $ scsep ),
1088
+ CDR2.nt = paste0(get(" CDR2.nt" ), collapse = IMMCOL_ADD $ scsep ),
1089
+ CDR2.aa = paste0(get(" CDR2.aa" ), collapse = IMMCOL_ADD $ scsep ),
1060
1090
CDR3.nt = paste0(get(" CDR3.nt" ), collapse = IMMCOL_ADD $ scsep ),
1061
1091
CDR3.aa = paste0(get(" CDR3.aa" ), collapse = IMMCOL_ADD $ scsep ),
1092
+ FR1.nt = paste0(get(" FR1.nt" ), collapse = IMMCOL_ADD $ scsep ),
1093
+ FR1.aa = paste0(get(" FR1.aa" ), collapse = IMMCOL_ADD $ scsep ),
1094
+ FR2.nt = paste0(get(" FR2.nt" ), collapse = IMMCOL_ADD $ scsep ),
1095
+ FR2.aa = paste0(get(" FR2.aa" ), collapse = IMMCOL_ADD $ scsep ),
1096
+ FR3.nt = paste0(get(" FR3.nt" ), collapse = IMMCOL_ADD $ scsep ),
1097
+ FR3.aa = paste0(get(" FR3.aa" ), collapse = IMMCOL_ADD $ scsep ),
1098
+ FR4.nt = paste0(get(" FR4.nt" ), collapse = IMMCOL_ADD $ scsep ),
1099
+ FR4.aa = paste0(get(" FR4.aa" ), collapse = IMMCOL_ADD $ scsep ),
1062
1100
V.name = paste0(get(" V.name" ), collapse = IMMCOL_ADD $ scsep ),
1063
1101
J.name = paste0(get(" J.name" ), collapse = IMMCOL_ADD $ scsep ),
1064
1102
D.name = paste0(get(" D.name" ), collapse = IMMCOL_ADD $ scsep ),
@@ -1079,7 +1117,7 @@ parse_10x_filt_contigs <- function(.filename, .mode) {
1079
1117
V.name.sorted = sort_string(get(" V.name" ), IMMCOL_ADD $ scsep ),
1080
1118
J.name.sorted = sort_string(get(" J.name" ), IMMCOL_ADD $ scsep )
1081
1119
) %> %
1082
- group_by( CDR3.nt.sorted , V.name.sorted , J.name.sorted ) %> %
1120
+ group_by_colnames( " CDR3.nt.sorted" , " V.name.sorted" , " J.name.sorted" ) %> %
1083
1121
summarise(
1084
1122
Clones = length(unique(get(" barcode" ))),
1085
1123
CDR3.nt = first(get(" CDR3.nt" )),
@@ -1094,7 +1132,19 @@ parse_10x_filt_contigs <- function(.filename, .mode) {
1094
1132
paste0(unique(get(" raw_clonotype_id" )), collapse = IMMCOL_ADD $ scsep )
1095
1133
),
1096
1134
contig_id = paste0(get(" contig_id" ), collapse = IMMCOL_ADD $ scsep ),
1097
- c_gene = first(get(" c_gene" ))
1135
+ c_gene = first(get(" c_gene" )),
1136
+ CDR1.nt = first(get(IMMCOL_EXT $ cdr1nt )),
1137
+ CDR2.nt = first(get(IMMCOL_EXT $ cdr2nt )),
1138
+ CDR1.aa = first(get(IMMCOL_EXT $ cdr1aa )),
1139
+ CDR2.aa = first(get(IMMCOL_EXT $ cdr2aa )),
1140
+ FR1.nt = first(get(IMMCOL_EXT $ fr1nt )),
1141
+ FR2.nt = first(get(IMMCOL_EXT $ fr2nt )),
1142
+ FR3.nt = first(get(IMMCOL_EXT $ fr3nt )),
1143
+ FR4.nt = first(get(IMMCOL_EXT $ fr4nt )),
1144
+ FR1.aa = first(get(IMMCOL_EXT $ fr1aa )),
1145
+ FR2.aa = first(get(IMMCOL_EXT $ fr2aa )),
1146
+ FR3.aa = first(get(IMMCOL_EXT $ fr3aa )),
1147
+ FR4.aa = first(get(IMMCOL_EXT $ fr4aa ))
1098
1148
) %> %
1099
1149
as.data.table() %> %
1100
1150
subset(
0 commit comments