Skip to content

Commit 2b6d707

Browse files
author
Aleksandr Popov
authored
Merge pull request #246 from immunomind/repload-fixes
2 parents 946d85a + ad01d03 commit 2b6d707

File tree

3 files changed

+29
-56
lines changed

3 files changed

+29
-56
lines changed

R/io-parsers.R

+22-22
Original file line numberDiff line numberDiff line change
@@ -1040,8 +1040,6 @@ parse_10x_filt_contigs <- function(.filename, .mode) {
10401040
.skip = 0, .sep = ",", # .add = c("chain", "raw_clonotype_id", "raw_consensus_id", "barcode", "contig_id")
10411041
.add = c("chain", "barcode", "raw_clonotype_id", "contig_id", "c_gene")
10421042
)
1043-
# setnames(df, "raw_clonotype_id", "RawClonotypeID")
1044-
# setnames(df, "raw_consensus_id", "RawConsensusID")
10451043

10461044
# Process 10xGenomics filtered contigs files - count barcodes, merge consensus ids, clonotype ids and contig ids
10471045
df <- df[order(df$chain), ]
@@ -1052,16 +1050,17 @@ parse_10x_filt_contigs <- function(.filename, .mode) {
10521050
lazy_dt() %>%
10531051
group_by(barcode, raw_clonotype_id) %>%
10541052
summarise(
1055-
CDR3.nt = paste0(CDR3.nt, collapse = IMMCOL_ADD$scsep),
1056-
CDR3.aa = paste0(CDR3.aa, collapse = IMMCOL_ADD$scsep),
1057-
V.name = paste0(V.name, collapse = IMMCOL_ADD$scsep),
1058-
J.name = paste0(J.name, collapse = IMMCOL_ADD$scsep),
1059-
D.name = paste0(D.name, collapse = IMMCOL_ADD$scsep),
1060-
chain = paste0(chain, collapse = IMMCOL_ADD$scsep),
1061-
# raw_clonotype_id = gsub("clonotype", "", paste0(raw_clonotype_id, collapse = IMMCOL_ADD$scsep)),
1062-
# raw_consensus_id = gsub("clonotype|consensus", "", paste0(raw_consensus_id, collapse = IMMCOL_ADD$scsep)),
1063-
contig_id = gsub("_contig_", "", paste0(contig_id, collapse = IMMCOL_ADD$scsep)),
1064-
c_gene = paste0(c_gene, collapse = IMMCOL_ADD$scsep)
1053+
CDR3.nt = paste0(get("CDR3.nt"), collapse = IMMCOL_ADD$scsep),
1054+
CDR3.aa = paste0(get("CDR3.aa"), collapse = IMMCOL_ADD$scsep),
1055+
V.name = paste0(get("V.name"), collapse = IMMCOL_ADD$scsep),
1056+
J.name = paste0(get("J.name"), collapse = IMMCOL_ADD$scsep),
1057+
D.name = paste0(get("D.name"), collapse = IMMCOL_ADD$scsep),
1058+
chain = paste0(get("chain"), collapse = IMMCOL_ADD$scsep),
1059+
contig_id = gsub(
1060+
"_contig_", "",
1061+
paste0(get("contig_id"), collapse = IMMCOL_ADD$scsep)
1062+
),
1063+
c_gene = paste0(get("c_gene"), collapse = IMMCOL_ADD$scsep)
10651064
) %>%
10661065
as.data.table()
10671066
}
@@ -1070,16 +1069,17 @@ parse_10x_filt_contigs <- function(.filename, .mode) {
10701069
lazy_dt() %>%
10711070
group_by(CDR3.nt, V.name, J.name) %>%
10721071
summarise(
1073-
Clones = length(unique(barcode)),
1074-
CDR3.aa = first(CDR3.aa),
1075-
D.name = first(D.name),
1076-
chain = first(chain),
1077-
barcode = paste0(unique(barcode), collapse = IMMCOL_ADD$scsep),
1078-
raw_clonotype_id = gsub("clonotype|None", "", paste0(unique(raw_clonotype_id), collapse = IMMCOL_ADD$scsep)),
1079-
# raw_clonotype_id = gsub("clonotype", "", paste0(raw_clonotype_id, collapse = IMMCOL_ADD$scsep)),
1080-
# raw_consensus_id = gsub("clonotype|consensus", "", paste0(raw_consensus_id, collapse = IMMCOL_ADD$scsep)),
1081-
contig_id = paste0(contig_id, collapse = IMMCOL_ADD$scsep),
1082-
c_gene = first(c_gene)
1072+
Clones = length(unique(get("barcode"))),
1073+
CDR3.aa = first(get("CDR3.aa")),
1074+
D.name = first(get("D.name")),
1075+
chain = first(get("chain")),
1076+
barcode = paste0(unique(get("barcode")), collapse = IMMCOL_ADD$scsep),
1077+
raw_clonotype_id = gsub(
1078+
"clonotype|None", "",
1079+
paste0(unique(get("raw_clonotype_id")), collapse = IMMCOL_ADD$scsep)
1080+
),
1081+
contig_id = paste0(get("contig_id"), collapse = IMMCOL_ADD$scsep),
1082+
c_gene = first(get("c_gene"))
10831083
) %>%
10841084
as.data.table()
10851085

R/io.R

+6-25
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,6 @@ if (getRversion() >= "2.15.1") {
5151
#' R data frames with only one type of chain and cell presented. The metadata file will have additional columns specifying
5252
#' cell and chain types for different samples.
5353
#'
54-
#' @param .format A character string specifying what format to use. Do NOT use it. See "Details" for more information on supported formats.
55-
#'
56-
#' Leave NA (which is default) if you want `immunarch` to detect formats automatically.
57-
#'
5854
#' @param .mode Either "single" for single chain data or "paired" for paired chain data.
5955
#'
6056
#' Currently "single" works for every format, and "paired" works only for 10X Genomics data.
@@ -75,10 +71,6 @@ if (getRversion() >= "2.15.1") {
7571
#' immunoseq_3 \tab FALSE \tab 3 \tab A
7672
#' }
7773
#'
78-
#' \code{repLoad} has the ".format" argument that sets the format for input repertoire files.
79-
#' Immunarch detects the file format automatically, and the argument is left only for the compatability
80-
#' purposes. It will be soon removed. Do not pass it or your code will stop working!
81-
#'
8274
#' Currently, Immunarch supports the following formats:
8375
#'
8476
#' - "immunoseq" - ImmunoSEQ of any version. http://www.adaptivebiotech.com/immunoseq
@@ -143,27 +135,16 @@ if (getRversion() >= "2.15.1") {
143135
#' # > names(immdata)
144136
#' # [1] "data" "meta"
145137
#' @export repLoad
146-
repLoad <- function(.path, .format = NA, .mode = "paired", .coding = TRUE) {
147-
if (!is.na(.format)) {
148-
warning("Please don't provide the .format argument,
149-
immunarch detects the format automatically.
150-
The .format argument will soon be removed.")
151-
}
152-
138+
repLoad <- function(.path, .mode = "paired", .coding = TRUE) {
153139
exclude_extensions <- c(
154140
"so", "exe", "bam", "fasta", "fai", "fastq", "bed", "rds", "report", "vdjca"
155141
)
156142

157143
# Process a repertoire file: detect format and load the data
158144
# Return: a named list with a repertoire data frame and its name
159-
.read_repertoire <- function(.path, .format, .mode, .coding) {
145+
.read_repertoire <- function(.path, .mode, .coding) {
160146
parse_res <- list()
161-
162-
# Detect format
163-
cur_format <- .format
164-
if (is.na(.format)) {
165-
cur_format <- .detect_format(.path)
166-
}
147+
cur_format <- .detect_format(.path)
167148

168149
# Parse the file
169150
if (is.na(cur_format)) {
@@ -221,7 +202,7 @@ repLoad <- function(.path, .format = NA, .mode = "paired", .coding = TRUE) {
221202
# just load all repertoire files.
222203
# Do NOT (!) create a dummy metadata, return an empty data frame instead
223204
# Return: list with data, metadata and barcodes (if necessary)
224-
.process_batch <- function(.files, .format, .mode, .coding) {
205+
.process_batch <- function(.files, .mode, .coding) {
225206
parsed_batch <- list()
226207
metadata <- tibble()
227208

@@ -252,7 +233,7 @@ repLoad <- function(.path, .format = NA, .mode = "paired", .coding = TRUE) {
252233
} else if (stringr::str_detect(.filepath, "barcode")) {
253234
# TODO: add the barcode processing subroutine to split by samples
254235
} else {
255-
repertoire <- .read_repertoire(.filepath, .format, .mode, .coding)
236+
repertoire <- .read_repertoire(.filepath, .mode, .coding)
256237
if (length(repertoire) != 0) {
257238
parsed_batch <- c(parsed_batch, repertoire)
258239
}
@@ -358,7 +339,7 @@ repLoad <- function(.path, .format = NA, .mode = "paired", .coding = TRUE) {
358339
for (batch_i in seq_along(batches)) {
359340
if (length(batches[[batch_i]])) {
360341
message('Processing "', names(batches)[batch_i], '" ...')
361-
parsed_batches[[names(batches)[batch_i]]] <- .process_batch(batches[[batch_i]], .format, .mode, .coding)
342+
parsed_batches[[names(batches)[batch_i]]] <- .process_batch(batches[[batch_i]], .mode, .coding)
362343
}
363344
}
364345

man/repLoad.Rd

+1-9
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)