From 5fe5b8979b96246c0e6903627ed7436883930670 Mon Sep 17 00:00:00 2001 From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com> Date: Thu, 13 Apr 2023 18:07:02 +0200 Subject: [PATCH 01/24] updates for Vizgen --- R/preprocessing.R | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 45baa4254..63a6e6cc7 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1,5 +1,7 @@ #' @include generics.R #' @importFrom progressr progressor +#' @import dplyr +#' @importFrom magrittr %>% %<>% #' NULL @@ -2393,6 +2395,21 @@ ReadVitessce <- function( #' \item \dQuote{fov}: cell's fov #' } #' @param z Z-index to load; must be between 0 and 6, inclusive +#' @param use.BiocParallel If to use \code{BiocParallel::bplapply()}, +#' default is \code{TRUE}, if \code{FALSE}, uses \code{future} library +#' @param workers.total Number of cores to use for \code{BiocParallel::bplapply()} +#' @param DTthreads.pct Set percentage eg \code{50} of total threads to use for \code{data.table::fread}, +#' if set to \code{NULL} will use default setting as in \code{data.table::getDTthreads(verbose = T)} +#' @param coord.space; +#' choose one or more of: +#' \itemize{ +#' \item \dQuote{pixel}: molecule coordinates in pixel space +#' \item \dQuote{micron}: molecule coordinates in micron space +#' } +#' @param use.cellpose.out If TRUE, and ./Cellpose folder exists, will load results from current MERSCOPE Instrument output. Default to TRUE. Set to FALSE if to use previous outputs (ie. non-Cellpose). +#' @param add.zIndex If to add \code{z} slice index to a cell +#' @param update.object If to update final object, default to TRUE. +#' @param ... Arguments passed to \code{ReadVizgen()} #' #' @return \code{ReadVizgen}: A list with some combination of the #' following values: @@ -2435,6 +2452,12 @@ ReadVizgen <- function( molecules = NULL, type = 'segmentations', mol.type = 'microns', + use.cellpose.out = TRUE, + coord.space = "micron", + metadata = NULL, + use.BiocParallel = TRUE, + workers.total = 12, + DTthreads.pct = NULL, metadata = NULL, filter = NA_character_, z = 3L @@ -2443,7 +2466,22 @@ ReadVizgen <- function( if (!requireNamespace("data.table", quietly = TRUE)) { stop("Please install 'data.table' for this function") } - # hdf5r is only used for loading polygon boundaries + if (!requireNamespace("BiocParallel", quietly = TRUE)) { + stop("Please install 'BiocParallel' for parallelization", call. = FALSE) + } + + if (!requireNamespace("sfarrow", quietly = TRUE)) { + stop("Please install 'sfarrow' for reading cell boundaries from `.parquet` files ", + call. 
= FALSE) + } + + + + + + + + # hdf5r is only used for loading polygon boundaries # Not needed for all Vizgen input hdf5 <- requireNamespace("hdf5r", quietly = TRUE) # Argument checking From aa32cc66c3258b0fe16b73919648a62f30015846 Mon Sep 17 00:00:00 2001 From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com> Date: Thu, 13 Apr 2023 18:12:49 +0200 Subject: [PATCH 02/24] updates for Vizgen --- R/convenience.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/convenience.R b/R/convenience.R index ff0e91900..775591ff7 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -1,5 +1,6 @@ #' @include generics.R #' @include visualization.R +#' @importFrom magrittr %>% %<>% #' NULL From 56d983c3e5e9b2106e3fac26384d9857b2aadc66 Mon Sep 17 00:00:00 2001 From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com> Date: Thu, 13 Apr 2023 19:23:25 +0200 Subject: [PATCH 03/24] updates for `ReadVizgen()` --- R/preprocessing.R | 560 +++++++++++++++++++++++++++------------------- 1 file changed, 334 insertions(+), 226 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 63a6e6cc7..d65642402 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2446,42 +2446,59 @@ ReadVitessce <- function( #' @template note-reqdpkg #' ReadVizgen <- function( - data.dir, - transcripts = NULL, - spatial = NULL, - molecules = NULL, - type = 'segmentations', - mol.type = 'microns', - use.cellpose.out = TRUE, - coord.space = "micron", - metadata = NULL, - use.BiocParallel = TRUE, - workers.total = 12, - DTthreads.pct = NULL, - metadata = NULL, - filter = NA_character_, - z = 3L + data.dir, + transcripts = NULL, + spatial = NULL, + molecules = NULL, + type = 'segmentations', + mol.type = 'microns', + use.cellpose.out = TRUE, + coord.space = "micron", + metadata = NULL, + use.BiocParallel = TRUE, + workers.total = 12, + DTthreads.pct = NULL, + metadata = NULL, + filter = NA_character_, + z = 3L ) { + # TODO: handle multiple segmentations per z-plane + if (!requireNamespace("data.table", quietly = TRUE)) { stop("Please install 'data.table' for this function") } if (!requireNamespace("BiocParallel", quietly = TRUE)) { - stop("Please install 'BiocParallel' for parallelization", call. = FALSE) - } - - if (!requireNamespace("sfarrow", quietly = TRUE)) { - stop("Please install 'sfarrow' for reading cell boundaries from `.parquet` files ", - call. 
= FALSE) - } - - - - - - - - # hdf5r is only used for loading polygon boundaries + stop("Please install 'BiocParallel' for parallelization") + } + + if (!requireNamespace("sfarrow", quietly = TRUE)) { + stop("Please install 'sfarrow' for reading cell boundaries from `.parquet` files") + } + + # setting workers to use for parallel computing - `parallel` ---- + if (use.BiocParallel) { + message("Using parallelization with: `BiocParallel`") + if (is.null(workers.total)) { + workers.total <- quantile(BiocParallel::multicoreWorkers() %>% seq)[4] %>% ceiling + message(workers.total, " of total workers available will be used") + } else { message("Setting total workers to: ", workers.total) } + } else { message("Using parallelization with: `future`", "\n", + "NOTE: set workers for parallelization, eg: `future::plan('multisession', workers = 10)`") + } + + # setting cores to use for parallel computing - `data.table` + if (!is.null(DTthreads.pct)) { + message("Using parallelization with: `data.table`", "\n", + "..for `data.table::fread`") + data.table::setDTthreads(threads = 0) # all cores + DTthreads <- data.table::getDTthreads() # max cores + DTthreads <- c((DTthreads * DTthreads.pct) / 100) %>% ceiling # percentage from total threads + message("Setting DTthreads to: ", DTthreads, " (", paste0(DTthreads.pct, "%"), ")") + data.table::setDTthreads(threads = DTthreads) # set + } + + # hdf5r is only used for loading polygon boundaries # Not needed for all Vizgen input hdf5 <- requireNamespace("hdf5r", quietly = TRUE) # Argument checking @@ -2515,12 +2532,26 @@ ReadVizgen <- function( if (use.dir && !dir.exists(paths = data.dir)) { stop("Cannot find Vizgen directory ", data.dir) } - # Identify input files - files <- c( - transcripts = transcripts %||% 'cell_by_gene[_a-zA-Z0-9]*.csv', - spatial = spatial %||% 'cell_metadata[_a-zA-Z0-9]*.csv', - molecules = molecules %||% 'detected_transcripts[_a-zA-Z0-9]*.csv' - ) + + # use Cellpose output ---- + if (use.cellpose.out && grep("Cellpose", list.files(data.dir)) %>% any) { + message("Cellpose output will be used..") + # Identify input files + files <- c(transcripts = transcripts %||% paste0(data.dir, + paste0("/", list.files(data.dir, pattern = "Cellpose"), + "/cellpose_cell_by_gene.csv")), + + spatial = spatial %||% paste0(data.dir, + paste0("/", list.files(data.dir, pattern = "Cellpose"), + "/cellpose_cell_metadata.csv")), + + molecules = molecules %||% "detected_transcripts[_a-zA-Z0-9]*.csv") + } else { + files <- c(transcripts = transcripts %||% "cell_by_gene[_a-zA-Z0-9]*.csv", + spatial = spatial %||% "cell_metadata[_a-zA-Z0-9]*.csv", + molecules = molecules %||% "detected_transcripts[_a-zA-Z0-9]*.csv") + } + files[is.na(x = files)] <- NA_character_ h5dir <- file.path( ifelse( @@ -2575,16 +2606,28 @@ ReadVizgen <- function( sp <- sp[, -1, drop = FALSE] # Check to see if we should load segmentations if ('segmentations' %in% type) { - poly <- if (isFALSE(x = hdf5)) { + poly <- if (isFALSE(x = hdf5) && !use.cellpose.out) { warning( "Cannot find hdf5r; unable to load segmentation vertices", immediate. = TRUE ) FALSE - } else if (!dir.exists(paths = h5dir)) { + } else if (!dir.exists(paths = h5dir) && !use.cellpose.out) { warning("Cannot find cell boundary H5 files", immediate. 
= TRUE) FALSE - } else { + } else if (use.cellpose.out) { # added for Cellpose output ---- + files2scan <- + list.files(data.dir, pattern = ".parquet$", + all.files = TRUE, + full.names = TRUE, + recursive = TRUE) + if (length(files2scan)) { + message("Cell segmentations are found in `.parquet` file(s)", "\n", + "..using ", coord.space, " space coordinates") + } + TRUE + } + else { TRUE } if (isFALSE(x = poly)) { @@ -2632,199 +2675,264 @@ ReadVizgen <- function( files <- files[setdiff(x = names(x = files), y = 'molecules')] } files <- files[!is.na(x = files)] - # Read input data + + # Read input data ---- outs <- vector(mode = 'list', length = length(x = files)) names(x = outs) <- names(x = files) if (!is.null(metadata)) { outs <- c(outs, list(metadata = NULL)) } for (otype in names(x = outs)) { - outs[[otype]] <- switch( - EXPR = otype, - 'transcripts' = { - ptx <- progressor() - ptx(message = 'Reading counts matrix', class = 'sticky', amount = 0) - tx <- data.table::fread( - file = files[[otype]], - sep = ',', - data.table = FALSE, - verbose = FALSE - ) - rownames(x = tx) <- as.character(x = tx[, 1]) - tx <- t(x = as.matrix(x = tx[, -1, drop = FALSE])) - if (!is.na(x = filter)) { - ptx( - message = paste("Filtering genes with pattern", filter), - class = 'sticky', - amount = 0 - ) - tx <- tx[!grepl(pattern = filter, x = rownames(x = tx)), , drop = FALSE] - } - ratio <- getOption(x = 'Seurat.input.sparse_ratio', default = 0.4) - if ((sum(tx == 0) / length(x = tx)) > ratio) { - ptx( - message = 'Converting counts to sparse matrix', - class = 'sticky', - amount = 0 - ) - tx <- as.sparse(x = tx) - } - ptx(type = 'finish') - tx - }, - 'centroids' = { - pcents <- progressor() - pcents( - message = 'Creating centroid coordinates', - class = 'sticky', - amount = 0 - ) - pcents(type = 'finish') - data.frame( - x = sp$center_x, - y = sp$center_y, - cell = rownames(x = sp), - stringsAsFactors = FALSE - ) - }, - 'segmentations' = { - ppoly <- progressor(steps = length(x = unique(x = sp$fov))) - ppoly( - message = "Creating polygon coordinates", - class = 'sticky', - amount = 0 - ) - pg <- future_lapply( - X = unique(x = sp$fov), - FUN = function(f, ...) { - fname <- file.path(h5dir, paste0('feature_data_', f, '.hdf5')) - if (!file.exists(fname)) { - warning( - "Cannot find HDF5 file for field of view ", - f, - immediate. = TRUE - ) - return(NULL) - } - hfile <- hdf5r::H5File$new(filename = fname, mode = 'r') - on.exit(expr = hfile$close_all()) - cells <- rownames(x = subset(x = sp, subset = fov == f)) - df <- lapply( - X = cells, - FUN = function(x) { - return(tryCatch( - expr = { - cc <- hfile[['featuredata']][[x]][[zidx]][['p_0']][['coordinates']]$read() - cc <- as.data.frame(x = t(x = cc)) - colnames(x = cc) <- c('x', 'y') - cc$cell <- x - cc - }, - error = function(...) { - return(NULL) - } - )) - } - ) - ppoly() - return(do.call(what = 'rbind', args = df)) - } - ) - ppoly(type = 'finish') - pg <- do.call(what = 'rbind', args = pg) - npg <- length(x = unique(x = pg$cell)) - if (npg < nrow(x = sp)) { - warning( - nrow(x = sp) - npg, - " cells missing polygon information", - immediate. 
= TRUE - ) - } - pg - }, - 'boxes' = { - pbox <- progressor(steps = nrow(x = sp)) - pbox(message = "Creating box coordinates", class = 'sticky', amount = 0) - bx <- future_lapply( - X = rownames(x = sp), - FUN = function(cell) { - row <- sp[cell, ] - df <- expand.grid( - x = c(row$min_x, row$max_x), - y = c(row$min_y, row$max_y), - cell = cell, - KEEP.OUT.ATTRS = FALSE, - stringsAsFactors = FALSE - ) - df <- df[c(1, 3, 4, 2), , drop = FALSE] - pbox() - return(df) - } - ) - pbox(type = 'finish') - do.call(what = 'rbind', args = bx) - }, - 'metadata' = { - pmeta <- progressor() - pmeta( - message = 'Loading metadata', - class = 'sticky', - amount = 0 - ) - pmeta(type = 'finish') - sp[, metadata, drop = FALSE] - }, - 'pixels' = { - ppixels <- progressor() - ppixels( - message = 'Creating pixel-level molecule coordinates', - class = 'sticky', - amount = 0 - ) - df <- data.frame( - x = mx$x, - y = mx$y, - gene = mx$gene, - stringsAsFactors = FALSE - ) - # if (!is.na(x = filter)) { - # ppixels( - # message = paste("Filtering molecules with pattern", filter), - # class = 'sticky', - # amount = 0 - # ) - # df <- df[!grepl(pattern = filter, x = df$gene), , drop = FALSE] - # } - ppixels(type = 'finish') - df - }, - 'microns' = { - pmicrons <- progressor() - pmicrons( - message = "Creating micron-level molecule coordinates", - class = 'sticky', - amount = 0 - ) - df <- data.frame( - x = mx$global_x, - y = mx$global_y, - gene = mx$gene, - stringsAsFactors = FALSE - ) - # if (!is.na(x = filter)) { - # pmicrons( - # message = paste("Filtering molecules with pattern", filter), - # class = 'sticky', - # amount = 0 - # ) - # df <- df[!grepl(pattern = filter, x = df$gene), , drop = FALSE] - # } - pmicrons(type = 'finish') - df - }, - stop("Unknown MERFISH input type: ", type) - ) - } + outs[[otype]] <- + switch(EXPR = otype, + transcripts = { + ptx <- progressor() + ptx(message = "Reading counts matrix", class = "sticky", + amount = 0) + tx <- data.table::fread(file = files[[otype]], sep = ",", + data.table = FALSE, verbose = FALSE) + rownames(x = tx) <- as.character(x = tx[, 1]) + tx <- t(x = as.matrix(x = tx[, -1, drop = FALSE])) + if (!is.na(x = filter)) { + ptx(message = paste("Filtering genes with pattern", + filter), class = "sticky", amount = 0) + tx <- tx[!grepl(pattern = filter, x = rownames(x = tx)), + , drop = FALSE] + } + ratio <- getOption(x = "Seurat.input.sparse_ratio", + default = 0.4) + if ((sum(tx == 0)/length(x = tx)) > ratio) { + ptx(message = "Converting counts to sparse matrix", + class = "sticky", amount = 0) + tx <- as.sparse(x = tx) + } + ptx(type = "finish") + tx + + }, centroids = { + pcents <- progressor() + pcents(message = "Creating centroid coordinates", + class = "sticky", amount = 0) + pcents(type = "finish") + data.frame(x = sp$center_x, y = sp$center_y, cell = rownames(x = sp), + stringsAsFactors = FALSE) + + # use Cellpose segmentations + }, segmentations = { + if (use.cellpose.out) { + files2scan <- + list.files(data.dir, pattern = ".parquet$", + all.files = TRUE, + full.names = TRUE, + recursive = TRUE) + if (length(files2scan)) { + file2read <- files2scan %>% + grep(coord.space, ., value = TRUE) %>% + grep(".parquet$", ., value = TRUE) + } + + # read .parquet file.. 
+ parq <- sfarrow::st_read_parquet(file2read)

                   # get all cell segmentations
                   segs <- filter(parq, ZIndex == z) %>% pull(Geometry)
                   # check if any cells have > 1 segmentation boundary
                   test.segs <-
                     lapply(segs %>% seq, function(i) segs[[i]][[1]] %>% length)
                   if (which(unlist(test.segs) > 1) %>% any) {
                     segs.art.index <- which(unlist(test.segs) > 1)
                     message(segs.art.index %>% length,
                             " Cells have > 1 segmentation boundary artifacts", "\n",
                             "..removing artifacts", "\n",
                             "..keeping cell boundaries with maximum coords")
                     # usually artifacts have small boundaries/coords
                     # find cell boundaries with maximum coords
                     for (i in segs.art.index %>% seq) {
                       dims <- lapply(segs[[segs.art.index[i]]][[1]] %>% seq(),
                                      function(d) { dim(segs[[segs.art.index[i]]][[1]][[d]])[1] } )
                       # get & keep boundaries with maximum coords
                       maxs.segs <- which(unlist(dims) == max(unlist(dims)))
                       segs[[segs.art.index[i]]][[1]] <- segs[[segs.art.index[i]]][[1]][maxs.segs]
                     }
                   } else { message("All cells have 1 segmentation boundary (no artifacts)") }
                   # add cell names
                   names(segs) <- filter(parq, ZIndex == z) %>% pull(EntityID) %>% as.character

                   # TODO: (optionally) resample & make cell boundaries equidistant!
                   if (use.BiocParallel) {
                     # extract cell boundaries per cells (faster)
                     gc() %>% invisible() # free up memory
                     message("Using fast extraction of cell boundaries..")
                     segs_list <-
                       BiocParallel::bplapply(segs %>% seq,
                                              function(i) {
                                                segs[[i]][[1]] %>%
                                                  data.table::as.data.table(x = .) %>%
                                                  as.data.frame %>%
                                                  mutate(cell = names(segs)[i]) },
                                              BPPARAM = BiocParallel::MulticoreParam(workers.total, tasks = 100L,
                                                                                     force.GC = FALSE,
                                                                                     progressbar = TRUE)
                       )
                   } else {
                     # extract cell boundaries per cells
                     message("Extracting cell boundaries..")
                     segs_list <-
                       lapply(segs %>% seq,
                              function(i) {
                                segs[[i]][[1]] %>%
                                  data.table::as.data.table(x = .) %>%
                                  as.data.frame %>%
                                  mutate(cell = names(segs)[i])
                              }
                       )
                   }

                   #segs_list %>% length
                   # df of all cell segmentations
                   gc() %>% invisible() # free up memory
                   #segs <- do.call("rbind", segs_list)
                   segs <- data.table::rbindlist(segs_list)
                   names(segs)[1:2] <- c("x", "y")
                   message("All cell boundaries (with no artifacts) are loaded..")
                   segs
                 } else { # use non-Cellpose segmentations
                   ppoly <- progressor(steps = length(x = unique(x = sp$fov)))
                   ppoly(message = "Creating polygon coordinates", class = "sticky",
                         amount = 0)
                   # use parallel or future
                   if (use.BiocParallel) {
                     pg <- BiocParallel::bplapply(X = unique(x = sp$fov), FUN = function(f, ...) {
                       fname <- file.path(h5dir, paste0("feature_data_", f, ".hdf5"))
                       if (!file.exists(fname)) {
                         warning("Cannot find HDF5 file for field of view ",
                                 f, immediate. = TRUE)
                         return(NULL)
                       }
                       # reading hdf5 files
                       hfile <- hdf5r::H5File$new(filename = fname,
                                                  mode = "r")
                       on.exit(expr = hfile$close_all())
                       cells <- rownames(x = subset(x = sp, subset = fov == f))
                       # creating df for cell boundaries
                       df <- BiocParallel::bplapply(X = cells, FUN = function(x) {
                         return(tryCatch(expr = {
                           cc <- hfile[["featuredata"]][[x]][[zidx]][["p_0"]][["coordinates"]]$read()
                           cc <- as.data.frame(x = t(x = cc))
                           colnames(x = cc) <- c("x", "y")
                           cc$cell <- x
                           cc
                         }, error = function(...)
{
                           return(NULL)
                         }))
                       }, BPPARAM = BiocParallel::MulticoreParam(workers.total, tasks = 100L,
                                                                 force.GC = FALSE,
                                                                 progressbar = TRUE))
                       ppoly()
                       return(do.call(what = "rbind", args = df))
                     }, BPPARAM = BiocParallel::MulticoreParam(workers.total, tasks = 100L,
                                                               force.GC = FALSE,
                                                               progressbar = TRUE))
                   } else {
                     pg <-
                       future.apply::future_lapply(X = unique(x = sp$fov),
                                                   FUN = function(f, ...) {
                                                     fname <- file.path(h5dir, paste0("feature_data_",
                                                                                      f, ".hdf5"))
                                                     if (!file.exists(fname)) {
                                                       warning("Cannot find HDF5 file for field of view ",
                                                               f, immediate. = TRUE)
                                                       return(NULL)
                                                     }
                                                     hfile <- hdf5r::H5File$new(filename = fname,
                                                                                mode = "r")
                                                     on.exit(expr = hfile$close_all())
                                                     cells <- rownames(x = subset(x = sp, subset = fov == f))
                                                     df <- lapply(X = cells, FUN = function(x) {
                                                       return(tryCatch(expr = {
                                                         cc <- hfile[["featuredata"]][[x]][[zidx]][["p_0"]][["coordinates"]]$read()
                                                         cc <- as.data.frame(x = t(x = cc))
                                                         colnames(x = cc) <- c("x", "y")
                                                         cc$cell <- x
                                                         cc
                                                       }, error = function(...) {
                                                         return(NULL)
                                                       }))
                                                     })
                                                     ppoly()
                                                     return(do.call(what = "rbind", args = df))
                                                   }
                       )
                   }
                   ppoly(type = "finish")
                   # cell polygons
                   pg <- do.call(what = "rbind", args = pg)
                   npg <- length(x = unique(x = pg$cell))
                   if (npg < nrow(x = sp)) {
                     warning(nrow(x = sp) - npg, " cells missing polygon information",
                             immediate. = TRUE)
                   }
                   pg # final segmentations out..
                 }
               }, boxes = {
                 pbox <- progressor(steps = nrow(x = sp))
                 pbox(message = "Creating box coordinates", class = "sticky",
                      amount = 0)
                 # use parallel or future
                 if (use.BiocParallel) {
                   message("Creating box coordinates..")
                   bx <- BiocParallel::bplapply(X = rownames(x = sp), FUN = function(cell) {
                     row <- sp[cell, ]
                     df <- expand.grid(x = c(row$min_x, row$max_x),
                                       y = c(row$min_y, row$max_y), cell = cell, KEEP.OUT.ATTRS = FALSE,
                                       stringsAsFactors = FALSE)
                     df <- df[c(1, 3, 4, 2), , drop = FALSE]
                     pbox()
                     return(df)
                   }, BPPARAM = BiocParallel::MulticoreParam(workers.total, tasks = 100L,
                                                             force.GC = FALSE, progressbar = TRUE)
                   )
                 } else {
                   bx <- future.apply::future_lapply(X = rownames(x = sp), FUN = function(cell) {
                     row <- sp[cell, ]
                     df <- expand.grid(x = c(row$min_x, row$max_x),
                                       y = c(row$min_y, row$max_y), cell = cell, KEEP.OUT.ATTRS = FALSE,
                                       stringsAsFactors = FALSE)
                     df <- df[c(1, 3, 4, 2), , drop = FALSE]
                     pbox()
                     return(df)
                   })
                 }
                 pbox(type = "finish")
                 do.call(what = "rbind", args = bx)
               }, metadata = {
                 pmeta <- progressor()
                 pmeta(message = "Loading metadata", class = "sticky",
                       amount = 0)
                 pmeta(type = "finish")
                 sp[, metadata, drop = FALSE]
               }, pixels = {
                 ppixels <- progressor()
                 ppixels(message = "Creating pixel-level molecule coordinates",
                         class = "sticky", amount = 0)
                 df <- data.frame(x = mx$x, y = mx$y, gene = mx$gene,
                                  stringsAsFactors = FALSE)
                 ppixels(type = "finish")
                 df
               }, microns = {
                 pmicrons <- progressor()
                 pmicrons(message = "Creating micron-level molecule coordinates",
                          class = "sticky", amount = 0)
                 df <- data.frame(x = mx$global_x, y = mx$global_y,
                                  gene = mx$gene, stringsAsFactors = FALSE)
                 pmicrons(type = "finish")
                 df
               }, stop("Unknown MERFISH input type: ", type))
  }

  # add z-slice index for cells ----
  outs$zIndex <- data.frame(z = rep_len(z, length.out = outs$centroids$cell %>% length), cell = outs$centroids$cell)

  return(outs)
}

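A minimal usage sketch of `ReadVizgen()` as of this patch; the directory path
and worker count are illustrative placeholders, and a MERSCOPE output folder
containing Cellpose `.parquet` results is assumed:

    vz <- ReadVizgen(
      data.dir = "/path/to/merscope_output",  # placeholder path
      type = "segmentations",                 # also accepts "centroids" / "boxes"
      mol.type = "microns",
      filter = "^Blank-",                     # drop blank probes from the counts
      z = 3L,                                 # middle z-slice
      use.BiocParallel = TRUE,                # boundary extraction via bplapply()
      workers.total = 8,                      # placeholder core count
      DTthreads.pct = 50,                     # give data.table::fread 50% of threads
      coord.space = "micron",
      use.cellpose.out = TRUE                 # prefer ./Cellpose .parquet segmentations
    )
    head(vz$segmentations)  # x/y boundary vertices keyed by cell
    head(vz$zIndex)         # per-cell z-slice index added by this patch
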
From 115e45d0584d14d7a26c4e7d24ec21b572a7d96f Mon Sep 17 00:00:00 2001
From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com>
Date: Thu, 13 Apr 2023 19:39:34 +0200
Subject: [PATCH 04/24] updates for `LoadVizgen`

---
 R/convenience.R | 91 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 62 insertions(+), 29 deletions(-)

diff --git a/R/convenience.R b/R/convenience.R
index 775591ff7..b06deb1c1 100644
--- a/R/convenience.R
+++ b/R/convenience.R
@@ -137,37 +137,70 @@ LoadNanostring <- function(data.dir, fov, assay = 'Nanostring') {
 #'
 #' @rdname ReadVizgen
 #'
-LoadVizgen <- function(data.dir, fov, assay = 'Vizgen', z = 3L) {
-  data <- ReadVizgen(
-    data.dir = data.dir,
-    filter = "^Blank-",
-    type = c("centroids", "segmentations"),
-    z = z
-  )
-  segs <- CreateSegmentation(data$segmentations)
-  cents <- CreateCentroids(data$centroids)
-  segmentations.data <- list(
-    "centroids" = cents,
-    "segmentation" = segs
-  )
-  coords <- CreateFOV(
-    coords = segmentations.data,
-    type = c("segmentation", "centroids"),
-    molecules = data$microns,
-    assay = assay
-  )
-  obj <- CreateSeuratObject(counts = data$transcripts, assay = assay)
-  # only consider the cells we have counts and a segmentation for
-  # Cells which don't have a segmentation are probably found in other z slices.
-  coords <- subset(
-    x = coords,
-    cells = intersect(
-      x = Cells(x = coords[["segmentation"]]),
-      y = Cells(x = obj)
-    )
-  )
+LoadVizgen <- function(data.dir, fov = "vz", assay = 'Vizgen',
+                       mol.type = 'microns', # must match ReadVizgen()'s 'mol.type'; data[[mol.type]] is used below
+                       add.zIndex = TRUE, update.object = TRUE,
+                       ...) {
+  data <- ReadVizgen(data.dir = data.dir, mol.type = mol.type, ...)
+
+  # if "segmentations" are not present, use cell bounding boxes instead
+  if (!"segmentations" %in% names(data)) {
+    bound.boxes <- CreateSegmentation(data[["boxes"]])
+    cents <- CreateCentroids(data[["centroids"]])
+    bound.boxes.data <- list(centroids = cents, boxes = bound.boxes)
+    message("Creating FOVs", "\n",
+            "..using box coordinates instead of segmentations")
+    coords <- CreateFOV(coords = bound.boxes.data, type = c("boxes",
+                        "centroids"), molecules = data[[mol.type]], assay = assay)
+    message("Creating Seurat object")
+    obj <- CreateSeuratObject(counts = data[["transcripts"]], assay = assay)
+    # only consider the cells we have counts and a segmentation for
+    # Cells which don't have a segmentation are probably found in other z slices.
+    coords <- subset(x = coords,
+                     cells = intersect(x = Cells(x = coords[["boxes"]]),
+                                       y = Cells(x = obj)))
+  } else {
+    segs <- CreateSegmentation(data[["segmentations"]])
+    cents <- CreateCentroids(data[["centroids"]])
+    segmentations.data <- list(centroids = cents, segmentation = segs)
+    message("Creating FOVs", "\n",
+            "..using segmentations")
+    coords <- CreateFOV(coords = segmentations.data, type = c("segmentation",
+                        "centroids"), molecules = data[[mol.type]], assay = assay)
+    message("Creating Seurat object..")
+    obj <- CreateSeuratObject(counts = data[["transcripts"]], assay = assay)
+    # only consider the cells we have counts and a segmentation for
+    # Cells which don't have a segmentation are probably found in other z slices.
+    coords <- subset(x = coords,
+                     cells = intersect(x = Cells(x = coords[["segmentation"]]),
+                                       y = Cells(x = obj)))
+  }
+
+  # add z-stack index for cells
+  if (add.zIndex) { obj$z <- data$zIndex$z }
+
+  # add metadata vars
+  message("..adding metadata info")
+  if (c("metadata" %in% names(data))) {
+    metadata <- match.arg(arg = "metadata", choices = names(data), several.ok = TRUE)
+    meta.vars <- names(data[[metadata]])
+    for (i in meta.vars %>% seq) {
+      obj %<>% AddMetaData(metadata = data[[metadata]][[meta.vars[i]]],
+                           col.name = meta.vars[i])
+    }
+  }
+
+  # sanity on fov name
+  fov %<>% gsub("_|-", ".", .)
+ + #message("..adding FOV") # add coords to seurat object obj[[fov]] <- coords + + if (update.object) { + message("Updating object..") + obj %<>% UpdateSeuratObject() } + + #message("Object is ready!") return(obj) } From eff89e97fabc1f13981dedcb24ba97cf4787f635 Mon Sep 17 00:00:00 2001 From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com> Date: Thu, 13 Apr 2023 20:04:58 +0200 Subject: [PATCH 05/24] fix for argument 'metadata' --- R/preprocessing.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index d65642402..b4000e728 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2458,7 +2458,6 @@ ReadVizgen <- function( use.BiocParallel = TRUE, workers.total = 12, DTthreads.pct = NULL, - metadata = NULL, filter = NA_character_, z = 3L ) { From 87d9303481d72bc6602681d663f7d23bb63970ec Mon Sep 17 00:00:00 2001 From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com> Date: Thu, 13 Apr 2023 20:35:48 +0200 Subject: [PATCH 06/24] param args for `LoadVizgen` --- R/convenience.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/convenience.R b/R/convenience.R index b06deb1c1..5e5c16bfb 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -130,6 +130,10 @@ LoadNanostring <- function(data.dir, fov, assay = 'Nanostring') { #' @return \code{LoadVizgen}: A \code{\link[SeuratObject]{Seurat}} object #' +#' @param add.zIndex If to add \code{z} slice index to a cell +#' @param update.object If to update final object, default to TRUE. +#' @param ... Arguments passed to \code{ReadVizgen} +#' #' @importFrom SeuratObject Cells CreateCentroids CreateFOV #' CreateSegmentation CreateSeuratObject #' From 089740a5658b6325ee0856a7273e09093ef21e78 Mon Sep 17 00:00:00 2001 From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com> Date: Thu, 13 Apr 2023 20:48:35 +0200 Subject: [PATCH 07/24] fix args for `ReadVizgen` --- R/preprocessing.R | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index b4000e728..9845d727e 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2395,21 +2395,20 @@ ReadVitessce <- function( #' \item \dQuote{fov}: cell's fov #' } #' @param z Z-index to load; must be between 0 and 6, inclusive -#' @param use.BiocParallel If to use \code{BiocParallel::bplapply()}, +#' @param use.BiocParallel If to use \code{BiocParallel::bplapply}, #' default is \code{TRUE}, if \code{FALSE}, uses \code{future} library -#' @param workers.total Number of cores to use for \code{BiocParallel::bplapply()} +#' @param workers.total Number of cores to use for \code{BiocParallel::bplapply} #' @param DTthreads.pct Set percentage eg \code{50} of total threads to use for \code{data.table::fread}, #' if set to \code{NULL} will use default setting as in \code{data.table::getDTthreads(verbose = T)} -#' @param coord.space; +#' @param coord.space Type of molecule spatial coordinate space to use; #' choose one or more of: #' \itemize{ #' \item \dQuote{pixel}: molecule coordinates in pixel space #' \item \dQuote{micron}: molecule coordinates in micron space #' } -#' @param use.cellpose.out If TRUE, and ./Cellpose folder exists, will load results from current MERSCOPE Instrument output. Default to TRUE. Set to FALSE if to use previous outputs (ie. non-Cellpose). -#' @param add.zIndex If to add \code{z} slice index to a cell -#' @param update.object If to update final object, default to TRUE. -#' @param ... 
Arguments passed to \code{ReadVizgen()} +#' @param use.cellpose.out If \code{TRUE}, and \code{./Cellpose} folder exists, +#' will load results from current MERSCOPE Instrument output. Default to \code{TRUE}; +#' set it to \code{FALSE} if to use previous outputs (ie. non-Cellpose). #' #' @return \code{ReadVizgen}: A list with some combination of the #' following values: @@ -2452,14 +2451,14 @@ ReadVizgen <- function( molecules = NULL, type = 'segmentations', mol.type = 'microns', - use.cellpose.out = TRUE, - coord.space = "micron", - metadata = NULL, - use.BiocParallel = TRUE, - workers.total = 12, + metadata = NULL, + filter = NA_character_, + z = 3L, + use.BiocParallel = TRUE, + workers.total = 12, DTthreads.pct = NULL, - filter = NA_character_, - z = 3L + coord.space = "micron", + use.cellpose.out = TRUE ) { # TODO: handle multiple segmentations per z-plane From 547000e3c8b462d448a4e50066a370d65eb717d9 Mon Sep 17 00:00:00 2001 From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com> Date: Wed, 19 Apr 2023 18:03:58 +0200 Subject: [PATCH 08/24] add support for `future.apply` --- R/preprocessing.R | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 9845d727e..e83b4bb7c 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2729,7 +2729,7 @@ ReadVizgen <- function( grep(".parquet$", ., value = TRUE) } - # read .parquet file.. + # read .parquet file ---- parq <- sfarrow::st_read_parquet(file2read) # get all cell segmentations @@ -2766,7 +2766,7 @@ ReadVizgen <- function( function(i) { segs[[i]][[1]] %>% data.table::as.data.table(x = .) %>% - as.data.frame %>% + #as.data.frame %>% mutate(cell = names(segs)[i]) }, BPPARAM = BiocParallel::MulticoreParam(workers.total, tasks = 100L, force.GC = FALSE, @@ -2776,14 +2776,14 @@ ReadVizgen <- function( # extract cell boundaries per cells message("Extracting of cell boundaries..") segs_list <- - lapply(segs %>% seq, - function(i) { - segs[[i]][[1]] %>% - data.table::as.data.table(x = .) %>% - as.data.frame %>% - mutate(cell = names(segs)[i]) - } - ) + future.apply::future_lapply(segs %>% seq, + function(i) { + segs[[i]][[1]] %>% + data.table::as.data.table(x = .) 
%>% + #as.data.frame %>% + mutate(cell = names(segs)[i]) + } + ) } #segs_list %>% length From f73dbdba57c31d209cbff2d95d2e400052107d1e Mon Sep 17 00:00:00 2001 From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com> Date: Wed, 24 May 2023 14:24:14 +0200 Subject: [PATCH 09/24] Vizgen support single `.parquet` file --- R/preprocessing.R | 622 +++++++++++++++++++++++----------------------- 1 file changed, 313 insertions(+), 309 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index e83b4bb7c..cfbb2eaaf 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -58,11 +58,11 @@ globalVariables( #' CalculateBarcodeInflections(pbmc_small, group.column = 'groups') #' CalculateBarcodeInflections <- function( - object, - barcode.column = "nCount_RNA", - group.column = "orig.ident", - threshold.low = NULL, - threshold.high = NULL + object, + barcode.column = "nCount_RNA", + group.column = "orig.ident", + threshold.low = NULL, + threshold.high = NULL ) { ## Check that barcode.column exists in meta.data if (!(barcode.column %in% colnames(x = object[[]]))) { @@ -203,15 +203,15 @@ CalculateBarcodeInflections <- function( #' } #' HTODemux <- function( - object, - assay = "HTO", - positive.quantile = 0.99, - init = NULL, - nstarts = 100, - kfunc = "clara", - nsamples = 100, - seed = 42, - verbose = TRUE + object, + assay = "HTO", + positive.quantile = 0.99, + init = NULL, + nstarts = 100, + kfunc = "clara", + nsamples = 100, + seed = 42, + verbose = TRUE ) { if (!is.null(x = seed)) { set.seed(seed = seed) @@ -370,14 +370,14 @@ HTODemux <- function( #' pbmc_small <- GetResidual(object = pbmc_small, features = c('MS4A1', 'TCL1A')) #' GetResidual <- function( - object, - features, - assay = NULL, - umi.assay = NULL, - clip.range = NULL, - replace.value = FALSE, - na.rm = TRUE, - verbose = TRUE + object, + features, + assay = NULL, + umi.assay = NULL, + clip.range = NULL, + replace.value = FALSE, + na.rm = TRUE, + verbose = TRUE ) { assay <- assay %||% DefaultAssay(object = object) if (IsSCT(assay = object[[assay]])) { @@ -502,14 +502,14 @@ GetResidual <- function( #' } #' Load10X_Spatial <- function( - data.dir, - filename = 'filtered_feature_bc_matrix.h5', - assay = 'Spatial', - slice = 'slice1', - filter.matrix = TRUE, - to.upper = FALSE, - image = NULL, - ... + data.dir, + filename = 'filtered_feature_bc_matrix.h5', + assay = 'Spatial', + slice = 'slice1', + filter.matrix = TRUE, + to.upper = FALSE, + image = NULL, + ... ) { if (length(x = data.dir) > 1) { warning("'Load10X_Spatial' accepts only one 'data.dir'", immediate. 
= TRUE) @@ -522,9 +522,9 @@ Load10X_Spatial <- function( object <- CreateSeuratObject(counts = data, assay = assay) if (is.null(x = image)) { image <- Read10X_Image( - image.dir = file.path(data.dir, 'spatial'), - filter.matrix = filter.matrix - ) + image.dir = file.path(data.dir, 'spatial'), + filter.matrix = filter.matrix + ) } else { if (!inherits(x = image, what = "VisiumV1")) stop("Image must be an object of class 'VisiumV1'.") @@ -557,13 +557,13 @@ Load10X_Spatial <- function( #' @concept preprocessing #' LoadSTARmap <- function( - data.dir, - counts.file = "cell_barcode_count.csv", - gene.file = "genes.csv", - qhull.file = "qhulls.tsv", - centroid.file = "centroids.tsv", - assay = "Spatial", - image = "image" + data.dir, + counts.file = "cell_barcode_count.csv", + gene.file = "genes.csv", + qhull.file = "qhulls.tsv", + centroid.file = "centroids.tsv", + assay = "Spatial", + image = "image" ) { if (!dir.exists(paths = data.dir)) { stop("Cannot find directory ", data.dir, call. = FALSE) @@ -669,13 +669,13 @@ LogNormalize <- function(data, scale.factor = 1e4, verbose = TRUE) { #' } #' MULTIseqDemux <- function( - object, - assay = "HTO", - quantile = 0.7, - autoThresh = FALSE, - maxiter = 5, - qrange = seq(from = 0.1, to = 0.9, by = 0.05), - verbose = TRUE + object, + assay = "HTO", + quantile = 0.7, + autoThresh = FALSE, + maxiter = 5, + qrange = seq(from = 0.1, to = 0.9, by = 0.05), + verbose = TRUE ) { assay <- assay %||% DefaultAssay(object = object) multi_data_norm <- t(x = GetAssayData( @@ -787,11 +787,11 @@ MULTIseqDemux <- function( #' } #' Read10X <- function( - data.dir, - gene.column = 2, - cell.column = 1, - unique.features = TRUE, - strip.suffix = FALSE + data.dir, + gene.column = 2, + cell.column = 1, + unique.features = TRUE, + strip.suffix = FALSE ) { full.data <- list() has_dt <- requireNamespace("data.table", quietly = TRUE) && requireNamespace("R.utils", quietly = TRUE) @@ -828,7 +828,7 @@ Read10X <- function( } else { cell.barcodes <- read.table(file = barcode.loc, header = FALSE, sep = '\t', row.names = NULL) } - + if (ncol(x = cell.barcodes) > 1) { cell.names <- cell.barcodes[, cell.column] } else { @@ -851,7 +851,7 @@ Read10X <- function( } else { colnames(x = data) <- paste0(names(x = data.dir)[i], "_", cell.names) } - + if (has_dt) { feature.names <- as.data.frame(data.table::fread(ifelse(test = pre_ver_3, yes = gene.loc, no = features.loc), header = FALSE)) } else { @@ -861,7 +861,7 @@ Read10X <- function( stringsAsFactors = FALSE ) } - + if (any(is.na(x = feature.names[, gene.column]))) { warning( 'Some features names are NA. 
Replacing NA names with ID from the opposite column requested', @@ -1108,10 +1108,10 @@ Read10X_Image <- function(image.dir, image.name = "tissue_lowres_image.png", fil #' @template note-reqdpkg #' ReadAkoya <- function( - filename, - type = c('inform', 'processor', 'qupath'), - filter = 'DAPI|Blank|Empty', - inform.quant = c('mean', 'total', 'min', 'max', 'std') + filename, + type = c('inform', 'processor', 'qupath'), + filter = 'DAPI|Blank|Empty', + inform.quant = c('mean', 'total', 'min', 'max', 'std') ) { if (!requireNamespace("data.table", quietly = TRUE)) { stop("Please install 'data.table' for this function") @@ -1429,18 +1429,18 @@ ReadAkoya <- function( #' } #' ReadMtx <- function( - mtx, - cells, - features, - cell.column = 1, - feature.column = 2, - cell.sep = "\t", - feature.sep = "\t", - skip.cell = 0, - skip.feature = 0, - mtx.transpose = FALSE, - unique.features = TRUE, - strip.suffix = FALSE + mtx, + cells, + features, + cell.column = 1, + feature.column = 2, + cell.sep = "\t", + feature.sep = "\t", + skip.cell = 0, + skip.feature = 0, + mtx.transpose = FALSE, + unique.features = TRUE, + strip.suffix = FALSE ) { all.files <- list( "expression matrix" = mtx, @@ -1539,7 +1539,7 @@ ReadMtx <- function( feature.column, ". Try specifiying a different column.", call. = FALSE - ) + ) } else { warning( "Some features names are NA in column ", @@ -1548,7 +1548,7 @@ ReadMtx <- function( replacement.column, ".", call. = FALSE - ) + ) } feature.names[na.features, feature.column] <- feature.names[na.features, replacement.column] } @@ -1572,7 +1572,7 @@ ReadMtx <- function( no = "" ), call. = FALSE - ) + ) } if (length(x = feature.names) != nrow(x = data)) { stop( @@ -1586,9 +1586,9 @@ ReadMtx <- function( no = "" ), call. = FALSE - ) + ) } - + colnames(x = data) <- cell.names rownames(x = data) <- feature.names data <- as.sparse(x = data) @@ -1672,24 +1672,24 @@ ReadMtx <- function( #' @template note-reqdpkg #' ReadNanostring <- function( - data.dir, - mtx.file = NULL, - metadata.file = NULL, - molecules.file = NULL, - segmentations.file = NULL, - type = 'centroids', - mol.type = 'pixels', - metadata = NULL, - mols.filter = NA_character_, - genes.filter = NA_character_, - fov.filter = NULL, - subset.counts.matrix = NULL, - cell.mols.only = TRUE + data.dir, + mtx.file = NULL, + metadata.file = NULL, + molecules.file = NULL, + segmentations.file = NULL, + type = 'centroids', + mol.type = 'pixels', + metadata = NULL, + mols.filter = NA_character_, + genes.filter = NA_character_, + fov.filter = NULL, + subset.counts.matrix = NULL, + cell.mols.only = TRUE ) { if (!requireNamespace("data.table", quietly = TRUE)) { stop("Please install 'data.table' for this function") } - + # Argument checking type <- match.arg( arg = type, @@ -1712,7 +1712,7 @@ ReadNanostring <- function( several.ok = TRUE ) } - + use.dir <- all(vapply( X = c(mtx.file, metadata.file, molecules.file), FUN = function(x) { @@ -1720,7 +1720,7 @@ ReadNanostring <- function( }, FUN.VALUE = logical(length = 1L) )) - + if (use.dir && !dir.exists(paths = data.dir)) { stop("Cannot find Nanostring directory ", data.dir) } @@ -1731,7 +1731,7 @@ ReadNanostring <- function( molecules.file = molecules.file %||% '[_a-zA-Z0-9]*_tx_file.csv', segmentations.file = segmentations.file %||% '[_a-zA-Z0-9]*-polygons.csv' ) - + files <- vapply( X = files, FUN = function(x) { @@ -1752,7 +1752,7 @@ ReadNanostring <- function( USE.NAMES = TRUE ) files[!file.exists(files)] <- NA_character_ - + if (all(is.na(x = files))) { stop("Cannot find Nanostring 
input files in ", data.dir) } @@ -1770,7 +1770,7 @@ ReadNanostring <- function( data.table = FALSE, verbose = FALSE ) - + # filter metadata file by FOVs if (!is.null(x = fov.filter)) { md <- md[md$fov %in% fov.filter,] @@ -1790,7 +1790,7 @@ ReadNanostring <- function( data.table = FALSE, verbose = FALSE ) - + # filter metadata file by FOVs if (!is.null(x = fov.filter)) { segs <- segs[segs$fov %in% fov.filter,] @@ -1810,17 +1810,17 @@ ReadNanostring <- function( sep = ',', verbose = FALSE ) - + # filter molecules file by FOVs if (!is.null(x = fov.filter)) { mx <- mx[mx$fov %in% fov.filter,] } - + # Molecules outside of a cell have a cell_ID of 0 if (cell.mols.only) { mx <- mx[mx$cell_ID != 0,] } - + if (!is.na(x = mols.filter)) { ppremol( message = paste("Filtering molecules with pattern", mols.filter), @@ -1836,7 +1836,7 @@ ReadNanostring <- function( files <- files[setdiff(x = names(x = files), y = 'molecules.file')] } files <- files[!is.na(x = files)] - + outs <- list("matrix"=NULL, "pixels"=NULL, "centroids"=NULL) if (!is.null(metadata)) { outs <- append(outs, list("metadata" = NULL)) @@ -1844,7 +1844,7 @@ ReadNanostring <- function( if ("segmentations" %in% type) { outs <- append(outs, list("segmentations" = NULL)) } - + for (otype in names(x = outs)) { outs[[otype]] <- switch( EXPR = otype, @@ -1871,7 +1871,7 @@ ReadNanostring <- function( } tx <- subset(tx, select = -c(fov, cell_ID)) } - + tx <- as.data.frame(t(x = as.matrix(x = tx[, -1, drop = FALSE]))) if (!is.na(x = genes.filter)) { ptx( @@ -1884,7 +1884,7 @@ ReadNanostring <- function( # only keep cells with counts greater than 0 tx <- tx[, which(colSums(tx) != 0)] ratio <- getOption(x = 'Seurat.input.sparse_ratio', default = 0.4) - + if ((sum(tx == 0) / length(x = tx)) > ratio) { ptx( message = 'Converting counts to sparse matrix', @@ -1893,9 +1893,9 @@ ReadNanostring <- function( ) tx <- as.sparse(x = tx) } - + ptx(type = 'finish') - + tx }, 'centroids' = { @@ -2014,10 +2014,10 @@ ReadNanostring <- function( #' @concept preprocessing #' ReadXenium <- function( - data.dir, - outs = c("matrix", "microns"), - type = "centroids", - mols.qv.threshold = 20 + data.dir, + outs = c("matrix", "microns"), + type = "centroids", + mols.qv.threshold = 20 ) { # Argument checking type <- match.arg( @@ -2025,17 +2025,17 @@ ReadXenium <- function( choices = c("centroids", "segmentations"), several.ok = TRUE ) - + outs <- match.arg( arg = outs, choices = c("matrix", "microns"), several.ok = TRUE ) - + outs <- c(outs, type) - + has_dt <- requireNamespace("data.table", quietly = TRUE) && requireNamespace("R.utils", quietly = TRUE) - + data <- sapply(outs, function(otype) { switch( EXPR = otype, @@ -2074,7 +2074,7 @@ ReadXenium <- function( class = 'sticky', amount = 0 ) - + # load cell boundaries if (has_dt) { cell_boundaries_df <- as.data.frame(data.table::fread(file.path(data.dir, "cell_boundaries.csv.gz"))) @@ -2092,7 +2092,7 @@ ReadXenium <- function( class = 'sticky', amount = 0 ) - + # molecules if (has_dt) { tx_dt <- as.data.frame(data.table::fread(file.path(data.dir, "transcripts.csv.gz"))) @@ -2101,7 +2101,7 @@ ReadXenium <- function( transcripts <- read.csv(file.path(data.dir, "transcripts.csv.gz")) transcripts <- subset(transcripts, qv >= mols.qv.threshold) } - + df <- data.frame( x = transcripts$x_location, @@ -2219,11 +2219,11 @@ ReadSlideSeq <- function(coord.file, assay = 'Spatial') { #' } #' ReadVitessce <- function( - counts = NULL, - coords = NULL, - molecules = NULL, - type = c('segmentations', 'centroids'), - filter = 
NA_character_ + counts = NULL, + coords = NULL, + molecules = NULL, + type = c('segmentations', 'centroids'), + filter = NA_character_ ) { if (!requireNamespace('jsonlite', quietly = TRUE)) { stop("Please install 'jsonlite' for this function") @@ -2724,11 +2724,15 @@ ReadVizgen <- function( full.names = TRUE, recursive = TRUE) if (length(files2scan)) { - file2read <- files2scan %>% + file2read <- + files2scan %>% grep(coord.space, ., value = TRUE) %>% - grep(".parquet$", ., value = TRUE) + { if (is.empty(.)) { + # support only single .parquet file + files2scan %>% + grep(".parquet$", ., value = TRUE) + } else { (.) } } } - # read .parquet file ---- parq <- sfarrow::st_read_parquet(file2read) @@ -2989,9 +2993,9 @@ RelativeCounts <- function(data, scale.factor = 1, verbose = TRUE) { #' @concept preprocessing #' RunMarkVario <- function( - spatial.location, - data, - ... + spatial.location, + data, + ... ) { pp <- ppp( x = spatial.location[, 1], @@ -3107,10 +3111,10 @@ RunMoransI <- function(data, pos, verbose = TRUE) { #' head(x = downsampled) #' SampleUMI <- function( - data, - max.umi = 1000, - upsample = FALSE, - verbose = FALSE + data, + max.umi = 1000, + upsample = FALSE, + verbose = FALSE ) { data <- as.sparse(x = data) if (length(x = max.umi) == 1) { @@ -3195,24 +3199,24 @@ SampleUMI <- function( #' SCTransform(object = pbmc_small) #' SCTransform <- function( - object, - assay = 'RNA', - new.assay.name = 'SCT', - reference.SCT.model = NULL, - do.correct.umi = TRUE, - ncells = 5000, - residual.features = NULL, - variable.features.n = 3000, - variable.features.rv.th = 1.3, - vars.to.regress = NULL, - do.scale = FALSE, - do.center = TRUE, - clip.range = c(-sqrt(x = ncol(x = object[[assay]]) / 30), sqrt(x = ncol(x = object[[assay]]) / 30)), - conserve.memory = FALSE, - return.only.var.genes = TRUE, - seed.use = 1448145, - verbose = TRUE, - ... + object, + assay = 'RNA', + new.assay.name = 'SCT', + reference.SCT.model = NULL, + do.correct.umi = TRUE, + ncells = 5000, + residual.features = NULL, + variable.features.n = 3000, + variable.features.rv.th = 1.3, + vars.to.regress = NULL, + do.scale = FALSE, + do.center = TRUE, + clip.range = c(-sqrt(x = ncol(x = object[[assay]]) / 30), sqrt(x = ncol(x = object[[assay]]) / 30)), + conserve.memory = FALSE, + return.only.var.genes = TRUE, + seed.use = 1448145, + verbose = TRUE, + ... 
) { if (!is.null(x = seed.use)) { set.seed(seed = seed.use) @@ -3242,7 +3246,7 @@ SCTransform <- function( if (reference.SCT.model$model_str != 'y ~ log_umi') { stop('reference.SCT.model must be derived using default SCT regression formula, `y ~ log_umi`') } - + } # check for latent_var in meta data if ('latent_var' %in% names(x = vst.args)) { @@ -3281,7 +3285,7 @@ SCTransform <- function( vst.args[['n_cells']] <- min(ncells, ncol(x = umi)) residual.type <- vst.args[['residual_type']] %||% 'pearson' res.clip.range <- vst.args[['res_clip_range']] %||% c(-sqrt(x = ncol(x = umi)), sqrt(x = ncol(x = umi))) - # set sct normalization method + # set sct normalization method if (!is.null( reference.SCT.model)) { sct.method <- "reference.model" } else if (!is.null(x = residual.features)) { @@ -3291,7 +3295,7 @@ SCTransform <- function( } else { sct.method <- "default" } - + # set vst model vst.out <- switch( EXPR = sct.method, @@ -3348,7 +3352,7 @@ SCTransform <- function( vst.out <- do.call(what = 'vst', args = vst.args) vst.out }) - + feature.variance <- vst.out$gene_attr[,"residual_variance"] names(x = feature.variance) <- rownames(x = vst.out$gene_attr) if (verbose) { @@ -3360,7 +3364,7 @@ SCTransform <- function( } else { top.features <- names(x = feature.variance)[feature.variance >= variable.features.rv.th] } - + # get residuals vst.out <- switch( EXPR = sct.method, @@ -3570,16 +3574,16 @@ SubsetByBarcodeInflections <- function(object) { #' @export #' FindVariableFeatures.default <- function( - object, - selection.method = "vst", - loess.span = 0.3, - clip.max = 'auto', - mean.function = FastExpMean, - dispersion.function = FastLogVMR, - num.bin = 20, - binning.method = "equal_width", - verbose = TRUE, - ... + object, + selection.method = "vst", + loess.span = 0.3, + clip.max = 'auto', + mean.function = FastExpMean, + dispersion.function = FastLogVMR, + num.bin = 20, + binning.method = "equal_width", + verbose = TRUE, + ... ) { CheckDots(...) if (!inherits(x = object, 'Matrix')) { @@ -3670,19 +3674,19 @@ FindVariableFeatures.default <- function( #' @method FindVariableFeatures Assay #' FindVariableFeatures.Assay <- function( - object, - selection.method = "vst", - loess.span = 0.3, - clip.max = 'auto', - mean.function = FastExpMean, - dispersion.function = FastLogVMR, - num.bin = 20, - binning.method = "equal_width", - nfeatures = 2000, - mean.cutoff = c(0.1, 8), - dispersion.cutoff = c(1, Inf), - verbose = TRUE, - ... + object, + selection.method = "vst", + loess.span = 0.3, + clip.max = 'auto', + mean.function = FastExpMean, + dispersion.function = FastLogVMR, + num.bin = 20, + binning.method = "equal_width", + nfeatures = 2000, + mean.cutoff = c(0.1, 8), + dispersion.cutoff = c(1, Inf), + verbose = TRUE, + ... ) { if (length(x = mean.cutoff) != 2 || length(x = dispersion.cutoff) != 2) { stop("Both 'mean.cutoff' and 'dispersion.cutoff' must be two numbers") @@ -3749,9 +3753,9 @@ FindVariableFeatures.Assay <- function( #' @method FindVariableFeatures SCTAssay #' FindVariableFeatures.SCTAssay <- function( - object, - nfeatures = 2000, - ... + object, + nfeatures = 2000, + ... ) { if (length(x = slot(object = object, name = "SCTModel.list")) > 1) { stop("SCT assay is comprised of multiple SCT models. To change the variable features, please set manually with VariableFeatures<-", call. 
= FALSE) @@ -3771,20 +3775,20 @@ FindVariableFeatures.SCTAssay <- function( #' @method FindVariableFeatures Seurat #' FindVariableFeatures.Seurat <- function( - object, - assay = NULL, - selection.method = "vst", - loess.span = 0.3, - clip.max = 'auto', - mean.function = FastExpMean, - dispersion.function = FastLogVMR, - num.bin = 20, - binning.method = "equal_width", - nfeatures = 2000, - mean.cutoff = c(0.1, 8), - dispersion.cutoff = c(1, Inf), - verbose = TRUE, - ... + object, + assay = NULL, + selection.method = "vst", + loess.span = 0.3, + clip.max = 'auto', + mean.function = FastExpMean, + dispersion.function = FastLogVMR, + num.bin = 20, + binning.method = "equal_width", + nfeatures = 2000, + mean.cutoff = c(0.1, 8), + dispersion.cutoff = c(1, Inf), + verbose = TRUE, + ... ) { assay <- assay %||% DefaultAssay(object = object) assay.data <- GetAssay(object = object, assay = assay) @@ -3840,14 +3844,14 @@ FindVariableFeatures.Seurat <- function( #' #' FindSpatiallyVariableFeatures.default <- function( - object, - spatial.location, - selection.method = c('markvariogram', 'moransi'), - r.metric = 5, - x.cuts = NULL, - y.cuts = NULL, - verbose = TRUE, - ... + object, + spatial.location, + selection.method = c('markvariogram', 'moransi'), + r.metric = 5, + x.cuts = NULL, + y.cuts = NULL, + verbose = TRUE, + ... ) { # error check dimensions if (ncol(x = object) != nrow(x = spatial.location)) { @@ -3892,17 +3896,17 @@ FindSpatiallyVariableFeatures.default <- function( #' @export #' FindSpatiallyVariableFeatures.Assay <- function( - object, - slot = "scale.data", - spatial.location, - selection.method = c('markvariogram', 'moransi'), - features = NULL, - r.metric = 5, - x.cuts = NULL, - y.cuts = NULL, - nfeatures = nfeatures, - verbose = TRUE, - ... + object, + slot = "scale.data", + spatial.location, + selection.method = c('markvariogram', 'moransi'), + features = NULL, + r.metric = 5, + x.cuts = NULL, + y.cuts = NULL, + nfeatures = nfeatures, + verbose = TRUE, + ... ) { features <- features %||% rownames(x = object) if (selection.method == "markvariogram" && "markvariogram" %in% names(x = Misc(object = object))) { @@ -3984,18 +3988,18 @@ FindSpatiallyVariableFeatures.Assay <- function( #' @export #' FindSpatiallyVariableFeatures.Seurat <- function( - object, - assay = NULL, - slot = "scale.data", - features = NULL, - image = NULL, - selection.method = c('markvariogram', 'moransi'), - r.metric = 5, - x.cuts = NULL, - y.cuts = NULL, - nfeatures = 2000, - verbose = TRUE, - ... + object, + assay = NULL, + slot = "scale.data", + features = NULL, + image = NULL, + selection.method = c('markvariogram', 'moransi'), + r.metric = 5, + x.cuts = NULL, + y.cuts = NULL, + nfeatures = 2000, + verbose = TRUE, + ... ) { assay <- assay %||% DefaultAssay(object = object) features <- features %||% rownames(x = object[[assay]]) @@ -4043,13 +4047,13 @@ FindSpatiallyVariableFeatures.Seurat <- function( #' @export #' NormalizeData.default <- function( - object, - normalization.method = "LogNormalize", - scale.factor = 1e4, - margin = 1, - block.size = NULL, - verbose = TRUE, - ... + object, + normalization.method = "LogNormalize", + scale.factor = 1e4, + margin = 1, + block.size = NULL, + verbose = TRUE, + ... ) { CheckDots(...) if (is.null(x = normalization.method)) { @@ -4150,12 +4154,12 @@ NormalizeData.default <- function( #' @method NormalizeData Assay #' NormalizeData.Assay <- function( - object, - normalization.method = "LogNormalize", - scale.factor = 1e4, - margin = 1, - verbose = TRUE, - ... 
+ object, + normalization.method = "LogNormalize", + scale.factor = 1e4, + margin = 1, + verbose = TRUE, + ... ) { object <- SetAssayData( object = object, @@ -4187,13 +4191,13 @@ NormalizeData.Assay <- function( #' } #' NormalizeData.Seurat <- function( - object, - assay = NULL, - normalization.method = "LogNormalize", - scale.factor = 1e4, - margin = 1, - verbose = TRUE, - ... + object, + assay = NULL, + normalization.method = "LogNormalize", + scale.factor = 1e4, + margin = 1, + verbose = TRUE, + ... ) { assay <- assay %||% DefaultAssay(object = object) assay.data <- GetAssay(object = object, assay = assay) @@ -4243,20 +4247,20 @@ NormalizeData.Seurat <- function( #' @export #' ScaleData.default <- function( - object, - features = NULL, - vars.to.regress = NULL, - latent.data = NULL, - split.by = NULL, - model.use = 'linear', - use.umi = FALSE, - do.scale = TRUE, - do.center = TRUE, - scale.max = 10, - block.size = 1000, - min.cells.to.block = 3000, - verbose = TRUE, - ... + object, + features = NULL, + vars.to.regress = NULL, + latent.data = NULL, + split.by = NULL, + model.use = 'linear', + use.umi = FALSE, + do.scale = TRUE, + do.center = TRUE, + scale.max = 10, + block.size = 1000, + min.cells.to.block = 3000, + verbose = TRUE, + ... ) { CheckDots(...) features <- features %||% rownames(x = object) @@ -4484,20 +4488,20 @@ ScaleData.default <- function( #' @method ScaleData Assay #' ScaleData.Assay <- function( - object, - features = NULL, - vars.to.regress = NULL, - latent.data = NULL, - split.by = NULL, - model.use = 'linear', - use.umi = FALSE, - do.scale = TRUE, - do.center = TRUE, - scale.max = 10, - block.size = 1000, - min.cells.to.block = 3000, - verbose = TRUE, - ... + object, + features = NULL, + vars.to.regress = NULL, + latent.data = NULL, + split.by = NULL, + model.use = 'linear', + use.umi = FALSE, + do.scale = TRUE, + do.center = TRUE, + scale.max = 10, + block.size = 1000, + min.cells.to.block = 3000, + verbose = TRUE, + ... ) { use.umi <- ifelse(test = model.use != 'linear', yes = TRUE, no = use.umi) slot.use <- ifelse(test = use.umi, yes = 'counts', no = 'data') @@ -4542,20 +4546,20 @@ ScaleData.Assay <- function( #' @method ScaleData Seurat #' ScaleData.Seurat <- function( - object, - features = NULL, - assay = NULL, - vars.to.regress = NULL, - split.by = NULL, - model.use = 'linear', - use.umi = FALSE, - do.scale = TRUE, - do.center = TRUE, - scale.max = 10, - block.size = 1000, - min.cells.to.block = 3000, - verbose = TRUE, - ... + object, + features = NULL, + assay = NULL, + vars.to.regress = NULL, + split.by = NULL, + model.use = 'linear', + use.umi = FALSE, + do.scale = TRUE, + do.center = TRUE, + scale.max = 10, + block.size = 1000, + min.cells.to.block = 3000, + verbose = TRUE, + ... 
) { assay <- assay %||% DefaultAssay(object = object) assay.data <- GetAssay(object = object, assay = assay) @@ -4955,13 +4959,13 @@ FindThresh <- function(call.list) { #' @importFrom sctransform get_residuals # GetResidualSCTModel <- function( - object, - assay, - SCTModel, - new_features, - clip.range, - replace.value, - verbose + object, + assay, + SCTModel, + new_features, + clip.range, + replace.value, + verbose ) { clip.range <- clip.range %||% SCTResults(object = object[[assay]], slot = "clips", model = SCTModel)$sct model.features <- rownames(x = SCTResults(object = object[[assay]], slot = "feature.attributes", model = SCTModel)) @@ -4970,14 +4974,14 @@ GetResidualSCTModel <- function( sct.method <- SCTResults(object = object[[assay]], slot = "arguments", model = SCTModel)$sct.method %||% "default" scale.data.cells <- colnames(x = GetAssayData(object = object, assay = assay, slot = "scale.data")) if (length(x = setdiff(x = model.cells, y = scale.data.cells)) == 0) { - existing_features <- names(x = which(x = ! apply( - X = GetAssayData(object = object, assay = assay, slot = "scale.data")[, model.cells], - MARGIN = 1, - FUN = anyNA) - )) - } else { - existing_features <- character() - } + existing_features <- names(x = which(x = ! apply( + X = GetAssayData(object = object, assay = assay, slot = "scale.data")[, model.cells], + MARGIN = 1, + FUN = anyNA) + )) + } else { + existing_features <- character() + } if (replace.value) { features_to_compute <- new_features } else { @@ -4991,7 +4995,7 @@ GetResidualSCTModel <- function( } if (!umi.assay %in% Assays(object = object)) { warning("The umi assay (", umi.assay, ") is not present in the object. ", - "Cannot compute additional residuals.", call. = FALSE, immediate. = TRUE) + "Cannot compute additional residuals.", call. = FALSE, immediate. = TRUE) return(NULL) } diff_features <- setdiff(x = features_to_compute, y = model.features) @@ -5129,12 +5133,12 @@ NBResiduals <- function(fmla, regression.mat, gene, return.mode = FALSE) { #' @importFrom utils txtProgressBar setTxtProgressBar # RegressOutMatrix <- function( - data.expr, - latent.data = NULL, - features.regress = NULL, - model.use = NULL, - use.umi = FALSE, - verbose = TRUE + data.expr, + latent.data = NULL, + features.regress = NULL, + model.use = NULL, + use.umi = FALSE, + verbose = TRUE ) { # Do we bypass regression and simply return data.expr? 
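  # (i.e., skip the model fitting below and hand back the input matrix untouched)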
bypass <- vapply(
From 5ca10699f0d45cfd7adc9d844e24b96f508946ce Mon Sep 17 00:00:00 2001
From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com>
Date: Wed, 31 May 2023 22:26:56 +0200
Subject: [PATCH 10/24] major fix for `.parquet` segmentations

---
 R/preprocessing.R | 223 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 158 insertions(+), 65 deletions(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index cfbb2eaaf..9bd1a8ef1 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -1,7 +1,5 @@
 #' @include generics.R
 #' @importFrom progressr progressor
-#' @import dplyr
-#' @importFrom magrittr %>% %<>%
 #'
 NULL
 
@@ -2397,8 +2395,8 @@ ReadVitessce <- function(
 #' @param z Z-index to load; must be between 0 and 6, inclusive
 #' @param use.BiocParallel If to use \code{BiocParallel::bplapply},
 #' default is \code{TRUE}, if \code{FALSE}, uses \code{future} library
-#' @param workers.total Number of cores to use for \code{BiocParallel::bplapply}
-#' @param DTthreads.pct Set percentage eg \code{50} of total threads to use for \code{data.table::fread},
+#' @param workers.MulticoreParam Number of cores to use for \code{BiocParallel::bplapply}
+#' @param DTthreads.pct Optional, set percentage eg \code{50} of total threads to use for \code{data.table::fread},
 #' if set to \code{NULL} will use default setting as in \code{data.table::getDTthreads(verbose = T)}
 #' @param coord.space Type of molecule spatial coordinate space to use;
 #' choose one or more of:
@@ -2406,6 +2404,8 @@ ReadVitessce <- function(
 #'  \item \dQuote{pixel}: molecule coordinates in pixel space
 #'  \item \dQuote{micron}: molecule coordinates in micron space
 #' }
+#' @param use.furrr When working with lists, use \code{furrr} with \code{future} parallelization;
+#' if \code{FALSE} standard \code{purrr} will be used
 #' @param use.cellpose.out If \code{TRUE}, and \code{./Cellpose} folder exists,
 #' will load results from current MERSCOPE Instrument output. Default to \code{TRUE};
 #' set it to \code{FALSE} if to use previous outputs (ie. non-Cellpose).
@@ -2431,6 +2431,13 @@ ReadVitessce <- function(
 #' }
 #'
 #' @importFrom future.apply future_lapply
+#' @import dplyr
+#' @import tibble
+#' @import purrr
+#' @import furrr
+#' @import magrittr
+#' @importFrom spatstat.geom is.empty
+#' @importFrom progressr progressor
 #'
 #' @export
 #'
@@ -2449,38 +2456,38 @@ ReadVizgen <- function(
   transcripts = NULL,
   spatial = NULL,
   molecules = NULL,
-  type = 'segmentations',
+  type = c('segmentations', 'centroids'),
   mol.type = 'microns',
   metadata = NULL,
   filter = NA_character_,
   z = 3L,
   use.BiocParallel = TRUE,
-  workers.total = 12,
+  workers.MulticoreParam = 12,
   DTthreads.pct = NULL,
-  coord.space = "micron",
+  coord.space = 'micron',
+  use.furrr = TRUE,
   use.cellpose.out = TRUE
 ) {
   # TODO: handle multiple segmentations per z-plane
+  # NOTE: this is only needed when segmentations differ between z-planes

   # packages that need to be installed a priori
   pkgs <- c("data.table", "arrow", "sfarrow", 
             "tidyverse", "furrr", "BiocParallel")
   lapply(pkgs %>% length %>% seq, function(i) 
    { !requireNamespace(pkgs[i], quietly = TRUE) } ) %>% 
    unlist %>% 
    { if (c(which(.) 
> 0) %>% any()) + { c("Please install ->", "\n", + paste0("'", pkgs[which(.)], "'", collapse = ", "), " for this function") %>% + stop(., call. = FALSE) } } # setting workers to use for parallel computing - `parallel` ---- if (use.BiocParallel) { message("Using parallelization with: `BiocParallel`") - if (is.null(workers.total)) { - workers.total <- quantile(BiocParallel::multicoreWorkers() %>% seq)[4] %>% ceiling - message(workers.total, " of total workers available will be used") - } else { message("Setting total workers to: ", workers.total) } + if (is.null(workers.MulticoreParam)) { + workers.MulticoreParam <- quantile(BiocParallel::multicoreWorkers() %>% seq)[4] %>% ceiling + message(workers.MulticoreParam, " of total workers available will be used") + } else { message("Setting total workers to: ", workers.MulticoreParam) } } else { message("Using parallelization with: `future`", "\n", "NOTE: set workers for parallelization, eg: `future::plan('multisession', workers = 10)`") } @@ -2529,19 +2536,25 @@ ReadVizgen <- function( )) if (use.dir && !dir.exists(paths = data.dir)) { stop("Cannot find Vizgen directory ", data.dir) + } else { + message("Reading data from:" , "\n", data.dir) } + # TODO: + # - optimize to find and read only ".parquet$" files + # - else read .hdf5 file from ./cell_boundaries dir + # use Cellpose output ---- if (use.cellpose.out && grep("Cellpose", list.files(data.dir)) %>% any) { message("Cellpose output will be used..") # Identify input files files <- c(transcripts = transcripts %||% paste0(data.dir, paste0("/", list.files(data.dir, pattern = "Cellpose"), - "/cellpose_cell_by_gene.csv")), + "/*cell_by_gene.csv")), spatial = spatial %||% paste0(data.dir, paste0("/", list.files(data.dir, pattern = "Cellpose"), - "/cellpose_cell_metadata.csv")), + "/*cell_metadata.csv")), molecules = molecules %||% "detected_transcripts[_a-zA-Z0-9]*.csv") } else { @@ -2610,9 +2623,6 @@ ReadVizgen <- function( immediate. = TRUE ) FALSE - } else if (!dir.exists(paths = h5dir) && !use.cellpose.out) { - warning("Cannot find cell boundary H5 files", immediate. = TRUE) - FALSE } else if (use.cellpose.out) { # added for Cellpose output ---- files2scan <- list.files(data.dir, pattern = ".parquet$", @@ -2621,7 +2631,7 @@ ReadVizgen <- function( recursive = TRUE) if (length(files2scan)) { message("Cell segmentations are found in `.parquet` file(s)", "\n", - "..using ", coord.space, " space coordinates") + ">>> using ", coord.space, " space coordinates") } TRUE } @@ -2733,70 +2743,151 @@ ReadVizgen <- function( grep(".parquet$", ., value = TRUE) } else { (.) 
} } }
      
      # Read .parquet file
      parq <- sfarrow::st_read_parquet(file2read)
      
      # get all cell segmentations
      segs <- filter(parq, ZIndex == z) %>% 
        pull(Geometry) %>%
        # add cell ID
        set_names(filter(parq, ZIndex == z) %>% 
                    pull(EntityID) %>% as.character)
      
      ## Sanity checks on segmentation polygons - part 1 ----
      test.segs <- lapply(segs %>% seq, 
                          function(i) segs[[i]] %>% length) %>% unlist()
      if (any(test.segs > 1)) {
        segs.art.index <- which(test.segs > 1)
        segs.empty.index <- which(test.segs < 1)
        message("Sanity checks on cell segmentation polygons:", "\n",
                ">>> found ", segs.art.index %>% length, 
                " cells with > 1 (nested) polygon lists:", "\n",
                ">>> flattening polygon lists", "\n",
                ">>> removing ", segs.empty.index %>% length,
                " empty polygon lists")
        
        # step 1 - find polygon nested lists with > 1 length
        # (if exists) get originally planned future session
        if (is(future::plan(), "multisession")) {
          orig.plan <- future::plan()
        }
        segs.1 <- segs %>%
          purrr::keep(., c(purrr::map(., length) %>% unlist > 1)) %>%
          # flatten each list
          {
            if (use.furrr) {
              # set temporary workers
              f.plan <- future::plan("multisession", workers = 4L, gc = TRUE)
              #on.exit(f.plan %>% future::plan()) # exiting doesn't work with furrr
              # using furrr (ie, faster purrr with future)
              furrr::future_map(., purrr::flatten)
            } else {
              purrr::map(., purrr::flatten)
            }
            # since furrr loads future, hide startup messages
          } %>% suppressPackageStartupMessages()
        # set back originally planned session
        if (exists("orig.plan")) {
          future::plan(orig.plan)
        } else {
          #..or plan sequential
          future::plan("sequential")
        }
        
        # step 2 - apply multiple filtering
        segs.2 <- 
          segs %>%
          # remove empty elements
          purrr::keep(., !c(purrr::map(., length) %>% unlist < 1)) %>%
          # keep lists with length == 1 polygon information
          purrr::keep(., c(purrr::map(., length) %>% unlist == 1)) %>%
          # collapse to a list
          purrr::flatten(.) 
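        # [editor's note, hedged] A minimal standalone sketch of the
        # keep()/flatten() normalization performed in steps 1-2 above;
        # `toy.segs` and its matrices are hypothetical stand-ins for `segs`,
        # not objects created by this patch.
        toy.segs <- list(
          cellA = list(list(matrix(1:8, ncol = 2)),
                       list(matrix(1:6, ncol = 2))),  # length 2 -> nested, gets flattened
          cellB = list(),                             # length 0 -> empty, gets dropped
          cellC = list(matrix(1:10, ncol = 2))        # length 1 -> kept as-is
        )
        toy.1 <- purrr::map(purrr::keep(toy.segs, ~ length(.x) > 1), purrr::flatten)
        toy.2 <- purrr::keep(toy.segs, ~ length(.x) == 1)
        toy.clean <- c(toy.1, toy.2)  # analogue of `segs <- c(segs.1, segs.2)` in step 3 below
        lengths(toy.clean)            # cellA = 2 polygons, cellC = 1 polygon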
+
+      # step 3 - combine segmentation lists
+      segs <- c(segs.1, segs.2)
+      }
+      
+      ## Sanity checks on segmentation polygons - part 2 ----
       # check if any cells have > 1 segmentation boundary
-      test.segs <-
-        lapply(segs %>% seq, function(i) segs[[i]][[1]] %>% length)
-      if (which(unlist(test.segs) > 1) %>% any) {
-        segs.art.index <- which(unlist(test.segs) > 1)
-        message(segs.art.index %>% length,
-                " Cells have > 1 segmentaion boundary artifacts", "\n",
-                "..removing artifacts", "\n",
-                "..keeping cell boundaries with maximum coords")
+      test.segs <- lapply(segs %>% seq, function(i) segs[[i]] %>% length) %>% unlist()
+      if (any(test.segs %>% unlist > 1)) {
+        segs.art.index <- which(test.segs %>% unlist > 1)
+        message("Found ", segs.art.index %>% length,
+                c(" cells with > 1 polygon artifacts:", 
+                  ">>> removing artifacts",
+                  ">>> keeping cell boundary with maximum coords") %>% 
+                  paste0(., "\n"))
         # usually artifacts have small boundaries/coords
         # find cell boundaries with maximum coords
+        
+        # TODO: (optionally)
+        # - calculate geometric properties: 
+        # eg, use sphericity to keep single polygon with high sphericity values
+        
         for (i in segs.art.index %>% seq) {
-          dims <- lapply(segs[[segs.art.index[i]]][[1]] %>% seq(),
-                         function(d) { dim(segs[[segs.art.index[i]]][[1]][[d]])[1] } )
+          dims <- lapply(segs[[segs.art.index[i]]] %>% seq(),
+                         function(d) { dim(segs[[segs.art.index[i]]][[d]])[1] } )
           # get & keep boundaries with maximum coords
-          maxs.segs <- which(unlist(dims) == max(unlist(dims)))
-          segs[[segs.art.index[i]]][[1]] <- segs[[segs.art.index[i]]][[1]][maxs.segs]
+          maxs.segs <- which(unlist(dims) == max(unlist(dims)))  
+          segs[[segs.art.index[i]]] <- segs[[segs.art.index[i]]][maxs.segs]
         }
-      } else { message("All cells have 1 segmentaion boundary (no artifacts)") }
-      # add cell names
-      names(segs) <- filter(parq, ZIndex == z) %>% pull(EntityID) %>% as.character
+      } else { message("All cells have single polygon boundary (no artifacts)") }
+      # some cells might have > 1 polygon boundary with identical length
+      #..in this case, keep only the 1st polygon boundary
+      test.segs <- lapply(segs %>% seq, function(i) segs[[i]] %>% length) %>% unlist()
+      if (any(test.segs > 1)) {
+        segs.art.index <- which(test.segs > 1)
+        message("Additionally found ", segs.art.index %>% length,
+                c(" cells with > 1 polygons (identical length):", 
+                  ">>> only the 1st polygon boundary will be kept") %>% 
+                  paste0(., "\n"))
+        for (i in segs.art.index %>% seq) {
+          # TODO: (optionally)
+          # - calculate geometric properties: 
+          # here too, use sphericity to keep single polygon with high sphericity values
+          segs[[segs.art.index[i]]] <- segs[[segs.art.index[i]]][1]
+        }
+      }
+      
+      ## Extract cell boundaries per cells
       # TODO: (optionally) resample & make cell boundaries equidistant!
+
       if (use.BiocParallel) {
-        # extract cell boundaries per cells (faster)
         gc() %>% invisible() # free up memory
-        message("Using fast extraction of cell boundaries..")
+        message("Extracting cell segmentations - using `BiocParallel`")
         segs_list <-
          BiocParallel::bplapply(segs %>% seq, function(i)
          {
            segs[[i]][[1]] %>%
-             data.table::as.data.table(x = .) %>%
+             data.table::as.data.table(.) 
%>% #as.data.frame %>% mutate(cell = names(segs)[i]) }, - BPPARAM = BiocParallel::MulticoreParam(workers.total, tasks = 100L, + BPPARAM = BiocParallel::MulticoreParam(workers.MulticoreParam, + tasks = 50L, force.GC = FALSE, progressbar = TRUE) ) } else { - # extract cell boundaries per cells - message("Extracting of cell boundaries..") + message("Extracting cell segmentations - using `future`") segs_list <- future.apply::future_lapply(segs %>% seq, function(i) { - segs[[i]][[1]] %>% - data.table::as.data.table(x = .) %>% + segs[[i]] %>% + data.table::as.data.table(.) %>% #as.data.frame %>% mutate(cell = names(segs)[i]) - } - ) + } + ) } - #segs_list %>% length # df of all cell segmentations - gc() %>% invisible() # free up memory - #segs <- do.call("rbind", segs_list) - segs <- data.table::rbindlist(segs_list) - names(segs)[1:2] <- c("x", "y") - message("All cell boundaries (with no artifacts) are loaded..") + segs <- + data.table::rbindlist(segs_list) %>% + data.table::setnames(c("x", "y", "cell")) + message("All cell segmentations are loaded..") segs } else { # use non-Cellpose segmentations ppoly <- progressor(steps = length(x = unique(x = sp$fov))) @@ -2827,12 +2918,12 @@ ReadVizgen <- function( }, error = function(...) { return(NULL) })) - }, BPPARAM = BiocParallel::MulticoreParam(workers.total, tasks = 100L, + }, BPPARAM = BiocParallel::MulticoreParam(workers.MulticoreParam, tasks = 50L, force.GC = FALSE, progressbar = TRUE)) ppoly() return(do.call(what = "rbind", args = df)) - }, BPPARAM = BiocParallel::MulticoreParam(workers.total, tasks = 100L, + }, BPPARAM = BiocParallel::MulticoreParam(workers.MulticoreParam, tasks = 50L, force.GC = FALSE, progressbar = TRUE)) } else { @@ -2891,7 +2982,7 @@ ReadVizgen <- function( df <- df[c(1, 3, 4, 2), , drop = FALSE] pbox() return(df) - }, BPPARAM = BiocParallel::MulticoreParam(workers.total, tasks = 100L, + }, BPPARAM = BiocParallel::MulticoreParam(workers.MulticoreParam, tasks = 50L, force.GC = FALSE, progressbar = TRUE) ) } else { @@ -2933,7 +3024,9 @@ ReadVizgen <- function( } # add z-slice index for cells ---- - outs$zIndex <- data.frame(z = rep_len(z, length.out = outs$centroids$cell %>% length), cell = outs$centroids$cell) + outs$zIndex <- + data.frame(z = rep_len(z, length.out = outs$centroids %>% pull(cell) %>% length), + cell = outs$centroids %>% pull(cell)) return(outs) } From 9389ce99c6ed892ef8a7019fa4a530c3b7a6bf09 Mon Sep 17 00:00:00 2001 From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com> Date: Wed, 31 May 2023 22:28:33 +0200 Subject: [PATCH 11/24] fix for `LoadVizgen` --- R/convenience.R | 165 +++++++++++++++++++++++++++--------------------- 1 file changed, 94 insertions(+), 71 deletions(-) diff --git a/R/convenience.R b/R/convenience.R index 5e5c16bfb..3e99f0dbd 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -1,6 +1,5 @@ #' @include generics.R #' @include visualization.R -#' @importFrom magrittr %>% %<>% #' NULL @@ -21,11 +20,11 @@ NULL #' @rdname ReadAkoya #' LoadAkoya <- function( - filename, - type = c('inform', 'processor', 'qupath'), - fov, - assay = 'Akoya', - ... + filename, + type = c('inform', 'processor', 'qupath'), + fov, + assay = 'Akoya', + ... 
) { # read in matrix and centroids data <- ReadAkoya(filename = filename, type = type) @@ -116,7 +115,7 @@ LoadNanostring <- function(data.dir, fov, assay = 'Nanostring') { assay = assay ) obj <- CreateSeuratObject(counts = data$matrix, assay = assay) - + # subset both object and coords based on the cells shared by both cells <- intersect( Cells(x = coords, boundary = "segmentation"), @@ -136,6 +135,7 @@ LoadNanostring <- function(data.dir, fov, assay = 'Nanostring') { #' #' @importFrom SeuratObject Cells CreateCentroids CreateFOV #' CreateSegmentation CreateSeuratObject +#' @import dplyr #' #' @export #' @@ -146,17 +146,32 @@ LoadVizgen <- function(data.dir, fov = "vz", assay = 'Vizgen', ...) { data <- ReadVizgen(data.dir = data.dir, ...) - # if "segmentations" are not present, use cell bounding boxes instead - if (!"segmentations" %in% names(data)) { - bound.boxes <- CreateSegmentation(data[["boxes"]]) - cents <- CreateCentroids(data[["centroids"]]) - bound.boxes.data <- list(centroids = cents, boxes = bound.boxes) - message("Creating FOVs", "\n", - "..using box coordinates instead of segmentations") - coords <- CreateFOV(coords = bound.boxes.data, type = c("boxes", - "centroids"), molecules = data[[mol.type]], assay = assay) - message("Creating Seurat object") - obj <- CreateSeuratObject(counts = data[["transcripts"]], assay = assay) + #gc() %>% invisible() + message("Creating Seurat object..") + obj <- CreateSeuratObject(counts = data[["transcripts"]], assay = assay) + + # in case no segmentation is present, use boxes + if (!"segmentations" %in% names(data)) { + if ("boxes" %in% names(data)) { + bound.boxes <- CreateSegmentation(data[["boxes"]]) + cents <- CreateCentroids(data[["centroids"]]) + bound.boxes.data <- list(centroids = cents, + boxes = bound.boxes) + message("Creating FOVs..", "\n", + ">>> using box coordinates instead of segmentations") + coords <- CreateFOV(coords = bound.boxes.data, + type = c("boxes", "centroids"), + molecules = data[[mol.type]], + assay = assay) + } else { # in case no segmentation & no boxes are present, use centroids only + cents <- CreateCentroids(data[["centroids"]]) + message("Creating FOVs..", "\n", + ">>> using only centroids") + coords <- CreateFOV(coords = list(centroids = cents), + type = c("centroids"), + molecules = data[[mol.type]], + assay = assay) + } # only consider the cells we have counts and a segmentation for # Cells which don't have a segmentation are probably found in other z slices. coords <- subset(x = coords, @@ -166,24 +181,22 @@ LoadVizgen <- function(data.dir, fov = "vz", assay = 'Vizgen', segs <- CreateSegmentation(data[["segmentations"]]) cents <- CreateCentroids(data[["centroids"]]) segmentations.data <- list(centroids = cents, segmentation = segs) - message("Creating FOVs", "\n", - "..using segmentations") - coords <- CreateFOV(coords = segmentations.data, type = c("segmentation", - "centroids"), molecules = data[[mol.type]], assay = assay) - message("Creating Seurat object..") - obj <- CreateSeuratObject(counts = data[["transcripts"]], assay = assay) - # only consider the cells we have counts and a segmentation for - # Cells which don't have a segmentation are probably found in other z slices. 
+ message("Creating FOVs..", "\n", + ">>> using segmentations") + coords <- CreateFOV(coords = segmentations.data, + type = c("segmentation", "centroids"), + molecules = data[[mol.type]], + assay = assay) coords <- subset(x = coords, cells = intersect(x = Cells(x = coords[["segmentation"]]), y = Cells(x = obj))) } # add z-stack index for cells - if (add.zIndex) { obj$z <- data$zIndex$z } + if (add.zIndex) { obj$z <- data$zIndex %>% pull(z) } # add metadata vars - message("..adding metadata infos") + message(">>> adding metadata infos") if (c("metadata" %in% names(data))) { metadata <- match.arg(arg = "metadata", choices = names(data), several.ok = TRUE) meta.vars <- names(data[[metadata]]) @@ -196,16 +209,26 @@ LoadVizgen <- function(data.dir, fov = "vz", assay = 'Vizgen', # sanity on fov name fov %<>% gsub("_|-", ".", .) - #message("..adding FOV") - # add coords to seurat object + message(">>> adding FOV") obj[[fov]] <- coords + ## filter - keep cells with counts > 0 + # small helper function to return metadata + callmeta <- function (object = NULL) { return(object@meta.data) } + nCount <- grep("nCount", callmeta(obj) %>% names, value = TRUE) + if (any(obj[[nCount]] > 0)) { + message(">>> filtering object - keep cells with counts > 0") + obj %<>% subset(subset = !!base::as.symbol(nCount) > 0) + } + if (update.object) { message("Updating object..") obj %<>% UpdateSeuratObject() } - #message("Object is ready!") + message("Object is ready!") return(obj) + + gc() %>% invisible() } #' @return \code{LoadXenium}: A \code{\link[SeuratObject]{Seurat}} object @@ -226,7 +249,7 @@ LoadXenium <- function(data.dir, fov = 'fov', assay = 'Xenium') { data.dir = data.dir, type = c("centroids", "segmentations"), ) - + segmentations.data <- list( "centroids" = CreateCentroids(data$centroids), "segmentation" = CreateSegmentation(data$segmentations) @@ -237,7 +260,7 @@ LoadXenium <- function(data.dir, fov = 'fov', assay = 'Xenium') { molecules = data$microns, assay = assay ) - + xenium.obj <- CreateSeuratObject(counts = data$matrix[["Gene Expression"]], assay = assay) if("Blank Codeword" %in% names(data$matrix)) xenium.obj[["BlankCodeword"]] <- CreateAssayObject(counts = data$matrix[["Blank Codeword"]]) @@ -245,7 +268,7 @@ LoadXenium <- function(data.dir, fov = 'fov', assay = 'Xenium') { xenium.obj[["BlankCodeword"]] <- CreateAssayObject(counts = data$matrix[["Unassigned Codeword"]]) xenium.obj[["ControlCodeword"]] <- CreateAssayObject(counts = data$matrix[["Negative Control Codeword"]]) xenium.obj[["ControlProbe"]] <- CreateAssayObject(counts = data$matrix[["Negative Control Probe"]]) - + xenium.obj[[fov]] <- coords return(xenium.obj) } @@ -279,27 +302,27 @@ PCAPlot <- function(object, ...) 
{
 #' @export
 #'
 SpatialDimPlot <- function(
-  object,
-  group.by = NULL,
-  images = NULL,
-  cols = NULL,
-  crop = TRUE,
-  cells.highlight = NULL,
-  cols.highlight = c('#DE2D26', 'grey50'),
-  facet.highlight = FALSE,
-  label = FALSE,
-  label.size = 7,
-  label.color = 'white',
-  repel = FALSE,
-  ncol = NULL,
-  combine = TRUE,
-  pt.size.factor = 1.6,
-  alpha = c(1, 1),
-  image.alpha = 1,
-  stroke = 0.25,
-  label.box = TRUE,
-  interactive = FALSE,
-  information = NULL
+  object,
+  group.by = NULL,
+  images = NULL,
+  cols = NULL,
+  crop = TRUE,
+  cells.highlight = NULL,
+  cols.highlight = c('#DE2D26', 'grey50'),
+  facet.highlight = FALSE,
+  label = FALSE,
+  label.size = 7,
+  label.color = 'white',
+  repel = FALSE,
+  ncol = NULL,
+  combine = TRUE,
+  pt.size.factor = 1.6,
+  alpha = c(1, 1),
+  image.alpha = 1,
+  stroke = 0.25,
+  label.box = TRUE,
+  interactive = FALSE,
+  information = NULL
 ) {
   return(SpatialPlot(
     object = object,
@@ -332,22 +355,22 @@ SpatialDimPlot <- function(
 #' @export
 #'
 SpatialFeaturePlot <- function(
-  object,
-  features,
-  images = NULL,
-  crop = TRUE,
-  slot = 'data',
-  keep.scale = "feature",
-  min.cutoff = NA,
-  max.cutoff = NA,
-  ncol = NULL,
-  combine = TRUE,
-  pt.size.factor = 1.6,
-  alpha = c(1, 1),
-  image.alpha = 1,
-  stroke = 0.25,
-  interactive = FALSE,
-  information = NULL
+  object,
+  features,
+  images = NULL,
+  crop = TRUE,
+  slot = 'data',
+  keep.scale = "feature",
+  min.cutoff = NA,
+  max.cutoff = NA,
+  ncol = NULL,
+  combine = TRUE,
+  pt.size.factor = 1.6,
+  alpha = c(1, 1),
+  image.alpha = 1,
+  stroke = 0.25,
+  interactive = FALSE,
+  information = NULL
 ) {
   return(SpatialPlot(
     object = object,
From dc029acbe9066a0a41f5fbdd786c14691bb55410 Mon Sep 17 00:00:00 2001
From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com>
Date: Tue, 6 Jun 2023 11:22:08 +0200
Subject: [PATCH 12/24] major fix for `ReadVizgen()` ..reads old-new data,
 optimized parallelization, few bugs fixed

---
 R/preprocessing.R | 345 +++++++++++++++++++++++++++------------------
 1 file changed, 207 insertions(+), 138 deletions(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index 1d67359f0..9bd1a8ef1 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -2398,17 +2398,9 @@ ReadVitessce <- function(
 #' @param z Z-index to load; must be between 0 and 6, inclusive
 #' @param use.BiocParallel If to use \code{BiocParallel::bplapply},
 #' default is \code{TRUE}, if \code{FALSE}, uses \code{future} library
-#' @param workers.MulticoreParam Number of cores to use for \code{BiocParallel::bplapply}
-#' @param DTthreads.pct Set percentage eg \code{50} of total threads to use for \code{data.table::fread},
+#' @param workers.MulticoreParam Number of cores to use for \code{BiocParallel::bplapply}
+#' @param DTthreads.pct Optional, set percentage eg \code{50} of total threads to use for \code{data.table::fread},
 #' if set to \code{NULL} will use default setting as in \code{data.table::getDTthreads(verbose = T)}
-#' @param coord.space Type of molecule spatial coordinate space to use;
-#' choose one or more of:
-#' \itemize{
-#'  \item \dQuote{pixel}: molecule coordinates in pixel space
-#'  \item \dQuote{micron}: molecule coordinates in micron space
-#' }
 #' @param use.furrr When working with lists, use \code{furrr} with \code{future} parallelization;
 #' if \code{FALSE} standard \code{purrr} will be used
+#' @param verbose If to print processing messages using \code{message}; default is to \code{FALSE} #' #' @return \code{ReadVizgen}: A list with some combination of the #' following values: @@ -2456,7 +2448,7 @@ ReadVizgen <- function( transcripts = NULL, spatial = NULL, molecules = NULL, - type = c('segmentations', 'centroids'), + type = c('centroids', 'segmentations'), mol.type = 'microns', metadata = NULL, filter = NA_character_, @@ -2464,11 +2456,11 @@ ReadVizgen <- function( use.BiocParallel = TRUE, workers.MulticoreParam = 12, DTthreads.pct = NULL, - coord.space = 'micron', use.furrr = TRUE, - use.cellpose.out = TRUE + verbose = FALSE ) { # TODO: handle multiple segmentations per z-plane + # NOTE: this is only needed when segmentations differ between z-planes # packages that needs to be installed a priori pkgs <- c("data.table", "arrow", "sfarrow", @@ -2481,25 +2473,37 @@ ReadVizgen <- function( paste0("'", pkgs[which(.)], "'", collapse = ", "), " for this function") %>% stop(., call. = FALSE) } } - # setting workers to use for parallel computing - `parallel` ---- + # setting workers to use for parallelization - `BiocParallel` ---- if (use.BiocParallel) { - message("Using parallelization with: `BiocParallel`") + if (verbose) { message("Using parallelization with: `BiocParallel`") } if (is.null(workers.MulticoreParam)) { workers.MulticoreParam <- quantile(BiocParallel::multicoreWorkers() %>% seq)[4] %>% ceiling - message(workers.MulticoreParam, " of total workers available will be used") - } else { message("Setting total workers to: ", workers.MulticoreParam) } - } else { message("Using parallelization with: `future`", "\n", - "NOTE: set workers for parallelization, eg: `future::plan('multisession', workers = 10)`") - } + if (verbose) { message(workers.MulticoreParam, " of total workers available will be used") } + } else { if (verbose) { message("Setting total workers to: ", workers.MulticoreParam) } } + } else { if (verbose) { message("Using parallelization with: `future`", "\n", + "NOTE: set workers for parallelization, eg: `future::plan('multisession', workers = 10)`") } + } + # support parallelization on unix and windows + # credit to https://github.com/Bioconductor/BiocParallel/issues/98 + BPParam <- + if (.Platform$OS.type == "windows") { + if (verbose) { message("Using parallelization for Windows with: `BiocParallel::SerialParam`") } + BiocParallel::SerialParam(force.GC = FALSE, progressbar = TRUE) + } else { + BiocParallel::MulticoreParam(workers.MulticoreParam, tasks = 50L, + force.GC = FALSE, progressbar = TRUE) + } - # setting cores to use for parallel computing - `data.table` + # (optional) + # setting additional cores to use for parallelization in `data.table` + # might allow to read large files faster if (!is.null(DTthreads.pct)) { - message("Using parallelization with: `data.table`", "\n", - "..for `data.table::fread`") + if (verbose) { message("Using parallelization with: `data.table`", "\n", + "..for `data.table::fread`") } data.table::setDTthreads(threads = 0) # all cores DTthreads <- data.table::getDTthreads() # max cores DTthreads <- c((DTthreads * DTthreads.pct) / 100) %>% ceiling # percentage from total threads - message("Setting DTthreads to: ", DTthreads, " (", paste0(DTthreads.pct, "%"), ")") + if (verbose) { message("Setting DTthreads to: ", DTthreads, " (", paste0(DTthreads.pct, "%"), ")") } data.table::setDTthreads(threads = DTthreads) # set } @@ -2537,31 +2541,66 @@ ReadVizgen <- function( if (use.dir && !dir.exists(paths = data.dir)) { stop("Cannot find 
Vizgen directory ", data.dir) } else { - message("Reading data from:" , "\n", data.dir) + if (verbose) { message("Reading data from:" , "\n", data.dir) } } - # TODO: - # - optimize to find and read only ".parquet$" files - # - else read .hdf5 file from ./cell_boundaries dir + # Use segmentation output from ".parquet" file ---- + # check if file exists + parq <- + # look in the current directory + list.files(data.dir, + pattern = ".parquet$", + full.names = TRUE) %>% + { if (is.empty(.)) { + # look in the sub directory (if nothing is found) + list.files(data.dir, + pattern = ".parquet$", + full.names = TRUE, + recursive = TRUE) + } else { (.) }} + # set to use .parquet" file if present + use.parquet <- + parq %>% length() %>% any - # use Cellpose output ---- - if (use.cellpose.out && grep("Cellpose", list.files(data.dir)) %>% any) { - message("Cellpose output will be used..") - # Identify input files - files <- c(transcripts = transcripts %||% paste0(data.dir, - paste0("/", list.files(data.dir, pattern = "Cellpose"), - "/*cell_by_gene.csv")), - - spatial = spatial %||% paste0(data.dir, - paste0("/", list.files(data.dir, pattern = "Cellpose"), - "/*cell_metadata.csv")), - - molecules = molecules %||% "detected_transcripts[_a-zA-Z0-9]*.csv") - } else { - files <- c(transcripts = transcripts %||% "cell_by_gene[_a-zA-Z0-9]*.csv", - spatial = spatial %||% "cell_metadata[_a-zA-Z0-9]*.csv", - molecules = molecules %||% "detected_transcripts[_a-zA-Z0-9]*.csv") - } + if (use.parquet) { + if (verbose) { message("Cell segmentations are found in `.parquet` file", "\n", + ">>> using ", gsub("s", "", mol.type), " space coordinates") }} + # Identify input files.. + # if no files are found in the current directory.. + #..look for them in the sub directory + files <- c(transcripts = transcripts %||% + list.files(data.dir, + pattern = "cell_by_gene", + full.names = TRUE) %>% + { if (is.empty(.)) { + list.files(data.dir, + pattern = "cell_by_gene", + full.names = TRUE, + recursive = TRUE) + } else { (.) }}, + + spatial = spatial %||% + list.files(data.dir, + pattern = "cell_metadata", + full.names = TRUE) %>% + { if (is.empty(.)) { + list.files(data.dir, + pattern = "cell_metadata", + full.names = TRUE, + recursive = TRUE) + } else { (.) }}, + + molecules = molecules %||% + list.files(data.dir, + pattern = "detected_transcripts", + full.names = TRUE) %>% + { if (is.empty(.)) { + list.files(data.dir, + pattern = "detected_transcripts", + full.names = TRUE, + recursive = TRUE) + } else { (.) }} + ) files[is.na(x = files)] <- NA_character_ h5dir <- file.path( @@ -2614,24 +2653,25 @@ ReadVizgen <- function( ) pprecoord(type = 'finish') rownames(x = sp) <- as.character(x = sp[, 1]) - sp <- sp[, -1, drop = FALSE] + #sp <- sp[, -1, drop = FALSE] + if ((names(sp) == "transcript_count") %>% any) { + if (verbose) { message(">>> filtering `cell_metadata` - keep cells with `transcript_count` > 0") } + sp %<>% select(-1) %>% filter(transcript_count > 0) + } else { sp %<>% select(-1) } + # Check to see if we should load segmentations if ('segmentations' %in% type) { - poly <- if (isFALSE(x = hdf5) && !use.cellpose.out) { + poly <- if (isFALSE(x = hdf5) && !use.parquet) { warning( "Cannot find hdf5r; unable to load segmentation vertices", immediate. 
= TRUE ) FALSE - } else if (use.cellpose.out) { # added for Cellpose output ---- - files2scan <- - list.files(data.dir, pattern = ".parquet$", - all.files = TRUE, - full.names = TRUE, - recursive = TRUE) - if (length(files2scan)) { - message("Cell segmentations are found in `.parquet` file(s)", "\n", - ">>> using ", coord.space, " space coordinates") + } else if (!dir.exists(paths = h5dir) && !use.parquet) { + warning("Cannot find cell boundary H5 files", immediate. = TRUE) + FALSE + } else if (use.parquet) { # for non .hdf5 files + if (length(parq)) { } TRUE } @@ -2700,19 +2740,37 @@ ReadVizgen <- function( tx <- data.table::fread(file = files[[otype]], sep = ",", data.table = FALSE, verbose = FALSE) rownames(x = tx) <- as.character(x = tx[, 1]) - tx <- t(x = as.matrix(x = tx[, -1, drop = FALSE])) - if (!is.na(x = filter)) { - ptx(message = paste("Filtering genes with pattern", - filter), class = "sticky", amount = 0) - tx <- tx[!grepl(pattern = filter, x = rownames(x = tx)), - , drop = FALSE] + # avoid converting to dense matrix? + #tx <- t(x = as.matrix(x = tx[, -1, drop = FALSE])) + + # keep cells with `transcript_count` > 0 + if (verbose) { message(">>> filtering `cell_by_gene` - keep cells with counts > 0") } + tx %<>% select(-1) %>% + filter_all(any_vars(. > 0)) %>% { + if ((names(sp) == "transcript_count") %>% any) { + # match cells to filtered data from spatial (cell_metadata) + filter(., rownames(.) %in% rownames(sp)) + } else { (.) } } %>% # return filtered count data + t() %>% as.sparse() + + # filter cell metadata df + # match filtered cell IDs from count matrix to cell metadata df + if (!(names(sp) == "transcript_count") %>% any) { + sp %<>% filter(rownames(.) %in% colnames(tx)) } + ratio <- getOption(x = "Seurat.input.sparse_ratio", default = 0.4) if ((sum(tx == 0)/length(x = tx)) > ratio) { - ptx(message = "Converting counts to sparse matrix", + ptx(message = "Counts are converted to sparse matrix", class = "sticky", amount = 0) - tx <- as.sparse(x = tx) + #tx <- as.sparse(x = tx) + } + if (!is.na(x = filter)) { + ptx(message = paste("Filtering genes with pattern", + filter), class = "sticky", amount = 0) + tx <- tx[!grepl(pattern = filter, x = rownames(x = tx)), + , drop = FALSE] } ptx(type = "finish") tx @@ -2725,27 +2783,32 @@ ReadVizgen <- function( data.frame(x = sp$center_x, y = sp$center_y, cell = rownames(x = sp), stringsAsFactors = FALSE) - # use Cellpose segmentations + # use segmentations from ".parquet" }, segmentations = { - if (use.cellpose.out) { - files2scan <- - list.files(data.dir, pattern = ".parquet$", - all.files = TRUE, - full.names = TRUE, - recursive = TRUE) - if (length(files2scan)) { - file2read <- - files2scan %>% - grep(coord.space, ., value = TRUE) %>% + if (use.parquet) { + if (length(parq) > 1) { + # eg, if two files are present: + # `cellpose_micron_space.parquet` + # `cellpose_mosaic_space.parquet` + parq %<>% { + if (mol.type == "pixels") { + # for `cellpose_mosaic_space.parquet` + grep("mosaic", ., value = TRUE) + } else if (mol.type == "microns") { + # for `cellpose_micron_space.parquet` + grep(gsub("s", "", mol.type), ., value = TRUE) + } + } %>% { if (is.empty(.)) { - # support only single .parquet file - files2scan %>% + # only if single ".parquet" file present + # eg, `cell_boundaries.parquet` + parq %>% grep(".parquet$", ., value = TRUE) } else { (.) } } } # Read .parquet file - parq <- sfarrow::st_read_parquet(file2read) + parq %<>% sfarrow::st_read_parquet(.) 
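      # [editor's note, hedged] For orientation: `sfarrow::st_read_parquet()`
      # returns an `sf`-style data frame; MERSCOPE boundary files hold one row
      # per cell per z-plane, with `EntityID`, `ZIndex` and a `Geometry`
      # list-column (the columns used just below). A standalone sketch --
      # "cell_boundaries.parquet" is a hypothetical path, not a file produced
      # by this patch:
      library(dplyr)
      boundaries <- sfarrow::st_read_parquet("cell_boundaries.parquet")
      z.use <- 3L
      segs.demo <- boundaries %>%
        filter(ZIndex == z.use) %>%   # one z-plane, as with `z` below
        pull(Geometry)                # list of cell polygons
      names(segs.demo) <- boundaries %>%
        filter(ZIndex == z.use) %>%
        pull(EntityID) %>%
        as.character()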
# get all cell segmentations
       segs <- filter(parq, ZIndex == z) %>% 
         pull(Geometry) %>%
         # add cell ID
         set_names(filter(parq, ZIndex == z) %>% 
                     pull(EntityID) %>% as.character)
       
       ## Sanity checks on segmentation polygons - part 1 ----
       # check if any cells have > 1 segmentation boundary
       test.segs <- lapply(segs %>% seq, 
                           function(i) segs[[i]] %>% length) %>% unlist()
       if (any(test.segs > 1)) {
         segs.art.index <- which(test.segs > 1)
         segs.empty.index <- which(test.segs < 1)
-        message("Sanity checks on cell segmentation polygons:", "\n",
-                ">>> found ", segs.art.index %>% length, 
-                " cells with > 1 (nested) polygon lists:", "\n",
-                ">>> flattening polygon lists", "\n",
-                ">>> removing ", segs.empty.index %>% length,
-                " empty polygon lists")
+        if (verbose) { message("Sanity checks on cell segmentation polygons:", "\n",
+                               ">>> found ", segs.art.index %>% length, 
+                               " cells with > 1 (nested) polygon lists:", "\n",
+                               ">>> flattening polygon lists", "\n",
+                               if (c(segs.empty.index %>% length) > 0) { 
+                                 paste0(">>> removing ", 
+                                        segs.empty.index %>% length,
+                                        " empty polygon lists") }
+        ) }
         
         # step 1 - find polygon nested lists with > 1 length
-        # (if exists) get originally planned future session
+        # get original planned session if exists
         if (is(future::plan(), "multisession")) {
           orig.plan <- future::plan()
         }
         segs.1 <- segs %>%
           purrr::keep(., c(purrr::map(., length) %>% unlist > 1)) %>%
           # flatten each list
           {
             if (use.furrr) {
               # set temporary workers
               f.plan <- future::plan("multisession", workers = 4L, gc = TRUE)
               #on.exit(f.plan %>% future::plan()) # exiting doesn't work with furrr
-              # using furrr (ie, faster purrr with future)
+              # using furrr (faster purrr with future)
               furrr::future_map(., purrr::flatten)
             } else {
               purrr::map(., purrr::flatten)
             }
-            # since furrr loads future, hide startup messages
           } %>% suppressPackageStartupMessages()
-        # set back originally planned session
+        # set originally planned session back
         if (exists("orig.plan")) {
           future::plan(orig.plan)
         } else {
-          #..or plan sequential
+          # or plan sequential
           future::plan("sequential")
         }
 
@@ -2887,14 +2876,23 @@ ReadVizgen <- function(
 
       ## Sanity checks on segmentation polygons - part 2 ----
       # check if any cells have > 1 segmentation boundary
+      # TODO: (optionally) optimize sanity code using purrr?
       test.segs <- lapply(segs %>% seq, function(i) segs[[i]] %>% length) %>% unlist()
       if (any(test.segs %>% unlist > 1)) {
         segs.art.index <- which(test.segs %>% unlist > 1)
-        message("Found ", segs.art.index %>% length,
-                c(" cells with > 1 polygon artifacts:", 
-                  ">>> removing artifacts",
-                  ">>> keeping cell boundary with maximum coords") %>% 
-                  paste0(., "\n"))
+        if (verbose) { 
+          message("Found ", segs.art.index %>% length,
+                  c(" cells with > 1 polygon artifacts:", 
+                    ">>> removing artifacts",
+                    ">>> keeping cell boundary with maximum coords") %>% 
+                    paste0(., "\n")) 
+        }
         # usually artifacts have small boundaries/coords
         # find cell boundaries with maximum coords
         
         # TODO: (optionally)
-        # - calculate geometric properties: 
-        # eg, use sphericity to keep single polygon with high sphericity values
+        # - calculate geometric properties, eg circularity:
+        # - keep single polygon with high circularity values (likely a cell?)
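        # [editor's note, hedged] The loop below keeps, per affected cell, the
        # polygon with the most vertices (artifacts tend to be small). A toy
        # version of that heuristic; `toy.polys` is hypothetical, not data
        # from this patch:
        toy.polys <- list(matrix(runif(6), ncol = 2),    # 3 vertices  -> likely artifact
                          matrix(runif(40), ncol = 2))   # 20 vertices -> real boundary
        n.vertices <- vapply(toy.polys, nrow, integer(1L))
        toy.polys <- toy.polys[which(n.vertices == max(n.vertices))]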
for (i in segs.art.index %>% seq) {
           dims <- lapply(segs[[segs.art.index[i]]] %>% seq(),
                          function(d) { dim(segs[[segs.art.index[i]]][[d]])[1] } )
           # get & keep boundaries with maximum coords
-          maxs.segs <- which(unlist(dims) == max(unlist(dims)))  
+          maxs.segs <- which(unlist(dims) == max(unlist(dims)))
           segs[[segs.art.index[i]]] <- segs[[segs.art.index[i]]][maxs.segs]
         }
-      } else { message("All cells have single polygon boundary (no artifacts)") }
+      } else { if (verbose) { message("All cells have single polygon boundary (no artifacts)") } }
       # some cells might have > 1 polygon boundary with identical length
       #..in this case, keep only the 1st polygon boundary
       test.segs <- lapply(segs %>% seq, function(i) segs[[i]] %>% length) %>% unlist()
       if (any(test.segs > 1)) {
         segs.art.index <- which(test.segs > 1)
-        message("Additionally found ", segs.art.index %>% length,
-                c(" cells with > 1 polygons (identical length):", 
-                  ">>> only the 1st polygon boundary will be kept") %>% 
-                  paste0(., "\n"))
+        if (verbose) { message("Additionally found ", segs.art.index %>% length,
+                               c(" cells with > 1 polygons (identical length):", 
+                                 ">>> only the 1st polygon boundary will be kept") %>% 
+                                 paste0(., "\n")) }
         for (i in segs.art.index %>% seq) {
           # TODO: (optionally)
-          # - calculate geometric properties: 
-          # here too, use sphericity to keep single polygon with high sphericity values
+          # - calculate geometric properties, eg circularity:
+          # - keep single polygon with high circularity values (likely a cell?)
+          
           segs[[segs.art.index[i]]] <- segs[[segs.art.index[i]]][1]
         }
       }
       
       ## Extract cell boundaries per cells
-      # TODO: (optionally) resample & make cell boundaries equidistant!
-      
+      # TODO: (optionally) resample & make cell boundaries equidistant?
+      # TODO: (optionally) optimize with purrr::map to get df per list instance?
       if (use.BiocParallel) {
         gc() %>% invisible() # free up memory
-        message("Using fast extraction of cell boundaries..")
+        if (verbose) { message("Extracting cell segmentations - using `BiocParallel`") }
         segs_list <-
          BiocParallel::bplapply(segs %>% seq, function(i)
          {
            segs[[i]][[1]] %>%
              data.table::as.data.table(.) %>%
-             #as.data.frame %>%
              mutate(cell = names(segs)[i])
          },
-         BPPARAM = BiocParallel::MulticoreParam(workers.MulticoreParam, 
-                                                tasks = 50L,
-                                                force.GC = FALSE, 
-                                                progressbar = TRUE)
-         )
+         BPPARAM = BPParam)
       } else {
-        message("Extracting cell segmentations - using `future`")
+        if (verbose) { message("Extracting cell segmentations - using `future`") }
        segs_list <-
          future.apply::future_lapply(segs %>% seq, function(i)
          {
            segs[[i]] %>%
              data.table::as.data.table(.) %>%
-             #as.data.frame %>%
              mutate(cell = names(segs)[i])
-          }
-          )
+          } 
+          )  
       }
       # df of all cell segmentations
       segs <- 
        data.table::rbindlist(segs_list) %>%
        data.table::setnames(c("x", "y", "cell"))
-      message("All cell segmentations are loaded..")
+      #names(segs)[1:2] <- c("x", "y")
+      if (verbose) { message("All cell segmentations are loaded..") }
       segs
-    } else { # use non-Cellpose segmentations
+    } else {
+      # else use ".hdf5" files from ./cell_boundaries (older version)
       ppoly <- progressor(steps = length(x = unique(x = sp$fov)))
       ppoly(message = "Creating polygon coordinates", class = "sticky",
             amount = 0)
-      # use parallel or future
-      if (use.BiocParallel) { 
+      # use `BiocParallel` or `future`
+      if (use.BiocParallel) {
+        if (verbose) { message("Reading '.hdf5' files..") }
         pg <- BiocParallel::bplapply(X = unique(x = sp$fov), FUN = function(f, ...)
{ fname <- file.path(h5dir, paste0("feature_data_", f, ".hdf5")) if (!file.exists(fname)) { @@ -2908,7 +2974,7 @@ ReadVizgen <- function( on.exit(expr = hfile$close_all()) cells <- rownames(x = subset(x = sp, subset = fov == f)) # creating df for cell boundaries - df <- BiocParallel::bplapply(X = cells, FUN = function(x) { + df <- lapply(X = cells, FUN = function(x) { return(tryCatch(expr = { cc <- hfile[["featuredata"]][[x]][[zidx]][["p_0"]][["coordinates"]]$read() cc <- as.data.frame(x = t(x = cc)) @@ -2918,14 +2984,11 @@ ReadVizgen <- function( }, error = function(...) { return(NULL) })) - }, BPPARAM = BiocParallel::MulticoreParam(workers.MulticoreParam, tasks = 50L, - force.GC = FALSE, - progressbar = TRUE)) + }) ppoly() - return(do.call(what = "rbind", args = df)) - }, BPPARAM = BiocParallel::MulticoreParam(workers.MulticoreParam, tasks = 50L, - force.GC = FALSE, - progressbar = TRUE)) + #return(do.call(what = "rbind", args = df)) + return(data.table::rbindlist(df)) + }, BPPARAM = BPParam) } else { pg <- future.apply::future_lapply(X = unique(x = sp$fov), @@ -2973,25 +3036,31 @@ ReadVizgen <- function( amount = 0) # use parallel or future if (use.BiocParallel) { - message("Creating box coordinates..") + if (verbose) { message("Creating box coordinates..") } bx <- BiocParallel::bplapply(X = rownames(x = sp), FUN = function(cell) { row <- sp[cell, ] - df <- expand.grid(x = c(row$min_x, row$max_x), - y = c(row$min_y, row$max_y), cell = cell, KEEP.OUT.ATTRS = FALSE, - stringsAsFactors = FALSE) - df <- df[c(1, 3, 4, 2), , drop = FALSE] + # faster version for grid construction + df <- data.table::CJ(x = c(row$min_x, row$max_x), + y = c(row$min_y, row$max_y), + cell = cell) %>% + slice(c(1, 3, 4, 2)) + #df <- expand.grid(x = c(row$min_x, row$max_x), + # y = c(row$min_y, row$max_y), cell = cell, KEEP.OUT.ATTRS = FALSE, + # stringsAsFactors = FALSE) + #df <- df[c(1, 3, 4, 2), , drop = FALSE] pbox() return(df) - }, BPPARAM = BiocParallel::MulticoreParam(workers.MulticoreParam, tasks = 50L, - force.GC = FALSE, progressbar = TRUE) - ) + }, BPPARAM = BPParam) } else { bx <- future.apply::future_lapply(X = rownames(x = sp), FUN = function(cell) { row <- sp[cell, ] - df <- expand.grid(x = c(row$min_x, row$max_x), - y = c(row$min_y, row$max_y), cell = cell, KEEP.OUT.ATTRS = FALSE, - stringsAsFactors = FALSE) - df <- df[c(1, 3, 4, 2), , drop = FALSE] + df <- data.table::CJ(x = c(row$min_x, row$max_x), + y = c(row$min_y, row$max_y), cell = cell) %>% + slice(c(1, 3, 4, 2)) + #df <- expand.grid(x = c(row$min_x, row$max_x), + #y = c(row$min_y, row$max_y), cell = cell, KEEP.OUT.ATTRS = FALSE, + #stringsAsFactors = FALSE) + #df <- df[c(1, 3, 4, 2), , drop = FALSE] pbox() return(df) }) From 8f7153e69c9c558f1b2010819f7ea768dd9400b6 Mon Sep 17 00:00:00 2001 From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com> Date: Tue, 6 Jun 2023 11:31:43 +0200 Subject: [PATCH 13/24] fix for `LoadVizgen()` ..supports args: `mol.type`, `filter`, `z` added `verbose` for optional verbosity --- R/convenience.R | 92 +++++++++++++++++++++++++++++++------------------ 1 file changed, 59 insertions(+), 33 deletions(-) diff --git a/R/convenience.R b/R/convenience.R index 3e99f0dbd..fb6f33eea 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -129,8 +129,10 @@ LoadNanostring <- function(data.dir, fov, assay = 'Nanostring') { #' @return \code{LoadVizgen}: A \code{\link[SeuratObject]{Seurat}} object #' +#' @param fov Name to store FOV as +#' @param assay Name to store expression matrix as #' @param add.zIndex If to add 
\code{z} slice index to a cell
-#' @param update.object If to update final object, default to TRUE.
+#' @param update.object If to update final object, default to TRUE
 #' @param ... Arguments passed to \code{ReadVizgen}
 #'
 #' @importFrom SeuratObject Cells CreateCentroids CreateFOV
@@ -141,13 +143,27 @@ LoadNanostring <- function(data.dir, fov, assay = 'Nanostring') {
 #'
 #' @rdname ReadVizgen
 #'
-LoadVizgen <- function(data.dir, fov = "vz", assay = 'Vizgen',
-                       add.zIndex = TRUE, update.object = TRUE,
-                       ...) {
-  data <- ReadVizgen(data.dir = data.dir, ...)
+LoadVizgen <- function(
+  data.dir,
+  fov = 'vz',
+  assay = 'Vizgen',
+  mol.type = 'microns',
+  filter = '^Blank-',
+  z = 3L,
+  add.zIndex = TRUE,
+  update.object = TRUE,
+  verbose = TRUE,
+  ...)
+{
+  # reading data..
+  data <- ReadVizgen(data.dir = data.dir,
+                     mol.type = mol.type,
+                     filter = filter,
+                     z = z,
+                     verbose = verbose,
+                     ...)
 
-  #gc() %>% invisible()
-  message("Creating Seurat object..")
+  if (verbose) { message("Creating Seurat object..") }
   obj <- CreateSeuratObject(counts = data[["transcripts"]], assay = assay)
 
   # in case no segmentation is present, use boxes
@@ -157,46 +173,53 @@ LoadVizgen <- function(data.dir, fov = "vz", assay = 'Vizgen',
       cents <- CreateCentroids(data[["centroids"]])
       bound.boxes.data <- list(centroids = cents, 
                                boxes = bound.boxes)
      if (verbose) { 
        message("Creating FOVs..", "\n", 
                ">>> using box coordinates instead of segmentations")
      }
      coords <- CreateFOV(coords = bound.boxes.data, 
                          type = c("boxes", "centroids"), 
                          molecules = data[[mol.type]], 
                          assay = assay)
    } else { 
      # in case no segmentation & no boxes are present, use centroids only
      cents <- CreateCentroids(data[["centroids"]])
      if (verbose) { 
        message("Creating FOVs..", "\n", 
                ">>> using only centroids")
      }
      coords <- CreateFOV(coords = list(centroids = cents), 
                          type = c("centroids"), 
                          molecules = data[[mol.type]], 
                          assay = assay)
      coords <- subset(x = coords, 
                       cells = intersect(x = Cells(x = coords[["centroids"]]), 
                                         y = Cells(x = obj)))
    }
  } else if ("segmentations" %in% names(data)) {
    segs <- CreateSegmentation(data[["segmentations"]])
    cents <- CreateCentroids(data[["centroids"]])
    segmentations.data <- list(centroids = cents, segmentation = segs)
    if (verbose) { 
      message("Creating FOVs..", "\n", 
              ">>> using segmentations")
    }
    coords <- CreateFOV(coords = segmentations.data, 
                        type = c("segmentation", "centroids"), 
                        molecules = data[[mol.type]], 
                        assay = assay)
    # only consider the cells we have counts and a segmentation.
    # Cells which don't have a segmentation are probably found in other z slices.
coords <- subset(x = coords, 
                     cells = intersect(x = Cells(x = coords[["segmentation"]]), 
-                                       y = Cells(x = obj)))
+                                      y = Cells(x = obj)))
   }
 
   # add z-stack index for cells
   if (add.zIndex) { obj$z <- data$zIndex %>% pull(z) }
 
   # add metadata vars
-  message(">>> adding metadata infos")
+  if (verbose) { message(">>> adding metadata info") }
   if (c("metadata" %in% names(data))) {
     metadata <- match.arg(arg = "metadata", choices = names(data), several.ok = TRUE)
     meta.vars <- names(data[[metadata]])
@@ -196,16 +209,26 @@ LoadVizgen <- function(data.dir, fov = "vz", assay = 'Vizgen',
 
   # sanity on fov name
-  fov %<>% gsub("_|-", ".", .)
+  fov %<>% gsub("_|-", ".", .) 
 
-  message(">>> adding FOV")
+  if (verbose) { message(">>> adding FOV") }
   obj[[fov]] <- coords
 
   ## filter - keep cells with counts > 0
-  # small helper function to return metadata
+  # helper function to return metadata
   callmeta <- function (object = NULL) { return(object@meta.data) }
   nCount <- grep("nCount", callmeta(obj) %>% names, value = TRUE)
-  if (any(obj[[nCount]] > 0)) {
-    message(">>> filtering object - keep cells with counts > 0")
+  if (any(obj[[nCount]] == 0)) {
+    if (verbose) { message(">>> filtering object - keeping cells with counts > 0") }
     obj %<>% subset(subset = !!base::as.symbol(nCount) > 0)
-  }
+  } else { if (verbose) { message(">>> all counts are > 0") } }
   
   if (update.object) { 
-    message("Updating object..")
-    obj %<>% UpdateSeuratObject() }
+    if (verbose) { message("Updating object:")
+      obj %<>% UpdateSeuratObject()
+    } else { 
+      obj %<>% 
+        UpdateSeuratObject() %>% 
+        suppressMessages() } }
 
-  #message("Object is ready!")
+  if (verbose) { message("Object is ready!") }
   return(obj)
-  
-  gc() %>% invisible()
 }
 
 #' @return \code{LoadXenium}: A \code{\link[SeuratObject]{Seurat}} object
From 799323da1d351a2bdbd8ebbf1c17c91e79cb9a05 Mon Sep 17 00:00:00 2001
From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com>
Date: Wed, 26 Jul 2023 09:50:02 +0200
Subject: [PATCH 14/24] update `ReadVizgen()` ..resolve some small conflict
 for `Read10X_Image()`

---
 R/preprocessing.R | 51 ++++++++++++++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 14 deletions(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index 9bd1a8ef1..9b47aeb8c 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -519,10 +519,8 @@ Load10X_Spatial <- function(
   }
   object <- CreateSeuratObject(counts = data, assay = assay)
   if (is.null(x = image)) {
-    image <- Read10X_Image(
-      image.dir = file.path(data.dir, 'spatial'),
-      filter.matrix = filter.matrix
-    )
+    image <- Read10X_Image(image.dir = file.path(data.dir,"spatial"),
+                           filter.matrix = filter.matrix)
   } else {
     if (!inherits(x = image, what = "VisiumV1"))
       stop("Image must be an object of class 'VisiumV1'.")
   }
@@ -2462,7 +2462,7 @@ ReadVizgen <- function(
 
   # packages that need to be installed a priori
   pkgs <- c("data.table", "arrow", "sfarrow", 
-            "tidyverse", "furrr", "BiocParallel")
+            "tidyverse", "furrr", "BiocParallel", "Matrix")
   lapply(pkgs %>% length %>% seq, function(i) 
     { !requireNamespace(pkgs[i], quietly = TRUE) } ) %>% 
     unlist %>% 
@@ -2524,7 +2522,7 @@ ReadVizgen <- function(
   if (!is.null(x = metadata)) {
     metadata <- match.arg(
       arg = metadata,
-      choices = c("volume", "fov"),
+      choices = c('volume', 'fov'),
       several.ok = TRUE
     )
   }
# if no files are found in the current directory.. #..look for them in the sub directory - files <- c(transcripts = transcripts %||% + files <- c(transcripts = NULL %||% list.files(data.dir, pattern = "cell_by_gene", full.names = TRUE) %>% @@ -2577,9 +2576,15 @@ ReadVizgen <- function( pattern = "cell_by_gene", full.names = TRUE, recursive = TRUE) + } else { (.) }} %>% + { if (length(.) > 1) { + stop("There are > 1 `cell_by_gene` files", + "\n", "make sure only 1 file is read") + } else if (!length(.)) { + stop("No `cell_by_gene` file is available") } else { (.) }}, - spatial = spatial %||% + spatial = NULL %||% list.files(data.dir, pattern = "cell_metadata", full.names = TRUE) %>% @@ -2588,9 +2593,15 @@ ReadVizgen <- function( pattern = "cell_metadata", full.names = TRUE, recursive = TRUE) + } else { (.) }} %>% + { if (length(.) > 1) { + stop("There are > 1 `cell_metadata` files", + "\n", "make sure only 1 file is read") + } else if (!length(.)) { + stop("No `cell_metadata` file is available") } else { (.) }}, - molecules = molecules %||% + molecules = NULL %||% list.files(data.dir, pattern = "detected_transcripts", full.names = TRUE) %>% @@ -2599,6 +2610,12 @@ ReadVizgen <- function( pattern = "detected_transcripts", full.names = TRUE, recursive = TRUE) + } else { (.) }} %>% + { if(length(.) > 1) { + stop("There are > 1 `detected_transcripts` files", + "\n", "make sure only 1 file is read") + } else if (!length(.)) { + stop("No `detected_transcripts` file is available") } else { (.) }} ) @@ -2668,7 +2685,7 @@ ReadVizgen <- function( ) FALSE } else if (!dir.exists(paths = h5dir) && !use.parquet) { - warning("Cannot find cell boundary H5 files", immediate. = TRUE) + warning("Cannot find cell boundary H5 or `.parquet` file(s)", immediate. = TRUE) FALSE } else if (use.parquet) { # for non .hdf5 files if (length(parq)) { @@ -2678,11 +2695,12 @@ ReadVizgen <- function( else { TRUE } - if (isFALSE(x = poly)) { + if (isFALSE(x = poly) && isFALSE(use.parquet)) { type <- setdiff(x = type, y = 'segmentations') } } - spatials <- rep_len(x = files[['spatial']], length.out = length(x = type)) + spatials <- rep_len(x = files[['spatial']], + length.out = length(x = type)) names(x = spatials) <- type files <- c(files, spatials) files <- files[setdiff(x = names(x = files), y = 'spatial')] @@ -2701,6 +2719,8 @@ ReadVizgen <- function( class = 'sticky', amount = 0 ) + + # load molecules mx <- data.table::fread( file = files[['molecules']], sep = ',', @@ -2751,7 +2771,10 @@ ReadVizgen <- function( # match cells to filtered data from spatial (cell_metadata) filter(., rownames(.) %in% rownames(sp)) } else { (.) 
} } %>% # return filtered count data
-      t() %>% as.sparse()
+      # transpose data.frame without converting to dense matrix
+      #t() %>% 
+      as.sparse() %>% # convert to sparse matrix
+      Matrix::t() # transpose sparse matrix
     
     # filter cell metadata df
     # match filtered cell IDs from count matrix to cell metadata df
     if (!(names(sp) == "transcript_count") %>% any) {
       sp %<>% filter(rownames(.) %in% colnames(tx))
     }
+    
     ratio <- getOption(x = "Seurat.input.sparse_ratio", default = 0.4)
     if ((sum(tx == 0)/length(x = tx)) > ratio) {
       ptx(message = "Counts are converted to sparse matrix", 
           class = "sticky", amount = 0)
       #tx <- as.sparse(x = tx)
+    }
+    if (!is.na(x = filter)) {
+      ptx(message = paste("Filtering genes with pattern", 
+                          filter), class = "sticky", amount = 0)
+      tx <- tx[!grepl(pattern = filter, x = rownames(x = tx)), 
+               , drop = FALSE]
     }
     ptx(type = "finish")
     tx
@@ -2798,7 +2821,7 @@ ReadVizgen <- function(
           # for `cellpose_micron_space.parquet`
           grep(gsub("s", "", mol.type), ., value = TRUE)
         }
-      } %>%
+      } %>% 
         { if (is.empty(.)) {
           # only if single ".parquet" file present
          # eg, `cell_boundaries.parquet`
          parq %>% 
            grep(".parquet$", ., value = TRUE)
        } else { (.) }
        }
    }
    # Read .parquet file
    parq %<>% sfarrow::st_read_parquet(.) 
From 9ea4458e338f961fc7875aa6bba891d2f85ff894 Mon Sep 17 00:00:00 2001
From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com>
Date: Wed, 26 Jul 2023 09:54:37 +0200
Subject: [PATCH 15/24] update `LoadVizgen()` ..adding of molecules to the
 object is optional `add.molecules = TRUE`

---
 R/convenience.R | 65 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 22 deletions(-)

diff --git a/R/convenience.R b/R/convenience.R
index fb6f33eea..5e9b0cf74 100644
--- a/R/convenience.R
+++ b/R/convenience.R
@@ -133,6 +133,8 @@ LoadNanostring <- function(data.dir, fov, assay = 'Nanostring') {
 #' @param assay Name to store expression matrix as
 #' @param add.zIndex If to add \code{z} slice index to a cell
 #' @param update.object If to update final object, default to TRUE
+#' @param add.molecules If to add \code{molecules} coordinates to FOV of the object,
+#' default to TRUE
 #' @param ... Arguments passed to \code{ReadVizgen}
 #'
 #' @importFrom SeuratObject Cells CreateCentroids CreateFOV
@@ -152,6 +154,7 @@ LoadVizgen <- function(
   z = 3L,
   add.zIndex = TRUE,
   update.object = TRUE,
+  add.molecules = TRUE,
   verbose = TRUE,
   ...)
 {
@@ -174,27 +177,40 @@ LoadVizgen <- function(
       bound.boxes.data <- list(centroids = cents, 
                                boxes = bound.boxes)
       if (verbose) { 
-        message("Creating FOVs..", "\n", 
+        message("Creating FOVs..", "\n", 
+                if (!add.molecules) { ">>> `molecules` coordinates will be skipped" },
+                "\n",
                ">>> using box coordinates instead of segmentations")
      }
-      coords <- CreateFOV(coords = bound.boxes.data, 
-                          type = c("boxes", "centroids"), 
-                          molecules = data[[mol.type]], 
-                          assay = assay)
+      coords <- 
+        CreateFOV(coords = bound.boxes.data, 
+                  type = c("boxes", "centroids"), 
+                  molecules = 
+                    if (add.molecules) { 
+                      data[[mol.type]] } else { NULL }, 
+                  assay = assay) %>% 
+        subset(x = .,
+               cells = intersect(x = Cells(x = .[["boxes"]]),
+                                 y = Cells(x = obj)))
    } else { 
      # in case no segmentation & no boxes are present, use centroids only
      cents <- CreateCentroids(data[["centroids"]])
      if (verbose) { 
-        message("Creating FOVs..", "\n", 
+        message("Creating FOVs..", "\n", 
+                if (!add.molecules) { ">>> `molecules` coordinates will be skipped" },
+                "\n",
                ">>> using only centroids")
      }
-      coords <- CreateFOV(coords = list(centroids = cents), 
-                          type = c("centroids"), 
-                          molecules = data[[mol.type]], 
-                          assay = assay)
-      coords <- subset(x = coords, 
-                       cells = intersect(x = Cells(x = coords[["centroids"]]), 
-                                         y = Cells(x = obj)))
+      coords <- 
+        CreateFOV(coords = list(centroids = cents), 
+                  type = c("centroids"), 
+                  molecules = 
+                    if (add.molecules) { 
+                      data[[mol.type]] } else { NULL }, 
+                  assay = assay) %>% 
+        subset(x = .,
+               cells = intersect(x = Cells(x = .[["centroids"]]),
+                                 y = Cells(x = obj)))
    }
  } else if ("segmentations" %in% names(data)) {
    segs <- CreateSegmentation(data[["segmentations"]])
    cents <- CreateCentroids(data[["centroids"]])
    segmentations.data <- list(centroids = cents, segmentation = segs)
    if (verbose) { 
      message("Creating FOVs..", "\n",
              if 
(!add.molecules) { ">>> `molecules` coordinates will be skipped" },
+              "\n",
+              ">>> using segmentations")
    }
-    coords <- CreateFOV(coords = segmentations.data, 
-                        type = c("segmentation", "centroids"), 
-                        molecules = data[[mol.type]], 
-                        assay = assay)
-    # only consider the cells we have counts and a segmentation.
-    # Cells which don't have a segmentation are probably found in other z slices.
-    coords <- subset(x = coords, 
-                     cells = intersect(x = Cells(x = coords[["segmentation"]]), 
-                                       y = Cells(x = obj)))
+    coords <- 
+      CreateFOV(coords = segmentations.data, 
+                type = c("segmentation", "centroids"), 
+                molecules = 
+                  if (add.molecules) { 
+                    data[[mol.type]] } else { NULL }, 
+                assay = assay) %>% 
+      # only consider the cells we have counts and a segmentation.
+      # Cells which don't have a segmentation are probably found in other z slices.
+      subset(x = .,
+             cells = intersect(x = Cells(x = .[["segmentation"]]),
+                               y = Cells(x = obj)))
  }
 
   # add z-stack index for cells
From 38fad2a9bc58981f7c7e25382a0f656ce2c8bfea Mon Sep 17 00:00:00 2001
From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com>
Date: Wed, 26 Jul 2023 09:57:33 +0200
Subject: [PATCH 16/24] resolving some conflicts in preprocessing.R

---
 R/preprocessing.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index 9b47aeb8c..b6644c838 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -521,7 +521,8 @@ Load10X_Spatial <- function(
   if (is.null(x = image)) {
     image <- Read10X_Image(image.dir = file.path(data.dir,"spatial"),
                            filter.matrix = filter.matrix)
-  } else {
+  }
+  else {
     if (!inherits(x = image, what = "VisiumV1"))
       stop("Image must be an object of class 'VisiumV1'.")
   }
From bf15b6e7a1fa25867f568193f8627efd8b48bc37 Mon Sep 17 00:00:00 2001
From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com>
Date: Wed, 26 Jul 2023 10:08:44 +0200
Subject: [PATCH 17/24] ..update preprocessing.R from `develop`

---
 R/preprocessing.R | 756 +++++++++++++++++++++++++++------------------
 1 file changed, 447 insertions(+), 309 deletions(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index b6644c838..c53f20e43 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -56,11 +56,11 @@ globalVariables(
 #' CalculateBarcodeInflections(pbmc_small, group.column = 'groups')
 #'
 CalculateBarcodeInflections <- function(
-  object,
-  barcode.column = "nCount_RNA",
-  group.column = "orig.ident",
-  threshold.low = NULL,
-  threshold.high = NULL
+  object,
+  barcode.column = "nCount_RNA",
+  group.column = "orig.ident",
+  threshold.low = NULL,
+  threshold.high = NULL
 ) {
   ## Check that barcode.column exists in meta.data
   if (!(barcode.column %in% colnames(x = object[[]]))) {
@@ -201,15 +201,15 @@ CalculateBarcodeInflections <- function(
 #' }
 #'
 HTODemux <- function(
-  object,
-  assay = "HTO",
-  positive.quantile = 0.99,
-  init = NULL,
-  nstarts = 100,
-  kfunc = "clara",
-  nsamples = 100,
-  seed = 42,
-  verbose = TRUE
+  object,
+  assay = "HTO",
+  positive.quantile = 0.99,
+  init = NULL,
+  nstarts = 100,
+  kfunc = "clara",
+  nsamples = 100,
+  seed = 42,
+  verbose = TRUE
 ) {
   if (!is.null(x = seed)) {
     set.seed(seed = seed)
@@ -368,14 +368,14 @@ HTODemux <- function(
 #' pbmc_small <- GetResidual(object = pbmc_small, features = c('MS4A1', 'TCL1A'))
 #'
 GetResidual <- function(
-  object,
-  features,
-  assay = NULL,
-  umi.assay = NULL,
-  clip.range = NULL,
-  replace.value = FALSE,
-  na.rm = TRUE,
-  verbose = TRUE
+  object,
+  features,
+  assay = NULL,
+  umi.assay = NULL,
+  clip.range = NULL,
+  replace.value = 
FALSE, + na.rm = TRUE, + verbose = TRUE ) { assay <- assay %||% DefaultAssay(object = object) if (IsSCT(assay = object[[assay]])) { @@ -500,24 +500,39 @@ GetResidual <- function( #' } #' Load10X_Spatial <- function( - data.dir, - filename = 'filtered_feature_bc_matrix.h5', - assay = 'Spatial', - slice = 'slice1', - filter.matrix = TRUE, - to.upper = FALSE, - image = NULL, - ... + data.dir, + filename = 'filtered_feature_bc_matrix.h5', + assay = 'Spatial', + slice = 'slice1', + filter.matrix = TRUE, + to.upper = FALSE, + image = NULL, + ... ) { if (length(x = data.dir) > 1) { - warning("'Load10X_Spatial' accepts only one 'data.dir'", immediate. = TRUE) + warning("'Load10X_Spatial' accepts only one 'data.dir'", + immediate. = TRUE) data.dir <- data.dir[1] } - data <- Read10X_h5(filename = file.path(data.dir, filename), ...) + data <- Read10X_h5(filename = file.path(data.dir, filename), + ...) + if (to.upper) { - rownames(x = data) <- toupper(x = rownames(x = data)) + data <- imap(data, ~{ + rownames(.x) <- toupper(rownames(.x)) + .x + }) + } + if (is.list(data) & "Antibody Capture" %in% names(data)) { + matrix_gex <- data$`Gene Expression` + matrix_protein <- data$`Antibody Capture` + object <- CreateSeuratObject(counts = matrix_gex, assay = assay) + object_protein <- CreateAssayObject(counts = matrix_protein) + object[["Protein"]] <- object_protein + } + else { + object <- CreateSeuratObject(counts = data, assay = assay) } - object <- CreateSeuratObject(counts = data, assay = assay) if (is.null(x = image)) { image <- Read10X_Image(image.dir = file.path(data.dir,"spatial"), filter.matrix = filter.matrix) @@ -529,9 +544,47 @@ Load10X_Spatial <- function( image <- image[Cells(x = object)] DefaultAssay(object = image) <- assay object[[slice]] <- image + + # if using the meta-data available for probes add to @misc slot + file_path <- file.path(data.dir, filename) + infile <- hdf5r::H5File$new(filename = file_path, mode = 'r') + if("matrix/features/probe_region" %in% hdf5r::list.objects(infile)) { + probe.metadata <- Read10X_probe_metadata(data.dir, filename) + Misc(object = object[['Spatial']], slot = "probe_metadata") <- probe.metadata + } return(object) } +#' Read10x Probe Metadata +#' +#' This function reads the probe metadata from a 10x Genomics probe barcode matrix file in HDF5 format. +#' +#' @param data.dir The directory where the file is located. +#' @param filename The name of the file containing the raw probe barcode matrix in HDF5 format. The default filename is 'raw_probe_bc_matrix.h5'. +#' +#' @return Returns a data.frame containing the probe metadata. 
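A quick usage sketch for the new probe-metadata plumbing may help here. This is only an illustration: the `outs/` folder and the presence of `raw_probe_bc_matrix.h5` are hypothetical placeholders and not shipped with this patch; the `Read10X_probe_metadata()` call and the `@misc` retrieval via `Misc()` follow the code added above.

```r
library(Seurat)

# Hypothetical Space Ranger output folder; adjust to your dataset.
data.dir <- "outs"

# Read the probe annotations directly from the HDF5 matrix.
probe.meta <- Read10X_probe_metadata(
  data.dir = data.dir,
  filename = "raw_probe_bc_matrix.h5"
)
head(probe.meta)  # data.frame with probe.name and probe.region columns

# After Load10X_Spatial(), the same table is stashed in the assay's misc slot
# (the patch hardcodes the 'Spatial' assay name for this).
obj <- Load10X_Spatial(data.dir = data.dir)
Misc(obj[["Spatial"]], slot = "probe_metadata")
```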
+#' +#' @export +Read10X_probe_metadata <- function( + data.dir, + filename = 'raw_probe_bc_matrix.h5' +) { + if (!requireNamespace('hdf5r', quietly = TRUE)) { + stop("Please install hdf5r to read HDF5 files") + } + file.path = paste0(data.dir,"/", filename) + if (!file.exists(file.path)) { + stop("File not found") + } + infile <- hdf5r::H5File$new(filename = file.path, mode = 'r') + if("matrix/features/probe_region" %in% hdf5r::list.objects(infile)) { + probe.name <- infile[['matrix/features/name']][] + probe.region<- infile[['matrix/features/probe_region']][] + meta.data <- data.frame(probe.name, probe.region) + return(meta.data) + } +} + #' Load STARmap data #' #' @param data.dir location of data directory that contains the counts matrix, @@ -554,13 +607,13 @@ Load10X_Spatial <- function( #' @concept preprocessing #' LoadSTARmap <- function( - data.dir, - counts.file = "cell_barcode_count.csv", - gene.file = "genes.csv", - qhull.file = "qhulls.tsv", - centroid.file = "centroids.tsv", - assay = "Spatial", - image = "image" + data.dir, + counts.file = "cell_barcode_count.csv", + gene.file = "genes.csv", + qhull.file = "qhulls.tsv", + centroid.file = "centroids.tsv", + assay = "Spatial", + image = "image" ) { if (!dir.exists(paths = data.dir)) { stop("Cannot find directory ", data.dir, call. = FALSE) @@ -603,6 +656,91 @@ LoadSTARmap <- function( return(starmap) } +#' Load Curio Seeker data +#' +#' @param data.dir location of data directory that contains the counts matrix, +#' gene names, barcodes/beads, and barcodes/bead location files. +#' @param assay Name of assay to associate spatial data to +#' +#' @return A \code{\link{Seurat}} object +#' +#' @importFrom Matrix readMM +#' +#' @export +#' @concept preprocessing +#' +LoadCurioSeeker <- function(data.dir, assay = "Spatial") { + # check and find input files + if (length(x = data.dir) > 1) { + warning("'LoadCurioSeeker' accepts only one 'data.dir'", + immediate. = TRUE) + data.dir <- data.dir[1] + } + mtx.file <- list.files( + data.dir, + pattern = "*MoleculesPerMatchedBead.mtx", + full.names = TRUE) + if (length(x = mtx.file) > 1) { + warning("Multiple files matched the pattern '*MoleculesPerMatchedBead.mtx'", + immediate. = TRUE) + } else if (length(x = mtx.file) == 0) { + stop("No file matched the pattern '*MoleculesPerMatchedBead.mtx'", call. = FALSE) + } + mtx.file <- mtx.file[1] + barcodes.file <- list.files( + data.dir, + pattern = "*barcodes.tsv", + full.names = TRUE) + if (length(x = barcodes.file) > 1) { + warning("Multiple files matched the pattern '*barcodes.tsv'", + immediate. = TRUE) + } else if (length(x = barcodes.file) == 0) { + stop("No file matched the pattern '*barcodes.tsv'", call. = FALSE) + } + barcodes.file <- barcodes.file[1] + genes.file <- list.files( + data.dir, + pattern = "*genes.tsv", + full.names = TRUE) + if (length(x = genes.file) > 1) { + warning("Multiple files matched the pattern '*genes.tsv'", + immediate. = TRUE) + } else if (length(x = genes.file) == 0) { + stop("No file matched the pattern '*genes.tsv'", call. = FALSE) + } + genes.file <- genes.file[1] + coordinates.file <- list.files( + data.dir, + pattern = "*MatchedBeadLocation.csv", + full.names = TRUE) + if (length(x = coordinates.file) > 1) { + warning("Multiple files matched the pattern '*MatchedBeadLocation.csv'", + immediate. = TRUE) + } else if (length(x = coordinates.file) == 0) { + stop("No file matched the pattern '*MatchedBeadLocation.csv'", call. 
= FALSE) + } + coordinates.file <- coordinates.file[1] + + # load counts matrix and create seurat object + mtx <- readMM(mtx.file) + mtx <- as.sparse(mtx) + barcodes <- read.csv(barcodes.file, header = FALSE) + genes <- read.csv(genes.file, header = FALSE) + colnames(mtx) <- barcodes$V1 + rownames(mtx) <- genes$V1 + object <- CreateSeuratObject(counts = mtx, assay = assay) + + # load positions of each bead and store in a SlideSeq object in images slot + coords <- read.csv(coordinates.file) + colnames(coords) <- c("cell", "x", "y") + coords$y <- -coords$y + rownames(coords) <- coords$cell + coords$cell <- NULL + image <- new(Class = 'SlideSeq', assay = assay, coordinates = coords) + object[["Slice"]] <- image + return(object) +} + #' Normalize raw data #' #' Normalize count data per cell and transform to log scale @@ -666,13 +804,13 @@ LogNormalize <- function(data, scale.factor = 1e4, verbose = TRUE) { #' } #' MULTIseqDemux <- function( - object, - assay = "HTO", - quantile = 0.7, - autoThresh = FALSE, - maxiter = 5, - qrange = seq(from = 0.1, to = 0.9, by = 0.05), - verbose = TRUE + object, + assay = "HTO", + quantile = 0.7, + autoThresh = FALSE, + maxiter = 5, + qrange = seq(from = 0.1, to = 0.9, by = 0.05), + verbose = TRUE ) { assay <- assay %||% DefaultAssay(object = object) multi_data_norm <- t(x = GetAssayData( @@ -784,11 +922,11 @@ MULTIseqDemux <- function( #' } #' Read10X <- function( - data.dir, - gene.column = 2, - cell.column = 1, - unique.features = TRUE, - strip.suffix = FALSE + data.dir, + gene.column = 2, + cell.column = 1, + unique.features = TRUE, + strip.suffix = FALSE ) { full.data <- list() has_dt <- requireNamespace("data.table", quietly = TRUE) && requireNamespace("R.utils", quietly = TRUE) @@ -825,7 +963,7 @@ Read10X <- function( } else { cell.barcodes <- read.table(file = barcode.loc, header = FALSE, sep = '\t', row.names = NULL) } - + if (ncol(x = cell.barcodes) > 1) { cell.names <- cell.barcodes[, cell.column] } else { @@ -848,7 +986,7 @@ Read10X <- function( } else { colnames(x = data) <- paste0(names(x = data.dir)[i], "_", cell.names) } - + if (has_dt) { feature.names <- as.data.frame(data.table::fread(ifelse(test = pre_ver_3, yes = gene.loc, no = features.loc), header = FALSE)) } else { @@ -858,7 +996,7 @@ Read10X <- function( stringsAsFactors = FALSE ) } - + if (any(is.na(x = feature.names[, gene.column]))) { warning( 'Some features names are NA. 
Replacing NA names with ID from the opposite column requested', @@ -1105,10 +1243,10 @@ Read10X_Image <- function(image.dir, image.name = "tissue_lowres_image.png", fil #' @template note-reqdpkg #' ReadAkoya <- function( - filename, - type = c('inform', 'processor', 'qupath'), - filter = 'DAPI|Blank|Empty', - inform.quant = c('mean', 'total', 'min', 'max', 'std') + filename, + type = c('inform', 'processor', 'qupath'), + filter = 'DAPI|Blank|Empty', + inform.quant = c('mean', 'total', 'min', 'max', 'std') ) { if (!requireNamespace("data.table", quietly = TRUE)) { stop("Please install 'data.table' for this function") @@ -1426,18 +1564,18 @@ ReadAkoya <- function( #' } #' ReadMtx <- function( - mtx, - cells, - features, - cell.column = 1, - feature.column = 2, - cell.sep = "\t", - feature.sep = "\t", - skip.cell = 0, - skip.feature = 0, - mtx.transpose = FALSE, - unique.features = TRUE, - strip.suffix = FALSE + mtx, + cells, + features, + cell.column = 1, + feature.column = 2, + cell.sep = "\t", + feature.sep = "\t", + skip.cell = 0, + skip.feature = 0, + mtx.transpose = FALSE, + unique.features = TRUE, + strip.suffix = FALSE ) { all.files <- list( "expression matrix" = mtx, @@ -1536,7 +1674,7 @@ ReadMtx <- function( feature.column, ". Try specifiying a different column.", call. = FALSE - ) + ) } else { warning( "Some features names are NA in column ", @@ -1545,7 +1683,7 @@ ReadMtx <- function( replacement.column, ".", call. = FALSE - ) + ) } feature.names[na.features, feature.column] <- feature.names[na.features, replacement.column] } @@ -1569,7 +1707,7 @@ ReadMtx <- function( no = "" ), call. = FALSE - ) + ) } if (length(x = feature.names) != nrow(x = data)) { stop( @@ -1583,9 +1721,9 @@ ReadMtx <- function( no = "" ), call. = FALSE - ) + ) } - + colnames(x = data) <- cell.names rownames(x = data) <- feature.names data <- as.sparse(x = data) @@ -1669,24 +1807,24 @@ ReadMtx <- function( #' @template note-reqdpkg #' ReadNanostring <- function( - data.dir, - mtx.file = NULL, - metadata.file = NULL, - molecules.file = NULL, - segmentations.file = NULL, - type = 'centroids', - mol.type = 'pixels', - metadata = NULL, - mols.filter = NA_character_, - genes.filter = NA_character_, - fov.filter = NULL, - subset.counts.matrix = NULL, - cell.mols.only = TRUE + data.dir, + mtx.file = NULL, + metadata.file = NULL, + molecules.file = NULL, + segmentations.file = NULL, + type = 'centroids', + mol.type = 'pixels', + metadata = NULL, + mols.filter = NA_character_, + genes.filter = NA_character_, + fov.filter = NULL, + subset.counts.matrix = NULL, + cell.mols.only = TRUE ) { if (!requireNamespace("data.table", quietly = TRUE)) { stop("Please install 'data.table' for this function") } - + # Argument checking type <- match.arg( arg = type, @@ -1709,7 +1847,7 @@ ReadNanostring <- function( several.ok = TRUE ) } - + use.dir <- all(vapply( X = c(mtx.file, metadata.file, molecules.file), FUN = function(x) { @@ -1717,7 +1855,7 @@ ReadNanostring <- function( }, FUN.VALUE = logical(length = 1L) )) - + if (use.dir && !dir.exists(paths = data.dir)) { stop("Cannot find Nanostring directory ", data.dir) } @@ -1728,7 +1866,7 @@ ReadNanostring <- function( molecules.file = molecules.file %||% '[_a-zA-Z0-9]*_tx_file.csv', segmentations.file = segmentations.file %||% '[_a-zA-Z0-9]*-polygons.csv' ) - + files <- vapply( X = files, FUN = function(x) { @@ -1749,7 +1887,7 @@ ReadNanostring <- function( USE.NAMES = TRUE ) files[!file.exists(files)] <- NA_character_ - + if (all(is.na(x = files))) { stop("Cannot find Nanostring 
input files in ", data.dir) } @@ -1767,7 +1905,7 @@ ReadNanostring <- function( data.table = FALSE, verbose = FALSE ) - + # filter metadata file by FOVs if (!is.null(x = fov.filter)) { md <- md[md$fov %in% fov.filter,] @@ -1787,7 +1925,7 @@ ReadNanostring <- function( data.table = FALSE, verbose = FALSE ) - + # filter metadata file by FOVs if (!is.null(x = fov.filter)) { segs <- segs[segs$fov %in% fov.filter,] @@ -1807,17 +1945,17 @@ ReadNanostring <- function( sep = ',', verbose = FALSE ) - + # filter molecules file by FOVs if (!is.null(x = fov.filter)) { mx <- mx[mx$fov %in% fov.filter,] } - + # Molecules outside of a cell have a cell_ID of 0 if (cell.mols.only) { mx <- mx[mx$cell_ID != 0,] } - + if (!is.na(x = mols.filter)) { ppremol( message = paste("Filtering molecules with pattern", mols.filter), @@ -1833,7 +1971,7 @@ ReadNanostring <- function( files <- files[setdiff(x = names(x = files), y = 'molecules.file')] } files <- files[!is.na(x = files)] - + outs <- list("matrix"=NULL, "pixels"=NULL, "centroids"=NULL) if (!is.null(metadata)) { outs <- append(outs, list("metadata" = NULL)) @@ -1841,7 +1979,7 @@ ReadNanostring <- function( if ("segmentations" %in% type) { outs <- append(outs, list("segmentations" = NULL)) } - + for (otype in names(x = outs)) { outs[[otype]] <- switch( EXPR = otype, @@ -1868,8 +2006,8 @@ ReadNanostring <- function( } tx <- subset(tx, select = -c(fov, cell_ID)) } - - tx <- as.data.frame(t(x = as.matrix(x = tx[, -1, drop = FALSE]))) + + tx <- as.data.frame(t(x = as.matrix(x = tx))) if (!is.na(x = genes.filter)) { ptx( message = paste("Filtering genes with pattern", genes.filter), @@ -1881,7 +2019,7 @@ ReadNanostring <- function( # only keep cells with counts greater than 0 tx <- tx[, which(colSums(tx) != 0)] ratio <- getOption(x = 'Seurat.input.sparse_ratio', default = 0.4) - + if ((sum(tx == 0) / length(x = tx)) > ratio) { ptx( message = 'Converting counts to sparse matrix', @@ -1890,9 +2028,9 @@ ReadNanostring <- function( ) tx <- as.sparse(x = tx) } - + ptx(type = 'finish') - + tx }, 'centroids' = { @@ -2011,10 +2149,10 @@ ReadNanostring <- function( #' @concept preprocessing #' ReadXenium <- function( - data.dir, - outs = c("matrix", "microns"), - type = "centroids", - mols.qv.threshold = 20 + data.dir, + outs = c("matrix", "microns"), + type = "centroids", + mols.qv.threshold = 20 ) { # Argument checking type <- match.arg( @@ -2022,17 +2160,17 @@ ReadXenium <- function( choices = c("centroids", "segmentations"), several.ok = TRUE ) - + outs <- match.arg( arg = outs, choices = c("matrix", "microns"), several.ok = TRUE ) - + outs <- c(outs, type) - + has_dt <- requireNamespace("data.table", quietly = TRUE) && requireNamespace("R.utils", quietly = TRUE) - + data <- sapply(outs, function(otype) { switch( EXPR = otype, @@ -2071,7 +2209,7 @@ ReadXenium <- function( class = 'sticky', amount = 0 ) - + # load cell boundaries if (has_dt) { cell_boundaries_df <- as.data.frame(data.table::fread(file.path(data.dir, "cell_boundaries.csv.gz"))) @@ -2089,7 +2227,7 @@ ReadXenium <- function( class = 'sticky', amount = 0 ) - + # molecules if (has_dt) { tx_dt <- as.data.frame(data.table::fread(file.path(data.dir, "transcripts.csv.gz"))) @@ -2098,7 +2236,7 @@ ReadXenium <- function( transcripts <- read.csv(file.path(data.dir, "transcripts.csv.gz")) transcripts <- subset(transcripts, qv >= mols.qv.threshold) } - + df <- data.frame( x = transcripts$x_location, @@ -2216,11 +2354,11 @@ ReadSlideSeq <- function(coord.file, assay = 'Spatial') { #' } #' ReadVitessce <- function( - 
counts = NULL, - coords = NULL, - molecules = NULL, - type = c('segmentations', 'centroids'), - filter = NA_character_ + counts = NULL, + coords = NULL, + molecules = NULL, + type = c('segmentations', 'centroids'), + filter = NA_character_ ) { if (!requireNamespace('jsonlite', quietly = TRUE)) { stop("Please install 'jsonlite' for this function") @@ -3179,9 +3317,9 @@ RelativeCounts <- function(data, scale.factor = 1, verbose = TRUE) { #' @concept preprocessing #' RunMarkVario <- function( - spatial.location, - data, - ... + spatial.location, + data, + ... ) { pp <- ppp( x = spatial.location[, 1], @@ -3297,10 +3435,10 @@ RunMoransI <- function(data, pos, verbose = TRUE) { #' head(x = downsampled) #' SampleUMI <- function( - data, - max.umi = 1000, - upsample = FALSE, - verbose = FALSE + data, + max.umi = 1000, + upsample = FALSE, + verbose = FALSE ) { data <- as.sparse(x = data) if (length(x = max.umi) == 1) { @@ -3327,7 +3465,7 @@ SampleUMI <- function( #' Use regularized negative binomial regression to normalize UMI count data #' #' This function calls sctransform::vst. The sctransform package is available at -#' https://github.com/ChristophH/sctransform. +#' https://github.com/satijalab/sctransform. #' Use this function as an alternative to the NormalizeData, #' FindVariableFeatures, ScaleData workflow. Results are saved in a new assay #' (named SCT by default) with counts being (corrected) counts, data being log1p(counts), @@ -3385,24 +3523,24 @@ SampleUMI <- function( #' SCTransform(object = pbmc_small) #' SCTransform <- function( - object, - assay = 'RNA', - new.assay.name = 'SCT', - reference.SCT.model = NULL, - do.correct.umi = TRUE, - ncells = 5000, - residual.features = NULL, - variable.features.n = 3000, - variable.features.rv.th = 1.3, - vars.to.regress = NULL, - do.scale = FALSE, - do.center = TRUE, - clip.range = c(-sqrt(x = ncol(x = object[[assay]]) / 30), sqrt(x = ncol(x = object[[assay]]) / 30)), - conserve.memory = FALSE, - return.only.var.genes = TRUE, - seed.use = 1448145, - verbose = TRUE, - ... + object, + assay = 'RNA', + new.assay.name = 'SCT', + reference.SCT.model = NULL, + do.correct.umi = TRUE, + ncells = 5000, + residual.features = NULL, + variable.features.n = 3000, + variable.features.rv.th = 1.3, + vars.to.regress = NULL, + do.scale = FALSE, + do.center = TRUE, + clip.range = c(-sqrt(x = ncol(x = object[[assay]]) / 30), sqrt(x = ncol(x = object[[assay]]) / 30)), + conserve.memory = FALSE, + return.only.var.genes = TRUE, + seed.use = 1448145, + verbose = TRUE, + ... 
) { if (!is.null(x = seed.use)) { set.seed(seed = seed.use) @@ -3432,7 +3570,7 @@ SCTransform <- function( if (reference.SCT.model$model_str != 'y ~ log_umi') { stop('reference.SCT.model must be derived using default SCT regression formula, `y ~ log_umi`') } - + } # check for latent_var in meta data if ('latent_var' %in% names(x = vst.args)) { @@ -3471,7 +3609,7 @@ SCTransform <- function( vst.args[['n_cells']] <- min(ncells, ncol(x = umi)) residual.type <- vst.args[['residual_type']] %||% 'pearson' res.clip.range <- vst.args[['res_clip_range']] %||% c(-sqrt(x = ncol(x = umi)), sqrt(x = ncol(x = umi))) - # set sct normalization method + # set sct normalization method if (!is.null( reference.SCT.model)) { sct.method <- "reference.model" } else if (!is.null(x = residual.features)) { @@ -3481,7 +3619,7 @@ SCTransform <- function( } else { sct.method <- "default" } - + # set vst model vst.out <- switch( EXPR = sct.method, @@ -3538,7 +3676,7 @@ SCTransform <- function( vst.out <- do.call(what = 'vst', args = vst.args) vst.out }) - + feature.variance <- vst.out$gene_attr[,"residual_variance"] names(x = feature.variance) <- rownames(x = vst.out$gene_attr) if (verbose) { @@ -3550,7 +3688,7 @@ SCTransform <- function( } else { top.features <- names(x = feature.variance)[feature.variance >= variable.features.rv.th] } - + # get residuals vst.out <- switch( EXPR = sct.method, @@ -3760,16 +3898,16 @@ SubsetByBarcodeInflections <- function(object) { #' @export #' FindVariableFeatures.default <- function( - object, - selection.method = "vst", - loess.span = 0.3, - clip.max = 'auto', - mean.function = FastExpMean, - dispersion.function = FastLogVMR, - num.bin = 20, - binning.method = "equal_width", - verbose = TRUE, - ... + object, + selection.method = "vst", + loess.span = 0.3, + clip.max = 'auto', + mean.function = FastExpMean, + dispersion.function = FastLogVMR, + num.bin = 20, + binning.method = "equal_width", + verbose = TRUE, + ... ) { CheckDots(...) if (!inherits(x = object, 'Matrix')) { @@ -3860,19 +3998,19 @@ FindVariableFeatures.default <- function( #' @method FindVariableFeatures Assay #' FindVariableFeatures.Assay <- function( - object, - selection.method = "vst", - loess.span = 0.3, - clip.max = 'auto', - mean.function = FastExpMean, - dispersion.function = FastLogVMR, - num.bin = 20, - binning.method = "equal_width", - nfeatures = 2000, - mean.cutoff = c(0.1, 8), - dispersion.cutoff = c(1, Inf), - verbose = TRUE, - ... + object, + selection.method = "vst", + loess.span = 0.3, + clip.max = 'auto', + mean.function = FastExpMean, + dispersion.function = FastLogVMR, + num.bin = 20, + binning.method = "equal_width", + nfeatures = 2000, + mean.cutoff = c(0.1, 8), + dispersion.cutoff = c(1, Inf), + verbose = TRUE, + ... ) { if (length(x = mean.cutoff) != 2 || length(x = dispersion.cutoff) != 2) { stop("Both 'mean.cutoff' and 'dispersion.cutoff' must be two numbers") @@ -3939,9 +4077,9 @@ FindVariableFeatures.Assay <- function( #' @method FindVariableFeatures SCTAssay #' FindVariableFeatures.SCTAssay <- function( - object, - nfeatures = 2000, - ... + object, + nfeatures = 2000, + ... ) { if (length(x = slot(object = object, name = "SCTModel.list")) > 1) { stop("SCT assay is comprised of multiple SCT models. To change the variable features, please set manually with VariableFeatures<-", call. 
= FALSE) @@ -3961,20 +4099,20 @@ FindVariableFeatures.SCTAssay <- function( #' @method FindVariableFeatures Seurat #' FindVariableFeatures.Seurat <- function( - object, - assay = NULL, - selection.method = "vst", - loess.span = 0.3, - clip.max = 'auto', - mean.function = FastExpMean, - dispersion.function = FastLogVMR, - num.bin = 20, - binning.method = "equal_width", - nfeatures = 2000, - mean.cutoff = c(0.1, 8), - dispersion.cutoff = c(1, Inf), - verbose = TRUE, - ... + object, + assay = NULL, + selection.method = "vst", + loess.span = 0.3, + clip.max = 'auto', + mean.function = FastExpMean, + dispersion.function = FastLogVMR, + num.bin = 20, + binning.method = "equal_width", + nfeatures = 2000, + mean.cutoff = c(0.1, 8), + dispersion.cutoff = c(1, Inf), + verbose = TRUE, + ... ) { assay <- assay %||% DefaultAssay(object = object) assay.data <- GetAssay(object = object, assay = assay) @@ -4030,14 +4168,14 @@ FindVariableFeatures.Seurat <- function( #' #' FindSpatiallyVariableFeatures.default <- function( - object, - spatial.location, - selection.method = c('markvariogram', 'moransi'), - r.metric = 5, - x.cuts = NULL, - y.cuts = NULL, - verbose = TRUE, - ... + object, + spatial.location, + selection.method = c('markvariogram', 'moransi'), + r.metric = 5, + x.cuts = NULL, + y.cuts = NULL, + verbose = TRUE, + ... ) { # error check dimensions if (ncol(x = object) != nrow(x = spatial.location)) { @@ -4082,17 +4220,17 @@ FindSpatiallyVariableFeatures.default <- function( #' @export #' FindSpatiallyVariableFeatures.Assay <- function( - object, - slot = "scale.data", - spatial.location, - selection.method = c('markvariogram', 'moransi'), - features = NULL, - r.metric = 5, - x.cuts = NULL, - y.cuts = NULL, - nfeatures = nfeatures, - verbose = TRUE, - ... + object, + slot = "scale.data", + spatial.location, + selection.method = c('markvariogram', 'moransi'), + features = NULL, + r.metric = 5, + x.cuts = NULL, + y.cuts = NULL, + nfeatures = nfeatures, + verbose = TRUE, + ... ) { features <- features %||% rownames(x = object) if (selection.method == "markvariogram" && "markvariogram" %in% names(x = Misc(object = object))) { @@ -4174,18 +4312,18 @@ FindSpatiallyVariableFeatures.Assay <- function( #' @export #' FindSpatiallyVariableFeatures.Seurat <- function( - object, - assay = NULL, - slot = "scale.data", - features = NULL, - image = NULL, - selection.method = c('markvariogram', 'moransi'), - r.metric = 5, - x.cuts = NULL, - y.cuts = NULL, - nfeatures = 2000, - verbose = TRUE, - ... + object, + assay = NULL, + slot = "scale.data", + features = NULL, + image = NULL, + selection.method = c('markvariogram', 'moransi'), + r.metric = 5, + x.cuts = NULL, + y.cuts = NULL, + nfeatures = 2000, + verbose = TRUE, + ... ) { assay <- assay %||% DefaultAssay(object = object) features <- features %||% rownames(x = object[[assay]]) @@ -4233,13 +4371,13 @@ FindSpatiallyVariableFeatures.Seurat <- function( #' @export #' NormalizeData.default <- function( - object, - normalization.method = "LogNormalize", - scale.factor = 1e4, - margin = 1, - block.size = NULL, - verbose = TRUE, - ... + object, + normalization.method = "LogNormalize", + scale.factor = 1e4, + margin = 1, + block.size = NULL, + verbose = TRUE, + ... ) { CheckDots(...) if (is.null(x = normalization.method)) { @@ -4340,12 +4478,12 @@ NormalizeData.default <- function( #' @method NormalizeData Assay #' NormalizeData.Assay <- function( - object, - normalization.method = "LogNormalize", - scale.factor = 1e4, - margin = 1, - verbose = TRUE, - ... 
+ object, + normalization.method = "LogNormalize", + scale.factor = 1e4, + margin = 1, + verbose = TRUE, + ... ) { object <- SetAssayData( object = object, @@ -4377,13 +4515,13 @@ NormalizeData.Assay <- function( #' } #' NormalizeData.Seurat <- function( - object, - assay = NULL, - normalization.method = "LogNormalize", - scale.factor = 1e4, - margin = 1, - verbose = TRUE, - ... + object, + assay = NULL, + normalization.method = "LogNormalize", + scale.factor = 1e4, + margin = 1, + verbose = TRUE, + ... ) { assay <- assay %||% DefaultAssay(object = object) assay.data <- GetAssay(object = object, assay = assay) @@ -4433,20 +4571,20 @@ NormalizeData.Seurat <- function( #' @export #' ScaleData.default <- function( - object, - features = NULL, - vars.to.regress = NULL, - latent.data = NULL, - split.by = NULL, - model.use = 'linear', - use.umi = FALSE, - do.scale = TRUE, - do.center = TRUE, - scale.max = 10, - block.size = 1000, - min.cells.to.block = 3000, - verbose = TRUE, - ... + object, + features = NULL, + vars.to.regress = NULL, + latent.data = NULL, + split.by = NULL, + model.use = 'linear', + use.umi = FALSE, + do.scale = TRUE, + do.center = TRUE, + scale.max = 10, + block.size = 1000, + min.cells.to.block = 3000, + verbose = TRUE, + ... ) { CheckDots(...) features <- features %||% rownames(x = object) @@ -4674,20 +4812,20 @@ ScaleData.default <- function( #' @method ScaleData Assay #' ScaleData.Assay <- function( - object, - features = NULL, - vars.to.regress = NULL, - latent.data = NULL, - split.by = NULL, - model.use = 'linear', - use.umi = FALSE, - do.scale = TRUE, - do.center = TRUE, - scale.max = 10, - block.size = 1000, - min.cells.to.block = 3000, - verbose = TRUE, - ... + object, + features = NULL, + vars.to.regress = NULL, + latent.data = NULL, + split.by = NULL, + model.use = 'linear', + use.umi = FALSE, + do.scale = TRUE, + do.center = TRUE, + scale.max = 10, + block.size = 1000, + min.cells.to.block = 3000, + verbose = TRUE, + ... ) { use.umi <- ifelse(test = model.use != 'linear', yes = TRUE, no = use.umi) slot.use <- ifelse(test = use.umi, yes = 'counts', no = 'data') @@ -4732,20 +4870,20 @@ ScaleData.Assay <- function( #' @method ScaleData Seurat #' ScaleData.Seurat <- function( - object, - features = NULL, - assay = NULL, - vars.to.regress = NULL, - split.by = NULL, - model.use = 'linear', - use.umi = FALSE, - do.scale = TRUE, - do.center = TRUE, - scale.max = 10, - block.size = 1000, - min.cells.to.block = 3000, - verbose = TRUE, - ... + object, + features = NULL, + assay = NULL, + vars.to.regress = NULL, + split.by = NULL, + model.use = 'linear', + use.umi = FALSE, + do.scale = TRUE, + do.center = TRUE, + scale.max = 10, + block.size = 1000, + min.cells.to.block = 3000, + verbose = TRUE, + ... 
) { assay <- assay %||% DefaultAssay(object = object) assay.data <- GetAssay(object = object, assay = assay) @@ -5145,13 +5283,13 @@ FindThresh <- function(call.list) { #' @importFrom sctransform get_residuals # GetResidualSCTModel <- function( - object, - assay, - SCTModel, - new_features, - clip.range, - replace.value, - verbose + object, + assay, + SCTModel, + new_features, + clip.range, + replace.value, + verbose ) { clip.range <- clip.range %||% SCTResults(object = object[[assay]], slot = "clips", model = SCTModel)$sct model.features <- rownames(x = SCTResults(object = object[[assay]], slot = "feature.attributes", model = SCTModel)) @@ -5160,14 +5298,14 @@ GetResidualSCTModel <- function( sct.method <- SCTResults(object = object[[assay]], slot = "arguments", model = SCTModel)$sct.method %||% "default" scale.data.cells <- colnames(x = GetAssayData(object = object, assay = assay, slot = "scale.data")) if (length(x = setdiff(x = model.cells, y = scale.data.cells)) == 0) { - existing_features <- names(x = which(x = ! apply( - X = GetAssayData(object = object, assay = assay, slot = "scale.data")[, model.cells], - MARGIN = 1, - FUN = anyNA) - )) - } else { - existing_features <- character() - } + existing_features <- names(x = which(x = ! apply( + X = GetAssayData(object = object, assay = assay, slot = "scale.data")[, model.cells], + MARGIN = 1, + FUN = anyNA) + )) + } else { + existing_features <- character() + } if (replace.value) { features_to_compute <- new_features } else { @@ -5181,7 +5319,7 @@ GetResidualSCTModel <- function( } if (!umi.assay %in% Assays(object = object)) { warning("The umi assay (", umi.assay, ") is not present in the object. ", - "Cannot compute additional residuals.", call. = FALSE, immediate. = TRUE) + "Cannot compute additional residuals.", call. = FALSE, immediate. = TRUE) return(NULL) } diff_features <- setdiff(x = features_to_compute, y = model.features) @@ -5319,12 +5457,12 @@ NBResiduals <- function(fmla, regression.mat, gene, return.mode = FALSE) { #' @importFrom utils txtProgressBar setTxtProgressBar # RegressOutMatrix <- function( - data.expr, - latent.data = NULL, - features.regress = NULL, - model.use = NULL, - use.umi = FALSE, - verbose = TRUE + data.expr, + latent.data = NULL, + features.regress = NULL, + model.use = NULL, + use.umi = FALSE, + verbose = TRUE ) { # Do we bypass regression and simply return data.expr? bypass <- vapply( From f8461ca9161ea033b6c483118d3b0ec99029b39c Mon Sep 17 00:00:00 2001 From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com> Date: Fri, 18 Aug 2023 14:30:06 +0200 Subject: [PATCH 18/24] cleaning `ReadVizgen` few small edits for cleaner code. --- R/preprocessing.R | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index c53f20e43..e5bc7d83b 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2936,8 +2936,8 @@ ReadVizgen <- function( } ptx(type = "finish") tx - - }, centroids = { + }, + centroids = { pcents <- progressor() pcents(message = "Creating centroid coordinates", class = "sticky", amount = 0) @@ -2946,7 +2946,8 @@ ReadVizgen <- function( stringsAsFactors = FALSE) # use segmentations from ".parquet" - }, segmentations = { + }, + segmentations = { if (use.parquet) { if (length(parq) > 1) { # eg, if two files are present: @@ -3040,8 +3041,8 @@ ReadVizgen <- function( # check if any cells have > 1 segmentation boundary # TODO: (optionally) optimize sanity code using purrr? 
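Since the artifact check that continues below is easy to misread, here is a toy, self-contained illustration of the same idea. The `segs` list is a made-up stand-in for the per-cell polygon lists read from the boundaries file (each element a list of x/y coordinate matrices); it is not data from this patch.

```r
# Toy stand-in: artifacts show up as extra, small polygons per cell.
segs <- list(
  cell1 = list(cbind(x = c(0, 1, 1, 0), y = c(0, 0, 1, 1))),
  cell2 = list(
    cbind(x = c(0, 2, 2, 0), y = c(0, 0, 2, 2)),   # plausible boundary
    cbind(x = c(0, 0.1, 0.1), y = c(0, 0, 0.1))    # small artifact
  )
)
test.segs <- lengths(segs)             # boundaries per cell
segs.art.index <- which(test.segs > 1) # cells with > 1 polygon
for (i in segs.art.index) {
  # keep the boundary with the most vertices, as the code below does
  n.coords <- vapply(segs[[i]], nrow, integer(1))
  segs[[i]] <- segs[[i]][which.max(n.coords)]
}
lengths(segs)                          # every cell now has one boundary
```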
test.segs <- lapply(segs %>% seq, function(i) segs[[i]] %>% length) %>% unlist() - if (any(test.segs %>% unlist > 1)) { - segs.art.index <- which(test.segs %>% unlist > 1) + if (any(test.segs > 1)) { + segs.art.index <- which(test.segs > 1) if (verbose) { message("Found ", segs.art.index %>% length, c(" cells with > 1 polygon artifacts:", @@ -3192,7 +3193,8 @@ ReadVizgen <- function( } pg # final segmentaions out.. } - }, boxes = { + }, + boxes = { pbox <- progressor(steps = nrow(x = sp)) pbox(message = "Creating box coordinates", class = "sticky", amount = 0) @@ -3229,13 +3231,15 @@ ReadVizgen <- function( } pbox(type = "finish") do.call(what = "rbind", args = bx) - }, metadata = { + }, + metadata = { pmeta <- progressor() pmeta(message = "Loading metadata", class = "sticky", amount = 0) pmeta(type = "finish") sp[, metadata, drop = FALSE] - }, pixels = { + }, + pixels = { ppixels <- progressor() ppixels(message = "Creating pixel-level molecule coordinates", class = "sticky", amount = 0) @@ -3243,7 +3247,8 @@ ReadVizgen <- function( stringsAsFactors = FALSE) ppixels(type = "finish") df - }, microns = { + }, + microns = { pmicrons <- progressor() pmicrons(message = "Creating micron-level molecule coordinates", class = "sticky", amount = 0) From a7be25a7c209a7b958c673d648896bced133ebb1 Mon Sep 17 00:00:00 2001 From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com> Date: Tue, 22 Aug 2023 14:17:18 +0200 Subject: [PATCH 19/24] small bug fix in `ReadVizgen` --- R/preprocessing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index e5bc7d83b..3b4badf8d 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -3102,7 +3102,7 @@ ReadVizgen <- function( segs_list <- future.apply::future_lapply(segs %>% seq, function(i) { - segs[[i]] %>% + segs[[i]][[1]] %>% data.table::as.data.table(.) %>% mutate(cell = names(segs)[i]) } From 6352d56af8e839cfc5c1237e96e4a068f22f40ea Mon Sep 17 00:00:00 2001 From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com> Date: Mon, 28 Aug 2023 15:50:05 +0200 Subject: [PATCH 20/24] added `sf` & filter polygons -> `ReadVizgen()` ..inspired by [`SpatialFeatureExperiment`](https://github.com/pachterlab/SpatialFeatureExperiment) using`sf` package to work/filter segmentation geometries. Updates: - adding support to efficiently filter segmentation polygons (removing small artifacts etc..) 
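  To make the area threshold in the bullet above concrete, a minimal self-contained sketch with toy polygons follows (this illustrates the idea only; it is not the `.filter_polygons()` helper that the next bullet introduces):

```r
library(sf)

# One cell whose segmentation came back as two pieces: a plausible
# boundary and a small sliver artifact.
big  <- st_polygon(list(cbind(c(0, 4, 4, 0, 0), c(0, 0, 4, 4, 0))))
tiny <- st_polygon(list(cbind(c(0, 1, 0.5, 0), c(0, 0, 1, 0))))

pieces <- st_cast(st_combine(st_sfc(big, tiny)), "POLYGON")
as.numeric(st_area(pieces))   # 16 and 0.5 (squared coordinate units)

min.area <- 5                 # the default proposed in this patch
kept <- pieces[as.numeric(st_area(pieces)) > min.area]
kept                          # only the large boundary survives
```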
- added helper function `.filter_polygons()` (a bit modified version from `SpatialFeatureExperiment:::.filter_polygons`) for filtering given `min.area` - minimal polygon area as a threshold - allow for multiple segmentation polygons per cell (..still using single z-plane) --- R/preprocessing.R | 197 +++++++++++++--------------------------------- 1 file changed, 53 insertions(+), 144 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 3b4badf8d..34d291284 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2535,8 +2535,8 @@ ReadVitessce <- function( #' @param workers.MulticoreParam Number of cores to use for \code{BiocParallel::bplapply} #' @param DTthreads.pct Optional, set percentage eg \code{50} of total threads to use for \code{data.table::fread}, #' if set to \code{NULL} will use default setting as in \code{data.table::getDTthreads(verbose = T)} -#' @param use.furrr When working with lists, use \code{furrr} with \code{future} parallelization; -#' if \conde{FALSE} standard \code{purrr} will be used +#' @param min.area Minimal polygon area to use as a threshold for filtering, if set to \code{NULL} +#' the maximal area (ie, the largest polygon) will be used instead. #' @param verbose If to print processing messages using \code{message}; default is to \code{FALSE} #' #' @return \code{ReadVizgen}: A list with some combination of the @@ -2561,11 +2561,9 @@ ReadVitessce <- function( #' #' @importFrom future.apply future_lapply #' @import dplyr -#' @import tibble -#' @import purrr -#' @import furrr +#' @importFrom purrr lmap +#' @import sf #' @import magrittr -#' @importFrom spatstat.geom is.empty #' @importFrom progressr progressor #' #' @export @@ -2593,15 +2591,15 @@ ReadVizgen <- function( use.BiocParallel = TRUE, workers.MulticoreParam = 12, DTthreads.pct = NULL, - use.furrr = TRUE, + min.area = 5, verbose = FALSE ) { # TODO: handle multiple segmentations per z-plane # NOTE: this is only needed when segmentations differ between z-planes - # packages that needs to be installed a priori + # packages that need to be installed a priori pkgs <- c("data.table", "arrow", "sfarrow", - "tidyverse", "furrr", "BiocParallel", "Matrix") + "tidyverse", "sf", "BiocParallel", "Matrix") lapply(pkgs %>% length %>% seq, function(i) { !requireNamespace(pkgs[i], quietly = TRUE) } ) %>% unlist %>% @@ -2688,7 +2686,7 @@ ReadVizgen <- function( list.files(data.dir, pattern = ".parquet$", full.names = TRUE) %>% - { if (is.empty(.)) { + { if (length(.) == 0) { # look in the sub directory (if nothing is found) list.files(data.dir, pattern = ".parquet$", @@ -2710,7 +2708,7 @@ ReadVizgen <- function( list.files(data.dir, pattern = "cell_by_gene", full.names = TRUE) %>% - { if (is.empty(.)) { + { if (length(.) == 0) { list.files(data.dir, pattern = "cell_by_gene", full.names = TRUE, @@ -2727,7 +2725,7 @@ ReadVizgen <- function( list.files(data.dir, pattern = "cell_metadata", full.names = TRUE) %>% - { if (is.empty(.)) { + { if (length(.) == 0) { list.files(data.dir, pattern = "cell_metadata", full.names = TRUE, @@ -2744,7 +2742,7 @@ ReadVizgen <- function( list.files(data.dir, pattern = "detected_transcripts", full.names = TRUE) %>% - { if (is.empty(.)) { + { if (length(.) 
== 0) { list.files(data.dir, pattern = "detected_transcripts", full.names = TRUE, @@ -2858,8 +2856,8 @@ ReadVizgen <- function( class = 'sticky', amount = 0 ) - - # load molecules + + # optionally - load molecules mx <- data.table::fread( file = files[['molecules']], sep = ',', @@ -2899,9 +2897,6 @@ ReadVizgen <- function( tx <- data.table::fread(file = files[[otype]], sep = ",", data.table = FALSE, verbose = FALSE) rownames(x = tx) <- as.character(x = tx[, 1]) - # avoid converting to dense matrix? - #tx <- t(x = as.matrix(x = tx[, -1, drop = FALSE])) - # keep cells with `transcript_count` > 0 if (verbose) { message(">>> filtering `cell_by_gene` - keep cells with counts > 0") } tx %<>% select(-1) %>% @@ -2910,8 +2905,6 @@ ReadVizgen <- function( # match cells to filtered data from spatial (cell_metadata) filter(., rownames(.) %in% rownames(sp)) } else { (.) } } %>% # return filtered count data - # transpose data.frame without converting to dense matrix - #t() %>% as.sparse() %>% # convert to sparse matrix Matrix::t() # transpose sparse matrix @@ -2936,6 +2929,7 @@ ReadVizgen <- function( } ptx(type = "finish") tx + }, centroids = { pcents <- progressor() @@ -2944,10 +2938,9 @@ ReadVizgen <- function( pcents(type = "finish") data.frame(x = sp$center_x, y = sp$center_y, cell = rownames(x = sp), stringsAsFactors = FALSE) - - # use segmentations from ".parquet" }, segmentations = { + # use segmentations from ".parquet" if (use.parquet) { if (length(parq) > 1) { # eg, if two files are present: @@ -2962,7 +2955,7 @@ ReadVizgen <- function( grep(gsub("s", "", mol.type), ., value = TRUE) } } %>% - { if (is.empty(.)) { + { if (length(.) == 0) { # only if single ".parquet" file present # eg, `cell_boundaries.parquet` parq %>% @@ -2970,131 +2963,42 @@ ReadVizgen <- function( } else { (.) } } } - # Read .parquet file - parq %<>% sfarrow::st_read_parquet(.) + # Read .parquet file ---- + parq %<>% sfarrow::st_read_parquet(.) 
%>% + # keep only selected z-plane + filter(., ZIndex == z) - # get all cell segmentations - segs <- filter(parq, ZIndex == z) %>% - pull(Geometry) %>% - # add cell ID - set_names(filter(parq, ZIndex == z) %>% - pull(EntityID) %>% as.character) + # Sanity checks on segmentation polygons + # Using `sf` for filtering polygons ---- + # keep multiple polygons pieces per single cell id + parq %<>% + .filter_polygons(., + min.area = min.area, + BPPARAM = + if (exists("BPParam")) + { BPParam } else { SerialParam() }, + verbose = verbose) - ## Sanity checks on segmentation polygons - part 1 ---- - test.segs <- lapply(segs %>% seq, - function(i) segs[[i]] %>% length) %>% unlist() - if (any(test.segs > 1)) { - segs.art.index <- which(test.segs > 1) - segs.empty.index <- which(test.segs < 1) - if (verbose) { message("Sanity checks on cell segmentation polygons:", "\n", - ">>> found ", segs.art.index %>% length, - " cells with > 1 (nested) polygon lists:", "\n", - ">>> flattening polygon lists", "\n", - if (c(segs.empty.index %>% length) > 0) { - paste0(">>> removing ", - segs.empty.index %>% length, - " empty polygon lists") } - ) } - - # step 1 - find polygon nested lists with > 1 length - # get original planned session if exists - if (is(future::plan(), "multisession")) { - orig.plan <- future::plan() - } - segs.1 <- segs %>% - purrr::keep(., c(purrr::map(., length) %>% unlist > 1)) %>% - # flatten each list - { - if (use.furrr) { - # set temporary workers - f.plan <- future::plan("multisession", workers = 4L, gc = TRUE) - #on.exit(f.plan %>% future::plan()) # exiting doesn't to work with furrr - # using furrr (faster purrr with future) - furrr::future_map(., purrr::flatten) - } else { - purrr::map(., purrr::flatten) - } - } %>% suppressPackageStartupMessages() - # set originally plannned session back - if (exists("orig.plan")) { - future::plan(orig.plan) - } else { - # or plan sequential - future::plan("sequential") - } - - # step 2 - apply multiple filtering - segs.2 <- - segs %>% - # remove empty elements - purrr::keep(., !c(purrr::map(., length) %>% unlist < 1)) %>% - # keep lists with length == 1 polygon information - purrr::keep(., c(purrr::map(., length) %>% unlist == 1)) %>% - # collapse to a list - purrr::flatten(.) - - # step 3 - combine segmentaion lists - segs <- c(segs.1, segs.2) - } - - ## Sanity checks on segmentation polygons - part 2 ---- - # check if any cells have > 1 segmentation boundary - # TODO: (optionally) optimize sanity code using purrr? - test.segs <- lapply(segs %>% seq, function(i) segs[[i]] %>% length) %>% unlist() - if (any(test.segs > 1)) { - segs.art.index <- which(test.segs > 1) - if (verbose) { - message("Found ", segs.art.index %>% length, - c(" cells with > 1 polygon artifacts:", - ">>> removing artifacts", - ">>> keeping cell boundary with maximum coords") %>% - paste0(., "\n")) - } - # usually artifacts have small boundaries/coords - # find cell boundaries with maximum coords - - # TODO: (optionally) - # - calculate geometric properties, eg circularity: - # - keep single polygon with high circularity values (likely a cell?) 
-
-          for (i in segs.art.index %>% seq) {
-            dims <- lapply(segs[[segs.art.index[i]]] %>% seq(),
-                           function(d) { dim(segs[[segs.art.index[i]]][[d]])[1] } )
-            # get & keep boundaries with maximum coords
-            maxs.segs <- which(unlist(dims) == max(unlist(dims)))
-            segs[[segs.art.index[i]]] <- segs[[segs.art.index[i]]][maxs.segs]
-          }
-        } else { if (verbose) { message("All cells have single polygon boundary (no artifacts)") } }
-
-        # some cells might have > 1 polygon boundary with identical lenght
-        #..in this case, keep only the 1st polygon boundary
-        test.segs <- lapply(segs %>% seq, function(i) segs[[i]] %>% length) %>% unlist()
-        if (any(test.segs > 1)) {
-          segs.art.index <- which(test.segs > 1)
-          if (verbose) { message("Additionally found ", segs.art.index %>% length,
-                                 c(" cells with > 1 polygons (identical length):",
-                                   ">>> only the 1st polygon boundary will be kept") %>%
-                                   paste0(., "\n")) }
-          for (i in segs.art.index %>% seq) {
-            # TODO: (optionally)
-            # - calculate geometric properties, eg circularity:
-            # - keep single polygon with high circularity values (likely a cell?)
-
-            segs[[segs.art.index[i]]] <- segs[[segs.art.index[i]]][1]
-          }
-        }
+        # Get filtered segmentation geometries/polygons
+        segs <-
+          parq %>%
+          st_geometry() %>%
+          # add cell ID
+          set_names(pull(parq, EntityID) %>% as.character)

-        ## Extract cell boundaries per cells
+        # Extract cell boundaries per cell id ----
         # TODO: (optionally) resample & make cell boundaries equidistant?
-        # TODO: (optionally) optimize with purrr::map to get df per list instance?
         if (use.BiocParallel) {
           gc() %>% invisible() # free up memory
           if (verbose) { message("Extracting cell segmentations - using `BiocParallel`") }
           segs_list <- BiocParallel::bplapply(segs %>% seq,
                                               function(i) {
-                                                segs[[i]][[1]] %>%
-                                                  data.table::as.data.table(.) %>%
+                                                # keep multiple polygons per cell
+                                                segs[[i]] %>%
+                                                  unique() %>% # remove any duplicates
+                                                  purrr::lmap(.f = data.table::as.data.table) %>%
+                                                  data.table::rbindlist() %>%
                                                   mutate(cell = names(segs)[i])
                                               }, BPPARAM = BPParam)
         } else {
           segs_list <- future.apply::future_lapply(segs %>% seq,
                                                    function(i) {
-                                                     segs[[i]][[1]] %>%
-                                                       data.table::as.data.table(.) %>%
+                                                     segs[[i]] %>%
+                                                       unique() %>% # remove any duplicates
+                                                       purrr::lmap(.f = data.table::as.data.table) %>%
+                                                       data.table::rbindlist() %>%
                                                        mutate(cell = names(segs)[i])
                                                    }
           )
         }
         segs <- data.table::rbindlist(segs_list) %>%
           data.table::setnames(c("x", "y", "cell"))
-        #names(segs)[1:2] <- c("x", "y")
-        if (verbose) { message("All cell segmentations are loaded..") }
-        segs
+        if (verbose) { message("All cell segmentations are loaded..") }
+        npg <- length(x = unique(x = segs$cell))
+        if (npg < nrow(x = sp)) {
+          warning(nrow(x = sp) - npg, " cells missing polygon information",
+                  immediate. = TRUE) }
+        segs # final segmentations out..
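Before the legacy `.hdf5` branch that follows, it may help to see what the flattening above produces: each cell's geometry becomes rows of a plain x/y/cell table that `CreateSegmentation()` can consume. A condensed sketch with toy geometries, using `st_coordinates()` for brevity rather than the `lmap()`/`rbindlist()` chain in the hunk above (the cell names are EntityID stand-ins):

```r
library(sf)
library(data.table)

segs <- st_sfc(
  st_polygon(list(cbind(c(0, 1, 1, 0, 0), c(0, 0, 1, 1, 0)))),
  st_polygon(list(cbind(c(2, 3, 3, 2, 2), c(2, 2, 3, 3, 2))))
)
names(segs) <- c("cell_1", "cell_2")   # EntityID stand-ins

segs_list <- lapply(seq_along(segs), function(i) {
  xy <- st_coordinates(segs[[i]])      # vertex matrix with X/Y columns
  data.table(x = xy[, "X"], y = xy[, "Y"], cell = names(segs)[i])
})
segs_df <- rbindlist(segs_list)
segs_df                                # x / y / cell, one row per vertex
```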
+ } else { # else use ".hdf5" files from ./cell_boundaries (older version) ppoly <- progressor(steps = length(x = unique(x = sp$fov))) ppoly(message = "Creating polygon coordinates", class = "sticky", From f43fed0f89b3c52dcbf6024e5b1e8e7e11996cb4 Mon Sep 17 00:00:00 2001 From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com> Date: Mon, 28 Aug 2023 15:50:37 +0200 Subject: [PATCH 21/24] ..updated `LoadVizgen()` --- R/convenience.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/convenience.R b/R/convenience.R index 5e9b0cf74..82222b409 100644 --- a/R/convenience.R +++ b/R/convenience.R @@ -155,6 +155,7 @@ LoadVizgen <- function( add.zIndex = TRUE, update.object = TRUE, add.molecules = TRUE, + min.area = 5, verbose, ...) { @@ -163,6 +164,7 @@ LoadVizgen <- function( mol.type = mol.type, filter = filter, z = z, + min.area = min.area, verbose = verbose, ...) @@ -275,7 +277,6 @@ LoadVizgen <- function( if (verbose) { message("Object is ready!") } return(obj) - } #' @return \code{LoadXenium}: A \code{\link[SeuratObject]{Seurat}} object From 69ba89d5bd84266dd47d48bd231cf6729a3e372b Mon Sep 17 00:00:00 2001 From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com> Date: Mon, 28 Aug 2023 16:14:08 +0200 Subject: [PATCH 22/24] adding `.filter_polygons()` --- R/preprocessing.R | 79 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/R/preprocessing.R b/R/preprocessing.R index 34d291284..8a1ad46ed 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -2484,6 +2484,85 @@ ReadVitessce <- function( return(outs) } +#' @import dplyr +#' @import sf +#' @import magrittr +#' Helper function to filter geometryy polygons using \code{sf} package +#' modified function from \code{SpatialFeatureExperiment:::.filter_polygons} +.filter_polygons <- + function(polys, # object of class \code{sf} and \code{data.frame} + min.area, # minimal polygon area to use as a threshold + BPPARAM = BiocParallel::SerialParam(), + verbose) { + # Sanity check on nested polygon lists + test.segs <- lapply(polys %>% st_geometry() %>% seq, function(i) { + polys %>% + st_geometry() %>% .[[i]] %>% length() }) %>% unlist() + if (any(test.segs > 1)) { + segs.art.index <- which(test.segs > 1) + if (verbose) { message("Sanity checks on cell segmentation polygons:", "\n", + ">>> ..found ", segs.art.index %>% length, + " cells with (nested) polygon lists", "\n", + ">>> ..applying filtering") } + } + # remove empty elements + polys.orig <- polys + polys %<>% filter(!st_is_empty(.)) + empty.inds <- which(!polys.orig$ID %in% polys$ID) + if (verbose && length(empty.inds)) { message(">>> ..removing ", + length(empty.inds), " empty polygons") } + + if (st_geometry_type(polys, by_geometry = FALSE) == "MULTIPOLYGON") { + polys_sep <- lapply(st_geometry(polys), function(x) { + st_cast(st_sfc(x), "POLYGON") + }) + areas <- lapply(polys_sep, st_area) + + if (!is.null(min.area)) { + which_keep <- lapply(areas, function(x) which(x > min.area)) + multi_inds <- which(lengths(which_keep) > 1L) + if (length(multi_inds)) { + if (verbose) { message("There are ", length(multi_inds), " cells with multiple", + " pieces in cell segmentation larger than min.area,", + " whose first 10 indices are: ", + paste(multi_inds %>% head(10), # print only first 10 indices + collapse = ", "), + ". 
The largest piece is kept.") }
+                which_keep[multi_inds] <- lapply(areas[multi_inds], which.max)
+            }
+            inds <- lengths(which_keep) > 0L
+            polys <- polys[inds,]
+            # using parallelization, else can take a while when `which_keep` length approaches 100K
+            which_keep <- unlist(which_keep[inds])
+            geo <- st_geometry(polys)
+            new_geo <-
+                BiocParallel::bplapply(seq_along(which_keep), function(i) {
+                    geo[[i]] <- st_cast(geo[i], "POLYGON")[[which_keep[i]]] %>%
+                        unique() %>% # remove any duplicates
+                        st_polygon()
+                }, BPPARAM = BPPARAM) |> st_sfc()
+            st_geometry(polys) <- new_geo
+        } else if (is.null(min.area)) {
+            # use only the maximal area, i.e. the largest polygon
+            if (verbose) { message(">>> ..keeping polygons with the largest area only") }
+            which_keep <-
+                lapply(areas, function(x) which.max(x)) %>% unlist()
+            geo <- st_geometry(polys)
+            new_geo <-
+                BiocParallel::bplapply(seq_along(which_keep), function(i) {
+                    geo[[i]] <- st_cast(geo[i], "POLYGON")[[which_keep[i]]] %>%
+                        unique() %>% # remove any duplicates
+                        st_polygon()
+                }, BPPARAM = BPPARAM) |> st_sfc()
+            st_geometry(polys) <- new_geo
+        }
+    } else {
+        inds <- st_area(st_geometry(polys)) > min.area
+        polys <- polys[inds,]
+    }
+    polys
+  }

From 038271fe97d2a4d184cfd540e6ed5ee22e9b8e21 Mon Sep 17 00:00:00 2001
From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com>
Date: Mon, 28 Aug 2023 16:35:29 +0200
Subject: [PATCH 23/24] optimized parallelization for `.filter_polygons()`

..optimized `BiocParallel` parallelization for `.filter_polygons()`, i.e.
using 2 workers fewer than the total number of provided workers
---
 R/preprocessing.R | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/R/preprocessing.R b/R/preprocessing.R
index 8a1ad46ed..ba87ffd07 100644
--- a/R/preprocessing.R
+++ b/R/preprocessing.R
@@ -3054,8 +3054,12 @@ ReadVizgen <- function(
         .filter_polygons(.,
                          min.area = min.area,
                          BPPARAM =
-                           if (exists("BPParam"))
-                           { BPParam } else { SerialParam() },
+                           if (BPParam$workers > 2) {
+                             # use 2 workers less than total workers
+                             BiocParallel::MulticoreParam(BPParam$workers - 2, tasks = 50L,
+                                                          force.GC = FALSE, progressbar = TRUE)
+                           } else if (!BPParam$workers > 2)
+                           { BPParam } else { SerialParam() },
                          verbose = verbose)

From 9db188a99af6881d449f05dabec7d9e20d581c48 Mon Sep 17 00:00:00 2001
From: alikhuseynov <52053807+alikhuseynov@users.noreply.github.com>
Date: Thu, 15 Feb 2024 12:03:12 +0100
Subject: [PATCH 24/24] rm space-only changes in `convenience.R`

---
 R/convenience.R | 94 ++++++++++++++++----------------
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/R/convenience.R b/R/convenience.R
index 82222b409..4a496e513 100644
--- a/R/convenience.R
+++ b/R/convenience.R
@@ -20,11 +20,11 @@ NULL
 #' @rdname ReadAkoya
 #'
 LoadAkoya <- function(
-    filename,
-    type = c('inform', 'processor', 'qupath'),
-    fov,
-    assay = 'Akoya',
-    ...
+  filename,
+  type = c('inform', 'processor', 'qupath'),
+  fov,
+  assay = 'Akoya',
+  ...
) { # read in matrix and centroids data <- ReadAkoya(filename = filename, type = type) @@ -114,8 +114,8 @@ LoadNanostring <- function(data.dir, fov, assay = 'Nanostring') { molecules = data$pixels, assay = assay ) - obj <- CreateSeuratObject(counts = data$matrix, assay = assay) - +obj <- CreateSeuratObject(counts = data$matrix, assay = assay) + # subset both object and coords based on the cells shared by both cells <- intersect( Cells(x = coords, boundary = "segmentation"), @@ -298,7 +298,7 @@ LoadXenium <- function(data.dir, fov = 'fov', assay = 'Xenium') { type = c("centroids", "segmentations"), ) - segmentations.data <- list( +segmentations.data <- list( "centroids" = CreateCentroids(data$centroids), "segmentation" = CreateSegmentation(data$segmentations) ) @@ -309,7 +309,7 @@ LoadXenium <- function(data.dir, fov = 'fov', assay = 'Xenium') { assay = assay ) - xenium.obj <- CreateSeuratObject(counts = data$matrix[["Gene Expression"]], assay = assay) +xenium.obj <- CreateSeuratObject(counts = data$matrix[["Gene Expression"]], assay = assay) if("Blank Codeword" %in% names(data$matrix)) xenium.obj[["BlankCodeword"]] <- CreateAssayObject(counts = data$matrix[["Blank Codeword"]]) else @@ -317,7 +317,7 @@ LoadXenium <- function(data.dir, fov = 'fov', assay = 'Xenium') { xenium.obj[["ControlCodeword"]] <- CreateAssayObject(counts = data$matrix[["Negative Control Codeword"]]) xenium.obj[["ControlProbe"]] <- CreateAssayObject(counts = data$matrix[["Negative Control Probe"]]) - xenium.obj[[fov]] <- coords +xenium.obj[[fov]] <- coords return(xenium.obj) } @@ -350,27 +350,27 @@ PCAPlot <- function(object, ...) { #' @export #' SpatialDimPlot <- function( - object, - group.by = NULL, - images = NULL, - cols = NULL, - crop = TRUE, - cells.highlight = NULL, - cols.highlight = c('#DE2D26', 'grey50'), - facet.highlight = FALSE, - label = FALSE, - label.size = 7, - label.color = 'white', - repel = FALSE, - ncol = NULL, - combine = TRUE, - pt.size.factor = 1.6, - alpha = c(1, 1), - image.alpha = 1, - stroke = 0.25, - label.box = TRUE, - interactive = FALSE, - information = NULL + object, + group.by = NULL, + images = NULL, + cols = NULL, + crop = TRUE, + cells.highlight = NULL, + cols.highlight = c('#DE2D26', 'grey50'), + facet.highlight = FALSE, + label = FALSE, + label.size = 7, + label.color = 'white', + repel = FALSE, + ncol = NULL, + combine = TRUE, + pt.size.factor = 1.6, + alpha = c(1, 1), + image.alpha = 1, + stroke = 0.25, + label.box = TRUE, + interactive = FALSE, + information = NULL ) { return(SpatialPlot( object = object, @@ -403,22 +403,22 @@ SpatialDimPlot <- function( #' @export #' SpatialFeaturePlot <- function( - object, - features, - images = NULL, - crop = TRUE, - slot = 'data', - keep.scale = "feature", - min.cutoff = NA, - max.cutoff = NA, - ncol = NULL, - combine = TRUE, - pt.size.factor = 1.6, - alpha = c(1, 1), - image.alpha = 1, - stroke = 0.25, - interactive = FALSE, - information = NULL + object, + features, + images = NULL, + crop = TRUE, + slot = 'data', + keep.scale = "feature", + min.cutoff = NA, + max.cutoff = NA, + ncol = NULL, + combine = TRUE, + pt.size.factor = 1.6, + alpha = c(1, 1), + image.alpha = 1, + stroke = 0.25, + interactive = FALSE, + information = NULL ) { return(SpatialPlot( object = object,