Skip to content

Commit 76eb916

Browse files
committed
Fixing the read_nsx issue when the total data size exceeds 2^31-1. Using data streaming instead
1 parent 58c54e0 commit 76eb916

11 files changed

Lines changed: 1544 additions & 33 deletions

File tree

CRAN-SUBMISSION

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
Version: 0.0.5
2-
Date: 2024-09-03 21:09:44 UTC
3-
SHA: 7d153c24e4360b76c6406a2d351a09e38db19551
1+
Version: 0.0.6
2+
Date: 2025-10-23 19:12:42 UTC
3+
SHA: 58c54e0a45b0dbd20e7287db953cf06ae4eaba09

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: readNSx
22
Title: Read 'Blackrock-Microsystems' Files ('NEV', 'NSx')
3-
Version: 0.0.6
3+
Version: 0.0.6.1
44
Authors@R:
55
c(
66
person(given = "Zhengjia", family = "Wang",
@@ -13,7 +13,7 @@ License: MPL-2.0 | file LICENSE
1313
Language: en-US
1414
Encoding: UTF-8
1515
Roxygen: list(markdown = TRUE)
16-
RoxygenNote: 7.3.2
16+
RoxygenNote: 7.3.3
1717
Imports:
1818
data.table,
1919
fastmap,

R/blackrock.R

Lines changed: 206 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,14 +125,219 @@ import_nsp <- function(path, prefix = NULL, exclude_events = "spike",
125125
nsx <- substring(path, nchar(path) - 2)
126126
if(file.exists(path)) {
127127
inc_progress(sprintf("Parsing %s", nsx), "Importing NSx")
128-
results[[nsx]] <- read_nsx(path = path, prefix = prefix, partition_prefix = partition_prefix)
128+
if(isTRUE(getOption("ieegio.legacy.read_nsx", FALSE))) {
129+
results[[nsx]] <- read_nsx_legacy(path = path, prefix = prefix, partition_prefix = partition_prefix)
130+
} else {
131+
results[[nsx]] <- read_nsx(path = path, prefix = prefix, partition_prefix = partition_prefix)
132+
}
129133
} else {
130134
inc_progress(sprintf("Skipping %s", nsx), "Importing NSx")
131135
}
132136
}
133137
return(results)
134138
}
135139

140+
141+
#' Compare new chunked read_nsx with legacy version
#' @description Helper function to validate the new chunked reading implementation
#' by comparing results with the legacy version that reads all data at once.
#' Both methods are run against copies of the same recording in separate
#' temporary directories and the exported per-channel data are compared.
#' @param path path to 'NEV' or 'NSx' files
#' @param exclude_events passed to \code{\link{import_nsp}}
#' @param exclude_nsx passed to \code{\link{import_nsp}}
#' @param partition_prefix passed to \code{\link{import_nsp}}
#' @param tolerance numeric tolerance for comparing signal values (default 1e-10)
#' @param verbose logical, print progress messages
#' @return A list with comparison results including:
#' \describe{
#' \item{identical}{logical, TRUE if all results match}
#' \item{new_result}{result from new chunked method}
#' \item{legacy_result}{result from legacy method}
#' \item{differences}{list of any differences found}
#' }
#' @keywords internal
compare_nsx_methods <- function(path, exclude_events = "spike",
                                exclude_nsx = NULL,
                                partition_prefix = "/part",
                                tolerance = 1e-10,
                                verbose = TRUE) {

  # Strip the extension so every sibling .nev/.ns1-.ns9 file can be located
  path_pattern <- gsub("\\.(nev|ns[1-9])[\\\\/]{0,}$", "", path, ignore.case = TRUE)

  # Find all related files
  all_files <- c(
    paste0(path_pattern, ".nev"),
    paste0(path_pattern, sprintf(".ns%d", seq_len(9)))
  )
  all_files <- all_files[file.exists(all_files)]

  if(length(all_files) == 0) {
    stop("No .nev or .nsx files found at path: ", path)
  }

  # Create two temporary directories, one per import method
  temp_base <- tempdir()
  temp_new <- file.path(temp_base, "nsx_compare_new")
  temp_legacy <- file.path(temp_base, "nsx_compare_legacy")

  # Clean up old temp dirs if they exist
  if(dir.exists(temp_new)) unlink(temp_new, recursive = TRUE)
  if(dir.exists(temp_legacy)) unlink(temp_legacy, recursive = TRUE)

  dir.create(temp_new, recursive = TRUE)
  dir.create(temp_legacy, recursive = TRUE)

  # FIX: remove the scratch directories even when an error aborts the
  # comparison; previously they were only unlinked on the success path
  on.exit({
    unlink(temp_new, recursive = TRUE)
    unlink(temp_legacy, recursive = TRUE)
  }, add = TRUE)

  if(verbose) message("Copying files to temporary directories...")

  # Copy files to both temp directories
  for(f in all_files) {
    fname <- basename(f)
    file.copy(f, file.path(temp_new, fname))
    file.copy(f, file.path(temp_legacy, fname))
  }

  # Get the path to one of the files in temp dirs
  first_file <- basename(all_files[1])
  path_new <- file.path(temp_new, first_file)
  path_legacy <- file.path(temp_legacy, first_file)

  prefix_new <- file.path(temp_new, "output")
  prefix_legacy <- file.path(temp_legacy, "output")

  # Run new method; restore the user's option afterwards no matter what
  if(verbose) message("Running new chunked method...")
  old_option <- getOption("ieegio.legacy.read_nsx")
  on.exit(options(ieegio.legacy.read_nsx = old_option), add = TRUE)

  options(ieegio.legacy.read_nsx = FALSE)
  result_new <- import_nsp(
    path = path_new,
    prefix = prefix_new,
    exclude_events = exclude_events,
    exclude_nsx = exclude_nsx,
    verbose = FALSE,
    partition_prefix = partition_prefix
  )

  # Run legacy method
  if(verbose) message("Running legacy method...")
  options(ieegio.legacy.read_nsx = TRUE)
  result_legacy <- import_nsp(
    path = path_legacy,
    prefix = prefix_legacy,
    exclude_events = exclude_events,
    exclude_nsx = exclude_nsx,
    verbose = FALSE,
    partition_prefix = partition_prefix
  )

  # Compare results
  if(verbose) message("Comparing results...")

  differences <- list()
  all_identical <- TRUE

  # Compare each NSx the new method produced
  nsx_names <- grep("^ns[1-9]$", names(result_new), value = TRUE)

  for(nsx_name in nsx_names) {
    nsx_new <- result_new[[nsx_name]]
    nsx_legacy <- result_legacy[[nsx_name]]

    if(is.null(nsx_new) && is.null(nsx_legacy)) next

    if(is.null(nsx_new) || is.null(nsx_legacy)) {
      differences[[nsx_name]] <- "One result is NULL"
      all_identical <- FALSE
      next
    }

    # Compare number of partitions
    if(nsx_new$nparts != nsx_legacy$nparts) {
      differences[[nsx_name]] <- sprintf(
        "Different number of partitions: new=%d, legacy=%d",
        nsx_new$nparts, nsx_legacy$nparts
      )
      all_identical <- FALSE
      next
    }

    # Compare channel data for each partition
    n_channels <- nsx_new$header_basic$channel_count
    channel_diffs <- list()

    for(part in seq_len(nsx_new$nparts)) {
      for(ch in seq_len(n_channels)) {
        channel_id <- nsx_new$header_extended$CC$electrode_id[ch]
        channel_label <- nsx_new$header_extended$CC$electrode_label[ch]

        fname <- channel_filename(channel_id = channel_id, channel_label = channel_label)

        path_data_new <- file.path(
          sprintf("%s_ieeg%s%d", prefix_new, partition_prefix, part),
          fname
        )
        path_data_legacy <- file.path(
          sprintf("%s_ieeg%s%d", prefix_legacy, partition_prefix, part),
          fname
        )

        if(!file.exists(path_data_new) || !file.exists(path_data_legacy)) {
          channel_diffs[[sprintf("part%d_ch%d", part, channel_id)]] <- "File missing"
          all_identical <- FALSE
          next
        }

        data_new <- load_h5(path_data_new, "data", ram = TRUE)
        data_legacy <- load_h5(path_data_legacy, "data", ram = TRUE)

        if(length(data_new) != length(data_legacy)) {
          channel_diffs[[sprintf("part%d_ch%d", part, channel_id)]] <- sprintf(
            "Different lengths: new=%d, legacy=%d",
            length(data_new), length(data_legacy)
          )
          all_identical <- FALSE
          next
        }

        # FIX: with any NA sample, `max_diff > tolerance` used to evaluate to
        # NA and crash the `if`; compare the NA patterns explicitly first
        if(!identical(is.na(data_new), is.na(data_legacy))) {
          channel_diffs[[sprintf("part%d_ch%d", part, channel_id)]] <- "NA pattern mismatch"
          all_identical <- FALSE
          next
        }

        # suppressWarnings: all-NA or zero-length input makes max() warn and
        # return -Inf, which correctly compares as "no difference" below
        max_diff <- suppressWarnings(max(abs(data_new - data_legacy), na.rm = TRUE))
        if(is.finite(max_diff) && max_diff > tolerance) {
          channel_diffs[[sprintf("part%d_ch%d", part, channel_id)]] <- sprintf(
            "Max difference: %g", max_diff
          )
          all_identical <- FALSE
        }
      }
    }

    if(length(channel_diffs) > 0) {
      differences[[nsx_name]] <- channel_diffs
    }
  }

  if(verbose) {
    if(all_identical) {
      message("SUCCESS: All results are identical (within tolerance ", tolerance, ")")
    } else {
      message("DIFFERENCES FOUND:")
      print(differences)
    }
  }

  # Temp directories are removed by on.exit above
  list(
    identical = all_identical,
    new_result = result_new,
    legacy_result = result_legacy,
    differences = differences
  )
}
339+
340+
136341
#' @title Load 'NEV' information from path prefix
137342
#' @param x path \code{prefix} specified in \code{\link{import_nsp}}, or
138343
#' \code{'nev/nsx'} object

R/cpp11.R

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,18 @@ parseBCIParamDef <- function(statement) {
6464
.Call(`_readNSx_parseBCIParamDef`, statement)
6565
}
6666

67+
# Auto-generated 'cpp11' binding (R/cpp11.R is machine-written; regenerate via
# cpp11::cpp_register() rather than editing by hand). Dispatches to the native
# routine that scans data-packet boundaries of an NSx file using the v3.0 file
# specification. Presumably returns packet offsets/sample counts for chunked
# streaming -- TODO confirm against the C++ source in src/.
scanNSxPackets30 <- function(filePath, nBytes, nChannels, skipBytes) {
  .Call(`_readNSx_scanNSxPackets30`, filePath, nBytes, nChannels, skipBytes)
}
70+
71+
# Auto-generated 'cpp11' binding (do not edit by hand). Same role as
# scanNSxPackets30 but for the NSx 2.x file specification: scans data-packet
# boundaries without loading the full signal into memory. Exact return layout
# is defined by the C++ implementation -- TODO confirm in src/.
scanNSxPackets2x <- function(filePath, nBytes, nChannels, skipBytes) {
  .Call(`_readNSx_scanNSxPackets2x`, filePath, nBytes, nChannels, skipBytes)
}
74+
75+
# Auto-generated 'cpp11' binding (do not edit by hand). Reads a window of
# samples (`sampleOffset`/`sampleCount`) from a single data packet located at
# `byteOffset`, applying the linear calibration `slope`/`intercept`. This is
# the per-chunk reader used to avoid the 2^31-1 total-size limit -- semantics
# presumed from argument names; confirm against the C++ source in src/.
readNSxPacketData <- function(filePath, byteOffset, nDataPoints, nChannels, slope, intercept, sampleOffset, sampleCount) {
  .Call(`_readNSx_readNSxPacketData`, filePath, byteOffset, nDataPoints, nChannels, slope, intercept, sampleOffset, sampleCount)
}
78+
6779
# Auto-generated 'cpp11' binding (do not edit by hand). Legacy whole-packet
# reader for the NSx 3.0 file specification: reads an entire data packet in
# one call with `slope`/`intercept` calibration applied. Kept alongside the
# chunked readNSxPacketData() path above.
readNSxDataPacket30 <- function(filePath, nBytes, sampleRate, nChannels, skipBytes, slope, intercept) {
  .Call(`_readNSx_readNSxDataPacket30`, filePath, nBytes, sampleRate, nChannels, skipBytes, slope, intercept)
}

R/hdf5-alternative.R

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,102 @@ LazyFakeH5Internal <- R6::R6Class(
134134
return(meta$.dim)
135135
}
136136
return(meta$.length)
137+
},
138+
139+
allocate = function(dims, chunk = "auto", level = 4, ctype = "numeric",
                    replace = TRUE, new_file = FALSE) {
  # Reserve an on-disk dataset of shape `dims`, initialized entirely to NA,
  # so later write_slice() calls can fill it region by region.
  if (private$read_only) {
    stop("File is read-only. Cannot allocate dataset.")
  }

  store_dir <- private$ensure_data_path(new_file = new_file)
  payload_file <- file.path(store_dir, "data.rds")
  meta_file <- file.path(store_dir, "meta.rds")

  # Refuse to clobber an existing dataset unless explicitly allowed
  if (file.exists(payload_file) && !replace) {
    stop("Dataset already exists. Use replace = TRUE to overwrite.")
  }

  dims <- as.integer(dims)

  # NA fill value matches the requested storage type
  na_fill <- if (ctype == "integer") NA_integer_ else NA_real_
  placeholder <- array(na_fill, dim = dims)

  # Persist the payload plus the metadata the lazy reader expects
  saveRDS(placeholder, file = payload_file)
  saveRDS(list(
    .length = prod(dims),
    .dim = dims,
    .r_storage = storage.mode(placeholder),
    chunk = chunk,
    level = level,
    ctype = ctype
  ), file = meta_file)

  invisible(self)
},
177+
178+
write_slice = function(x, start) {
  # Overwrite a rectangular region of the stored dataset, beginning at the
  # 1-based position `start` (one entry per dimension of the dataset).
  if (private$read_only) {
    stop("File is read-only. Cannot write to dataset.")
  }

  store_dir <- private$get_data_path()
  payload_file <- file.path(store_dir, "data.rds")

  # NOTE(review): message names allocate_h5() while the R6 method is
  # allocate(); confirm the public wrapper name is correct
  if (!file.exists(payload_file)) {
    stop("Dataset does not exist. Call allocate_h5() first.")
  }

  start <- as.integer(start)

  # Shape of the incoming chunk (plain vectors count as 1-D)
  chunk_dims <- dim(x)
  if (is.null(chunk_dims)) {
    chunk_dims <- length(x)
  }

  if (length(start) != length(chunk_dims)) {
    stop("start must have the same number of dimensions as the data")
  }

  # This fallback backend keeps the whole payload in a single RDS file, so a
  # partial write is a read-modify-write of the full array
  target <- readRDS(payload_file)
  target_dims <- if (is.null(dim(target))) length(target) else dim(target)

  # Validate that the region fits inside the dataset
  last_idx <- start + chunk_dims - 1L
  if (any(last_idx > target_dims) || any(start < 1L)) {
    stop(sprintf(
      "Write out of bounds: start=%s, count=%s, dataset dims=%s",
      paste(start, collapse = ","),
      paste(chunk_dims, collapse = ","),
      paste(target_dims, collapse = ",")
    ))
  }

  # Per-dimension index ranges for the subassignment
  index_list <- lapply(seq_along(start), function(d) {
    seq.int(from = start[d], length.out = chunk_dims[d])
  })

  target <- do.call(`[<-`, c(list(target), index_list, list(value = x)))

  saveRDS(target, file = payload_file)

  invisible(self)
}
138234
)
139235
)
@@ -155,6 +251,27 @@ save_fakeh5 <- function(x, file, name, chunk = 'auto', level = 4,replace = TRUE,
155251
}
156252

157253

254+
allocate_fakeh5 <- function(file, name, dims, chunk = "auto", level = 4,
                            replace = TRUE, new_file = FALSE, ctype = "numeric",
                            quiet = FALSE) {
  # Create (or replace) a pre-allocated, NA-filled fake-HDF5 dataset backed by
  # RDS files, then hand back the normalized container path invisibly.
  handle <- LazyFakeH5Internal$new(file, name, read_only = FALSE, quiet = quiet)
  handle$allocate(
    dims = dims, chunk = chunk, level = level,
    ctype = ctype, replace = replace, new_file = new_file
  )
  invisible(normalizePath(sprintf("%s.ralt", file), mustWork = FALSE))
}
265+
266+
267+
write_fakeh5_slice <- function(x, file, name, start, quiet = FALSE) {
  # Patch a region of an existing fake-HDF5 dataset, starting at the 1-based
  # position `start` (one index per dimension); returns the container path
  # invisibly so calls can be chained or ignored.
  handle <- LazyFakeH5Internal$new(file, name, read_only = FALSE, quiet = quiet)
  handle$write_slice(x = x, start = start)
  invisible(normalizePath(sprintf("%s.ralt", file), mustWork = FALSE))
}
273+
274+
158275
fakehh5_names <- function(file){
159276
if(!endsWith(tolower(file_path), ".ralt")) {
160277
file_path <- sprintf("%s.ralt", file_path)

0 commit comments

Comments
 (0)