Skip to content

Commit dc38ce2

Browse files
pinin4fjordsclaude
andauthored
Support exclude_samples_col/exclude_samples_values in contrast validation (#87)
- Filter samples per-contrast before building the design matrix and checking full rank, aligning with differentialabundance pipeline behaviour - Add validate_design parameter to disable design matrix checks when not needed Fixes #86 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 040b912 commit dc38ce2

2 files changed

Lines changed: 59 additions & 30 deletions

File tree

R/accessory.R

Lines changed: 45 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,8 @@ checkListIsSubset <- function(test_list,
910910
#' sample sheet variables to be used as blocking factors
911911
#' @param convert_to_list Convert output to a list as used internally by
912912
#' shinyngs?
913+
#' @param validate_design Validate design matrix (check for NAs, full rank,
914+
#' numeric columns, special characters)? Set to FALSE to skip these checks.
913915
#'
914916
#' @return output Validated contrasts data frame
915917
#' @export
@@ -921,7 +923,8 @@ read_contrasts <-
921923
reference_column = "reference",
922924
target_column = "target",
923925
blocking_column = "blocking",
924-
convert_to_list = FALSE) {
926+
convert_to_list = FALSE,
927+
validate_design = TRUE) {
925928

926929
# Read the contrasts depending on the file format (CSV or YAML)
927930
if (grepl("\\.csv$", filename)) {
@@ -1029,37 +1032,51 @@ read_contrasts <-
10291032
formula_vars <- all.vars(as.formula(contrasts$formula[i]))
10301033
}
10311034

1032-
design_cols <- unique(na.omit(c(contrasts[[variable_column]][i], blocking_vars, formula_vars)))
1033-
design_matrix <- samples[, design_cols, drop = FALSE]
1034-
1035-
# Ensure there are no NA values in the design matrix.
1036-
if (any(is.na(design_matrix))) {
1037-
stop("NA values found in one or more design matrix columns.")
1038-
}
1039-
1040-
# Check that the design matrix is full rank.
1041-
mm <- model.matrix(~ . - 1, data = design_matrix)
1042-
if (qr(mm)$rank < ncol(mm)) {
1043-
stop(paste("Design matrix is not full rank.", "Model matrix columns:", paste(colnames(mm), collapse = ", "), "\n"))
1044-
}
1045-
1046-
# Warn about continuous covariates in the design matrix columns.
1047-
for (col in design_cols) {
1048-
if (is.numeric(samples[[col]])) {
1049-
warning(paste("Column", col, "is numeric and may be treated as continuous."))
1035+
if (validate_design) {
1036+
design_cols <- unique(na.omit(c(contrasts[[variable_column]][i], blocking_vars, formula_vars)))
1037+
1038+
# Filter samples if exclude columns are specified for this contrast
1039+
contrast_samples <- samples
1040+
if ("exclude_samples_col" %in% colnames(contrasts) && "exclude_samples_values" %in% colnames(contrasts)) {
1041+
if (!is.na(contrasts$exclude_samples_col[i]) && !is.na(contrasts$exclude_samples_values[i])) {
1042+
exclude_col <- contrasts$exclude_samples_col[i]
1043+
exclude_vals <- simpleSplit(contrasts$exclude_samples_values[i], ";")
1044+
contrast_samples <- samples[!samples[[exclude_col]] %in% exclude_vals, , drop = FALSE]
1045+
}
10501046
}
1051-
}
1052-
1053-
# Check that values in design matrix columns do not contain disallowed special characters.
1054-
for (col in design_cols) {
1055-
vals <- as.character(samples[[col]])
1056-
for (sc in c("/", "\\\\")) { # Default special characters: c("/", "\\\\")
1057-
if (any(grepl(sc, vals))) {
1058-
warning(paste("Column", col, "contains special character", sc,
1059-
"which may cause issues downstream."))
1047+
1048+
design_matrix <- contrast_samples[, design_cols, drop = FALSE]
1049+
1050+
# Ensure there are no NA values in the design matrix.
1051+
if (any(is.na(design_matrix))) {
1052+
stop("NA values found in one or more design matrix columns.")
1053+
}
1054+
1055+
# Check that the design matrix is full rank.
1056+
mm <- model.matrix(~ . - 1, data = design_matrix)
1057+
if (qr(mm)$rank < ncol(mm)) {
1058+
stop(paste("Design matrix is not full rank.", "Model matrix columns:", paste(colnames(mm), collapse = ", "), "\n"))
1059+
}
1060+
1061+
# Warn about continuous covariates in the design matrix columns.
1062+
for (col in design_cols) {
1063+
if (is.numeric(samples[[col]])) {
1064+
warning(paste("Column", col, "is numeric and may be treated as continuous."))
1065+
}
1066+
}
1067+
1068+
# Check that values in design matrix columns do not contain disallowed special characters.
1069+
for (col in design_cols) {
1070+
vals <- as.character(samples[[col]])
1071+
for (sc in c("/", "\\\\")) { # Default special characters: c("/", "\\\\")
1072+
if (any(grepl(sc, vals))) {
1073+
warning(paste("Column", col, "contains special character", sc,
1074+
"which may cause issues downstream."))
1075+
}
10601076
}
10611077
}
10621078
}
1079+
10631080
var <- contrasts[i, variable_column]
10641081
ref <- contrasts[i, reference_column]
10651082
tgt <- contrasts[i, target_column]

man/read_contrasts.Rd

Lines changed: 14 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)