Skip to content

Commit 1427704

Browse files
committed
Set up behavioral postprocessing structure
- Reorganize postprocessing: move EEG scripts to postprocessing-eeg/ folder
- Create postprocessing-behavior/ R project for behavioral data processing
- Add config/paths.R reading from existing preprocessed link file
- Add config/settings.R with parameters matching matlab (codes, thresholds, RT trimming)
- Add functions/load_behavioral_data.R with validation
- Test data loading successfully for behavioral CSVs
1 parent 57c2bed commit 1427704

18 files changed

+562
-1
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@
22
.DS_Store
33

44
# ignore data specific files
5-
derivatives/*
5+
derivatives/*
6+
.Rproj.user
34.3 KB
Binary file not shown.
Lines changed: 346 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,346 @@
1+
# paths.R - path configuration for behavioral analysis
# author: marlene buch

library(here)
library(stringr)

# read link to preprocessed data (same link file used by the matlab pipeline)
# here() gives the R project root; climb three levels up to the repo root
repo_root <- file.path(here(), "..", "..", "..")
preprocessed_link <- file.path(repo_root, "input", "preprocessed")

# NOTE(review): the original piped with %>%, but neither here nor stringr
# attaches the magrittr pipe, so %>% was undefined in a fresh session;
# call str_trim() directly instead. (Also removed a duplicated, stale
# paste of this header that read the link relative to the wrong root.)
preprocessed_path <- str_trim(readLines(preprocessed_link, warn = FALSE))

# construct paths to the preprocessed data
behavioral_dir <- file.path(preprocessed_path, "s1_r1", "behavior")
eeg_dir <- file.path(preprocessed_path, "s1_r1", "eeg")

# output directory (timestamped per run)
output_dir <- file.path(repo_root, "derivatives",
                        paste0(Sys.Date(), "_behavioral-analysis"))

# output subdirectories
cleaned_data_dir <- file.path(output_dir, "cleaned_data")
descriptives_dir <- file.path(output_dir, "descriptives")
statistics_dir <- file.path(output_dir, "statistics")
logs_dir <- file.path(output_dir, "logs")

# matlab outputs for validation (if needed)
matlab_erp_dir <- file.path(repo_root, "derivatives")

# create all output directories (idempotent; recursive = TRUE creates parents)
create_output_dirs <- function() {
  for (dir in c(output_dir, cleaned_data_dir, descriptives_dir,
                statistics_dir, logs_dir)) {
    dir.create(dir, showWarnings = FALSE, recursive = TRUE)
  }
  message("output directories created:")
  message("  ", output_dir)
}

# validate that the link file and behavioral data directory exist;
# stops with an informative error otherwise
validate_paths <- function() {
  if (!file.exists(preprocessed_link)) {
    stop("link file not found: ", preprocessed_link)
  }
  if (!dir.exists(behavioral_dir)) {
    stop("behavioral data directory not found: ", behavioral_dir)
  }
  message("paths validated")
  message("  behavioral data: ", behavioral_dir)
}

validate_paths()
52+
# settings.R - analysis parameters matching matlab postprocessing
# CRITICAL: these must match batch_eeg_postprocessing.m exactly
# author: marlene buch

# === BEHAVIORAL CODES ===
# tier 1: primary hypothesis codes (all-or-nothing for dataset inclusion)
PRIMARY_CODES <- c(102, 104, 202, 204)
PRIMARY_CODE_NAMES <- c("social-invis-FE", "social-invis-NFG",
                        "nonsoc-invis-FE", "nonsoc-invis-NFG")

# tier 2: secondary analysis codes (condition-specific inclusion)
SECONDARY_CODES <- c(111, 112, 113, 211, 212, 213)
SECONDARY_CODE_NAMES <- c("social-vis-corr", "social-vis-FE", "social-vis-NFE",
                          "nonsoc-vis-corr", "nonsoc-vis-FE", "nonsoc-vis-NFE")

# all codes combined; names vector is keyed by the (character-coerced) code
ALL_CODES <- c(PRIMARY_CODES, SECONDARY_CODES)
ALL_CODE_NAMES <- setNames(
  c(PRIMARY_CODE_NAMES, SECONDARY_CODE_NAMES),
  ALL_CODES
)

# === INCLUSION THRESHOLDS (MUST MATCH MATLAB) ===
# minimum trials per condition for inclusion
MIN_EPOCHS_PER_CODE <- 10
# minimum overall accuracy (calculated on visible target trials only)
MIN_ACCURACY <- 0.60

# === RT TRIMMING PARAMETERS (MUST MATCH MATLAB) ===
# rt lower bound (trials < 150ms excluded)
RT_LOWER_BOUND <- 150  # milliseconds
# rt outlier threshold (per condition)
RT_OUTLIER_THRESHOLD <- 3  # standard deviations

# === CONDITION GROUPINGS FOR ANALYSES ===
# visibility conditions
VISIBLE_CODES <- SECONDARY_CODES
INVISIBLE_CODES <- PRIMARY_CODES
# social conditions
SOCIAL_CODES <- c(111, 112, 113, 102, 104)
NONSOCIAL_CODES <- c(211, 212, 213, 202, 204)
# response types
CORRECT_CODES <- c(111, 211)
FLANKER_ERROR_CODES <- c(112, 212, 102, 202)
NONFLANKER_CODES <- c(113, 213, 104, 204)  # nfe in visible, nfg in invisible

# fix(review): removed trailing console residue — a self-referential
# source("config/settings.R") (would recurse if left in the file) and
# two interactive print() checks that do not belong in the config script.
94+
# paths.R - path configuration for behavioral analysis
# author: marlene buch

library(here)
library(stringr)

# read link to preprocessed data (same link file used by the matlab pipeline)
# here() gives the R project root; climb three levels up to the repo root
repo_root <- file.path(here(), "..", "..", "..")
preprocessed_link <- file.path(repo_root, "input", "preprocessed")

# NOTE(review): the original piped with %>%, but neither here nor stringr
# attaches the magrittr pipe, so %>% was undefined in a fresh session;
# call str_trim() directly instead. (Also removed a duplicated, stale
# paste of this header whose repo_root was one level too shallow.)
preprocessed_path <- str_trim(readLines(preprocessed_link, warn = FALSE))

# construct paths to the preprocessed data
behavioral_dir <- file.path(preprocessed_path, "s1_r1", "behavior")
eeg_dir <- file.path(preprocessed_path, "s1_r1", "eeg")

# output directory (timestamped per run)
output_dir <- file.path(repo_root, "derivatives",
                        paste0(Sys.Date(), "_postprocessing-behavior"))

# output subdirectories
cleaned_data_dir <- file.path(output_dir, "cleaned_data")
descriptives_dir <- file.path(output_dir, "descriptives")
statistics_dir <- file.path(output_dir, "statistics")
logs_dir <- file.path(output_dir, "logs")

# matlab outputs for validation (if needed)
matlab_erp_dir <- file.path(repo_root, "derivatives")

# create all output directories (idempotent; recursive = TRUE creates parents)
create_output_dirs <- function() {
  for (dir in c(output_dir, cleaned_data_dir, descriptives_dir,
                statistics_dir, logs_dir)) {
    dir.create(dir, showWarnings = FALSE, recursive = TRUE)
  }
  message("output directories created:")
  message("  ", output_dir)
}

# validate that the link file and behavioral data directory exist;
# stops with an informative error otherwise
validate_paths <- function() {
  if (!file.exists(preprocessed_link)) {
    stop("link file not found: ", preprocessed_link)
  }
  if (!dir.exists(behavioral_dir)) {
    stop("behavioral data directory not found: ", behavioral_dir)
  }
  message("paths validated")
  message("  behavioral data: ", behavioral_dir)
}

# fix(review): removed a stray source("config/paths.R") that immediately
# preceded this call — sourcing the file from within itself would recurse.
validate_paths()
148+
# settings.R - postprocessing parameters matching matlab postprocessing
# CRITICAL: these must match batch_eeg_postprocessing.m exactly
# author: marlene buch

# === BEHAVIORAL CODES ===
# tier 1: primary hypothesis codes (all-or-nothing for dataset inclusion)
PRIMARY_CODES <- c(102, 104, 202, 204)
PRIMARY_CODE_NAMES <- c("social-invis-FE", "social-invis-NFG",
                        "nonsoc-invis-FE", "nonsoc-invis-NFG")

# tier 2: secondary analysis codes (condition-specific inclusion)
SECONDARY_CODES <- c(111, 112, 113, 211, 212, 213)
SECONDARY_CODE_NAMES <- c("social-vis-corr", "social-vis-FE", "social-vis-NFE",
                          "nonsoc-vis-corr", "nonsoc-vis-FE", "nonsoc-vis-NFE")

# all codes combined; names vector is keyed by the (character-coerced) code
ALL_CODES <- c(PRIMARY_CODES, SECONDARY_CODES)
ALL_CODE_NAMES <- setNames(
  c(PRIMARY_CODE_NAMES, SECONDARY_CODE_NAMES),
  ALL_CODES
)

# === INCLUSION THRESHOLDS (MUST MATCH MATLAB) ===
# minimum trials per condition for inclusion
MIN_EPOCHS_PER_CODE <- 10
# minimum overall accuracy (calculated on visible target trials only)
MIN_ACCURACY <- 0.60

# === RT TRIMMING PARAMETERS (MUST MATCH MATLAB) ===
# rt lower bound (trials < 150ms excluded)
RT_LOWER_BOUND <- 150  # milliseconds
# rt outlier threshold (per condition)
RT_OUTLIER_THRESHOLD <- 3  # standard deviations

# === CONDITION GROUPINGS FOR ANALYSES ===
# visibility conditions
VISIBLE_CODES <- SECONDARY_CODES
INVISIBLE_CODES <- PRIMARY_CODES
# social conditions
SOCIAL_CODES <- c(111, 112, 113, 102, 104)
NONSOCIAL_CODES <- c(211, 212, 213, 202, 204)
# response types
CORRECT_CODES <- c(111, 211)
FLANKER_ERROR_CODES <- c(112, 212, 102, 202)
NONFLANKER_CODES <- c(113, 213, 104, 204)  # nfe in visible, nfg in invisible

# fix(review): removed trailing console residue — a self-referential
# source("config/settings.R") and an interactive print(PRIMARY_CODES).
189+
# load_behavioral_data.r - load cleaned behavioral csvs from preprocessing
190+
# author: marlene buch
191+
library(tidyverse)
192+
load_behavioral_data <- function(behavioral_dir, subjects = NULL) {
  # load all cleaned behavioral csvs from soccer-dataset preprocessing
  #
  # inputs:
  #   behavioral_dir - path to preprocessed behavior folder
  #   subjects - optional vector of subject ids to load (e.g., c("390001", "390002"))
  #
  # outputs:
  #   tibble with all subjects' behavioral data, one row per trial,
  #   with a leading "subject" column; stops if no subject dirs exist
  message("loading behavioral data from: ", behavioral_dir)

  # find all subject directories
  subject_dirs <- list.dirs(behavioral_dir, recursive = FALSE, full.names = TRUE)
  if (length(subject_dirs) == 0) {
    stop("no subject directories found in: ", behavioral_dir)
  }

  # filter to requested subjects if specified
  if (!is.null(subjects)) {
    subject_pattern <- paste0("sub-", subjects, collapse = "|")
    subject_dirs <- subject_dirs[str_detect(basename(subject_dirs), subject_pattern)]
  }
  message("found ", length(subject_dirs), " subject directories")

  # load all csvs; per-subject NULLs (missing csv) are dropped by map_dfr
  all_data <- map_dfr(subject_dirs, function(subject_dir) {
    # extract numeric subject id from the directory name
    subject_id <- str_extract(basename(subject_dir), "\\d+")

    # find csv file (should be exactly one per subject)
    # fix: list.files(pattern =) takes a regex, not a glob; the original
    # "*_clean\\.csv$" had a dangling leading '*' quantifier
    csv_files <- list.files(subject_dir, pattern = "_clean\\.csv$", full.names = TRUE)
    if (length(csv_files) == 0) {
      warning("no clean csv found for subject ", subject_id)
      return(NULL)
    }
    if (length(csv_files) > 1) {
      warning("multiple csvs found for subject ", subject_id, ", using first")
    }

    # read csv and tag every row with its subject id
    data <- read_csv(csv_files[1], show_col_types = FALSE) %>%
      mutate(subject = subject_id) %>%
      relocate(subject)
    return(data)
  })

  message("loaded data for ", n_distinct(all_data$subject), " subjects")
  message("total trials: ", nrow(all_data))
  return(all_data)
}
236+
# helper function to get list of available subjects
237+
# helper: list the numeric subject ids available under behavioral_dir,
# sorted; directories without a numeric id are dropped
get_available_subjects <- function(behavioral_dir) {
  dir_names <- list.dirs(behavioral_dir, recursive = FALSE, full.names = FALSE)
  ids <- str_extract(dir_names, "\\d+")
  ids <- ids[!is.na(ids)]
  sort(ids)
}
242+
# validate loaded data structure
243+
# validate loaded data structure: stop on missing required columns,
# warn on behavioral codes outside the configured ALL_CODES set
validate_behavioral_data <- function(data) {
  # columns every cleaned behavioral csv must provide
  required_cols <- c(
    "subject", "code", "flankerResponse_rt", "flankerResponse_keys",
    "confidenceRating", "responseType", "visInvis", "block_condition",
    "target", "flanker", "correctKey", "flankerKey"
  )

  missing_cols <- setdiff(required_cols, names(data))
  if (length(missing_cols) > 0) {
    stop("missing required columns: ", paste(missing_cols, collapse = ", "))
  }

  # unexpected codes are recoverable, so warn rather than stop
  unexpected_codes <- setdiff(unique(data$code), ALL_CODES)
  if (length(unexpected_codes) > 0) {
    warning("unexpected behavioral codes found: ", paste(unexpected_codes, collapse = ", "))
  }

  message("behavioral data structure validated")
  invisible(TRUE)
}
262+
# --- smoke test: load configuration, then load and validate one subject ---
source("config/paths.R")
source("config/settings.R")
source("functions/load_behavioral_data.R")

# test with one subject
test_data <- load_behavioral_data(behavioral_dir, subjects = "390001")
head(test_data)
validate_behavioral_data(test_data)
269+
# load_behavioral_data.r - load cleaned behavioral csvs from preprocessing
270+
# author: marlene buch
271+
library(tidyverse)
272+
load_behavioral_data <- function(behavioral_dir, subjects = NULL) {
  # load all cleaned behavioral csvs from soccer-dataset preprocessing
  #
  # inputs:
  #   behavioral_dir - path to preprocessed behavior folder
  #   subjects - optional vector of subject ids to load (e.g., c("390001", "390002"))
  #
  # outputs:
  #   tibble with all subjects' behavioral data, one row per trial,
  #   with a leading "subject" column; stops if no subject dirs exist
  message("loading behavioral data from: ", behavioral_dir)

  # find all subject directories
  subject_dirs <- list.dirs(behavioral_dir, recursive = FALSE, full.names = TRUE)
  if (length(subject_dirs) == 0) {
    stop("no subject directories found in: ", behavioral_dir)
  }

  # filter to requested subjects if specified
  if (!is.null(subjects)) {
    subject_pattern <- paste0("sub-", subjects, collapse = "|")
    subject_dirs <- subject_dirs[str_detect(basename(subject_dirs), subject_pattern)]
  }
  message("found ", length(subject_dirs), " subject directories")

  # load all csvs; per-subject NULLs (missing csv) are dropped by map_dfr
  all_data <- map_dfr(subject_dirs, function(subject_dir) {
    # extract numeric subject id from the directory name
    subject_id <- str_extract(basename(subject_dir), "\\d+")

    # find csv file (should be exactly one per subject)
    # fix: list.files(pattern =) takes a regex, not a glob; the original
    # "*_clean\\.csv$" had a dangling leading '*' quantifier
    csv_files <- list.files(subject_dir, pattern = "_clean\\.csv$", full.names = TRUE)
    if (length(csv_files) == 0) {
      warning("no clean csv found for subject ", subject_id)
      return(NULL)
    }
    if (length(csv_files) > 1) {
      warning("multiple csvs found for subject ", subject_id, ", using first")
    }

    # read csv and tag every row with its subject id
    data <- read_csv(csv_files[1], show_col_types = FALSE) %>%
      mutate(subject = subject_id) %>%
      relocate(subject)
    return(data)
  })

  message("loaded data for ", n_distinct(all_data$subject), " subjects")
  message("total trials: ", nrow(all_data))
  return(all_data)
}
316+
# helper function to get list of available subjects
317+
# helper: list the numeric subject ids available under behavioral_dir,
# sorted; directories without a numeric id are dropped
get_available_subjects <- function(behavioral_dir) {
  dir_names <- list.dirs(behavioral_dir, recursive = FALSE, full.names = FALSE)
  ids <- str_extract(dir_names, "\\d+")
  ids <- ids[!is.na(ids)]
  sort(ids)
}
322+
# validate loaded data structure
323+
# validate loaded data structure: stop on missing required columns,
# warn on behavioral codes outside the configured ALL_CODES set
validate_behavioral_data <- function(data) {
  # columns every cleaned behavioral csv must provide
  # (dot-separated names match the psychopy-style csv headers)
  required_cols <- c(
    "subject", "code", "flankerResponse.rt", "flankerResponse.keys",
    "confidenceRating", "responseType", "visInvis", "block_condition",
    "target", "flanker", "correctKey", "flankerKey"
  )

  missing_cols <- setdiff(required_cols, names(data))
  if (length(missing_cols) > 0) {
    stop("missing required columns: ", paste(missing_cols, collapse = ", "))
  }

  # unexpected codes are recoverable, so warn rather than stop
  unexpected_codes <- setdiff(unique(data$code), ALL_CODES)
  if (length(unexpected_codes) > 0) {
    warning("unexpected behavioral codes found: ", paste(unexpected_codes, collapse = ", "))
  }

  message("behavioral data structure validated")
  invisible(TRUE)
}
342+
# --- re-run validation and inspect trial counts after updating the loader ---
source("functions/load_behavioral_data.R")
validate_behavioral_data(test_data)

# trial counts per code x response type
# fix(review): this statement was pasted three times in the history; once is enough
test_data %>% count(code, responseType) %>% arrange(code)

0 commit comments

Comments
 (0)