45 commits
d21a720
Changes for metamorpheus #1
Jun 12, 2025
eaee20e
Added changes for metamorpheus no mbr file
Jun 13, 2025
b09fae2
Explorer Migration Changes #1
Jun 13, 2025
f5927d8
Path correction
Jun 13, 2025
8bd37a9
Transfer all benchmark files - Correction
Jun 13, 2025
71f62f4
Changes for benchmark folder
Jun 13, 2025
d2275bd
Changes for path correction in slurm file
Jun 14, 2025
7300bcc
Removed wrong character in script
Jun 14, 2025
79e0a2d
Changes for gcc added
Jun 16, 2025
434c1c0
Changing back R-LIBS_User env in config
Jun 17, 2025
83958b8
changes done to fix env
Jun 17, 2025
bf4938d
Changes for failing package
Jun 17, 2025
6b784f1
Added changes for path
Jun 17, 2025
959d271
Changes added for nolptr
Jun 17, 2025
138c802
Changes for Library path
Jun 17, 2025
1980ad9
Added changes for POC #1
Jun 18, 2025
331ba24
Added symlink of error package in our directory
Jun 18, 2025
5c497f4
Changes for slurm
Jun 18, 2025
d78a1e9
Changes for lesser RAM
Jun 18, 2025
4917efe
Changes for MSStats Convert added
Jun 18, 2025
90ca387
Changes for MSStats
Jun 18, 2025
09cacfa
Changes for Script with fix
Jun 19, 2025
e0472b9
Changes to debug output
Jun 19, 2025
2d8e3b3
Change for Script order
Jun 19, 2025
73e6bcc
Changes to see file print
Jun 19, 2025
244d42f
Corrections added for metamorpheus script file
Jun 19, 2025
462925f
Rerun metamorpheus benchmark
Jun 19, 2025
1b962cf
Changes for library
Jun 19, 2025
f0ec621
Changes for MBR
Jun 19, 2025
ec9fe42
Changes for calculate metrics
Aug 15, 2025
c7e7b60
All PR comments resolved
Aug 15, 2025
d8bcb03
Fix Bug : Unique comparisons not visible
Aug 15, 2025
ba65d2d
Changes reverted
Aug 15, 2025
423b485
Removed unnecessary file
Aug 15, 2025
7485d95
Changes for variable name correction
Aug 15, 2025
4fb9d06
Variable rename revert
Aug 15, 2025
9fc6a86
PR feedbacks
Aug 15, 2025
8518c44
Changes for Unique Organisms
Aug 15, 2025
8eefbff
Changes for protein_mappings
Aug 15, 2025
a9fbe80
Correction for Organism column
Aug 15, 2025
3be70e4
Organisms
Aug 15, 2025
85a41cd
Changes for organisms column
Aug 18, 2025
c4c0c86
Reverted changes
Aug 18, 2025
f6ac63e
Added new arguments
Aug 18, 2025
90cf018
Removed params
Aug 18, 2025
18 changes: 9 additions & 9 deletions .github/workflows/benchmark.yml
@@ -3,8 +3,8 @@ name: Run Simple R Script on HPC via Slurm
on:
push:
branches:
# - feature/multiple-scripts
- devel
- feature/metamorpheus-scripts
# - devel

jobs:
Benchmarking-pipeline:
@@ -21,24 +21,24 @@ jobs:
mkdir -p ~/.ssh
touch ~/.ssh/id_rsa
chmod 600 ~/.ssh/id_rsa
echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa
ssh-keyscan -H login-00.discovery.neu.edu >> ~/.ssh/known_hosts || exit 1
echo "${{ secrets.SSH_PRIVATE_KEY_EXPLORER }}" > ~/.ssh/id_rsa
ssh-keyscan -H login.explorer.northeastern.edu >> ~/.ssh/known_hosts || exit 1

- name: Transfer Files to HPC
run: |
scp -O benchmark/benchmark_Dowell2021-HEqe408_LFQ.R benchmark/benchmark_Puyvelde2022-HYE5600735_LFQ.R benchmark/scriptController.json benchmark/calculateMetrics.R benchmark/config.slurm raina.ans@login-00.discovery.neu.edu:/work/VitekLab/Projects/Benchmarking || exit 1
scp -r benchmark raina.ans@login.explorer.northeastern.edu:/projects/VitekLab/Projects/Benchmarking || exit 1

- name: Submit Slurm Job and Capture Job ID
id: submit_job
run: |
ssh raina.ans@login-00.discovery.neu.edu "cd /work/VitekLab/Projects/Benchmarking && sbatch config.slurm" | tee slurm_job_id.txt
ssh raina.ans@login.explorer.northeastern.edu "cd /projects/VitekLab/Projects/Benchmarking/benchmark && sbatch config.slurm" | tee slurm_job_id.txt
slurm_job_id=$(grep -oP '\d+' slurm_job_id.txt)
echo "Slurm Job ID is $slurm_job_id"
echo "slurm_job_id=$slurm_job_id" >> $GITHUB_ENV

- name: Monitor Slurm Job
run: |
ssh raina.ans@login-00.discovery.neu.edu "
ssh raina.ans@login.explorer.northeastern.edu "
while squeue -j ${{ env.slurm_job_id }} | grep -q ${{ env.slurm_job_id }}; do
echo 'Job Id : ${{ env.slurm_job_id }} is still running...'
sleep 10
@@ -48,8 +48,8 @@ jobs:

- name: Fetch Output
run: |
scp -O raina.ans@login-00.discovery.neu.edu:/work/VitekLab/Projects/Benchmarking/job_output.txt job_output.txt
scp -O raina.ans@login-00.discovery.neu.edu:/work/VitekLab/Projects/Benchmarking/job_error.txt job_error.txt
scp -O raina.ans@login.explorer.northeastern.edu:/projects/VitekLab/Projects/Benchmarking/benchmark/job_output.txt job_output.txt
scp -O raina.ans@login.explorer.northeastern.edu:/projects/VitekLab/Projects/Benchmarking/benchmark/job_error.txt job_error.txt

- name: Upload Output as Artifact
uses: actions/upload-artifact@v4
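One note on the "Submit Slurm Job and Capture Job ID" step in the workflow above: `grep -oP '\d+'` matches every digit run in the `sbatch` output, so the capture works only because the job ID happens to be the first number printed. `sbatch --parsable` prints just the job ID and sidesteps the parsing entirely. A minimal sketch (the `sbatch` output line is simulated here, not produced by a real submission):

```shell
# Simulated output of a plain "sbatch config.slurm" call
sbatch_output="Submitted batch job 123456"

# Current approach: grab digit runs, keeping only the first match explicitly
slurm_job_id=$(grep -oP '\d+' <<< "$sbatch_output" | head -n 1)
echo "parsed id: $slurm_job_id"   # prints: parsed id: 123456

# Simpler alternative on the cluster itself:
#   slurm_job_id=$(sbatch --parsable config.slurm)
# --parsable makes sbatch print only the job ID (optionally ";cluster"),
# so no grep is needed.
```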
93 changes: 93 additions & 0 deletions benchmark/benchmark_Metamorpheus.R
@@ -0,0 +1,93 @@
library(MSstatsConvert)
library(MSstats)
library(parallel)
library(stringr)
library(jsonlite)
library(dplyr)

source("metamorpheus_Process.R")
config <- fromJSON("scriptController.json", simplifyVector = FALSE)

Comment on lines +8 to +10
💡 Verification agent

🧩 Analysis chain

Fix: MetamorpheusToMSstatsFormat likely undefined; also make sources/paths robust to working directory

MetamorpheusToMSstatsFormat isn’t defined in this script and isn’t exported by MSstats/MSstatsConvert. It appears to live in metamorpheus_code.R, which isn’t sourced here. Also, both the source() and fromJSON() calls are relative to the current working directory and will break if the script isn’t launched from benchmark/.

Apply this diff to source both helper files and load the controller JSON robustly:

- source("metamorpheus_Process.R")
-config <- fromJSON("scriptController.json", simplifyVector = FALSE)
+script_args <- commandArgs(trailingOnly = FALSE)
+script_path <- sub("^--file=", "", script_args[grep("^--file=", script_args)])
+base_dir <- if (length(script_path)) dirname(normalizePath(script_path)) else "."
+repo_root <- normalizePath(file.path(base_dir, ".."), mustWork = FALSE)
+
+source(file.path(base_dir, "metamorpheus_Process.R"))
+# Ensure MetamorpheusToMSstatsFormat is available
+if (file.exists(file.path(repo_root, "metamorpheus_code.R"))) {
+  source(file.path(repo_root, "metamorpheus_code.R"))
+}
+config <- fromJSON(file.path(base_dir, "scriptController.json"), simplifyVector = FALSE)

🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "PWD: $(pwd)"
echo
echo "---- benchmark directory listing ----"
ls -la benchmark || true
echo
echo "---- show benchmark/benchmark_Metamorpheus.R (first 200 lines) ----"
sed -n '1,200p' benchmark/benchmark_Metamorpheus.R || true
echo
echo "---- rg: search for MetamorpheusToMSstatsFormat ----"
rg -n --hidden --no-ignore -S "MetamorpheusToMSstatsFormat" || true
echo
echo "---- rg: search for metamorpheus_code.R references ----"
rg -n --hidden --no-ignore -S "metamorpheus_code\\.R" || true
echo
echo "---- fd: find files named metamorpheus_code.R or metamorpheus_Process.R ----"
fd -I -e R --hidden "metamorpheus_code.R" || true
fd -I -e R --hidden "metamorpheus_Process.R" || true
echo
echo "---- rg: search for source(...) of metamorpheus_Process.R ----"
rg -n --hidden --no-ignore -S "source\\(.*metamorpheus_Process\\.R" || true
echo
echo "---- rg: search for fromJSON\\(.*scriptController.json ----"
rg -n --hidden --no-ignore -S "fromJSON\\(.*scriptController\\.json" || true
echo
echo "---- check for metamorpheus_code.R at repo root ----"
if [ -f "metamorpheus_code.R" ]; then echo "FOUND: metamorpheus_code.R at repo root"; else echo "NOT FOUND: metamorpheus_code.R at repo root"; fi

Length of output: 6452


Fix required: source metamorpheus_code.R and make file paths robust to working directory

MetamorpheusToMSstatsFormat is defined in metamorpheus_code.R at the repo root (rg matches). benchmark/benchmark_Metamorpheus.R currently only does source("metamorpheus_Process.R") and fromJSON("scriptController.json"), which will break if the script is run from a different working directory — source the helper that defines MetamorpheusToMSstatsFormat and resolve paths relative to the script.

Files to update:

  • benchmark/benchmark_Metamorpheus.R — replace the relative source()/fromJSON() calls (lines ~8–10).
  • metamorpheus_code.R (repo root) — contains MetamorpheusToMSstatsFormat and must be sourced by the benchmark script.
  • benchmark/metamorpheus_Process.R — keep sourced, but source via computed script path.

Apply this diff to benchmark/benchmark_Metamorpheus.R:

- source("metamorpheus_Process.R")
- config <- fromJSON("scriptController.json", simplifyVector = FALSE)
+ script_args <- commandArgs(trailingOnly = FALSE)
+ script_path <- sub("^--file=", "", script_args[grep("^--file=", script_args)])
+ base_dir <- if (length(script_path)) dirname(normalizePath(script_path)) else "."
+ repo_root <- normalizePath(file.path(base_dir, ".."), mustWork = FALSE)
+
+ source(file.path(base_dir, "metamorpheus_Process.R"))
+ # Ensure MetamorpheusToMSstatsFormat is available
+ if (file.exists(file.path(repo_root, "metamorpheus_code.R"))) {
+   source(file.path(repo_root, "metamorpheus_code.R"))
+ }
+ config <- fromJSON(file.path(base_dir, "scriptController.json"), simplifyVector = FALSE)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
source("metamorpheus_Process.R")
config <- fromJSON("scriptController.json", simplifyVector = FALSE)
script_args <- commandArgs(trailingOnly = FALSE)
script_path <- sub("^--file=", "", script_args[grep("^--file=", script_args)])
base_dir <- if (length(script_path)) dirname(normalizePath(script_path)) else "."
repo_root <- normalizePath(file.path(base_dir, ".."), mustWork = FALSE)
source(file.path(base_dir, "metamorpheus_Process.R"))
# Ensure MetamorpheusToMSstatsFormat is available
if (file.exists(file.path(repo_root, "metamorpheus_code.R"))) {
source(file.path(repo_root, "metamorpheus_code.R"))
}
config <- fromJSON(file.path(base_dir, "scriptController.json"), simplifyVector = FALSE)
🤖 Prompt for AI Agents
In benchmark/benchmark_Metamorpheus.R around lines 8–10, the script currently
sources metamorpheus_Process.R and reads scriptController.json using relative
paths which will break if run from a different working directory; change the top
of the script to compute the script directory, source the repo-root
metamorpheus_code.R and the local metamorpheus_Process.R via absolute paths, and
read scriptController.json via that computed path (e.g. determine script_dir
with a robust pattern that falls back to getwd(), then call
source(file.path(script_dir, "metamorpheus_Process.R")) and
source(file.path(script_dir, "..", "metamorpheus_code.R")) and config <-
fromJSON(file.path(script_dir, "scriptController.json"), simplifyVector =
FALSE)).

runBenchmarkForMetaMorpheusData <- function(datasetPath, config) {

dataset_config <- config$datasets[[datasetPath]]
dataset_config <- as.list(dataset_config)

cat("Processing Dataset:", dataset_config$name, "\n")

filePath <- file.path(dataset_config$parent, dataset_config$data)
annotPath <- dataset_config$parent

input = data.table::fread(file.path(filePath, "QuantifiedPeaks.tsv"))
annot = data.table::fread(file.path(annotPath, "annotation.csv"))


cat("Dataset File Path:", filePath, "\n")
cat("Annotation File Path:", annotPath, "\n")

input = input %>% filter(!str_detect(`Protein Group`, ";")) # remove multiple protein group in same cell
input = input %>% filter(!str_detect(`Protein Group`, "DECOY")) # remove decoys

protein_mappings = data.table::fread(file.path(filePath, "QuantifiedProteins.tsv"))

protein_mappings = protein_mappings %>% filter(Organism %in% c("Escherichia coli (strain K12)", "Homo sapiens"))

print(protein_mappings)

input = input %>% filter(`Protein Group` %in% protein_mappings$`Protein Groups`)

output = MetamorpheusToMSstatsFormat(input, annot)
Contributor
@tonywu1999 tonywu1999 Jun 26, 2025
The MetamorpheusToMSstatsFormat function also has these two parameters. By default, they're set to TRUE

removeFewMeasurements, removeProtein_with1Feature

Could you double check there aren't major differences in empirical FDR when these two parameters are set to FALSE? I'm thinking due to the absence of PIP, certain proteins may be filtered out altogether, which could explain better empirical FDR with no PIP.
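The comparison the reviewer asks for could be run by calling the converter both ways and diffing the resulting protein sets. A sketch, assuming `MetamorpheusToMSstatsFormat` accepts the two parameters named in the comment and returns the standard MSstats long format with a `ProteinName` column (`input` and `annot` as loaded in the script above):

```r
# Default behaviour: both filters on, as in the current script
output_filtered <- MetamorpheusToMSstatsFormat(input, annot)

# Comparison run: keep sparse features and single-feature proteins
output_unfiltered <- MetamorpheusToMSstatsFormat(
  input, annot,
  removeFewMeasurements      = FALSE,  # keep features with few measurements
  removeProtein_with1Feature = FALSE   # keep proteins supported by one feature
)

# Proteins dropped by the default filters -- candidates for explaining
# the empirical FDR difference between the MBR and no-MBR runs
dropped <- setdiff(unique(output_unfiltered$ProteinName),
                   unique(output_filtered$ProteinName))
cat(length(dropped), "proteins removed by default filtering\n")
```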


data_process_tasks <- list(
list(
label = "Data process with Normalized Data",
result = function() dataProcess(output, featureSubset = "topN", n_top_feature = 20)
),
list(
label = "Data process with Normalization and MBImpute False",
result = function() dataProcess(output, featureSubset = "topN", n_top_feature = 20, MBimpute = FALSE)
),
list(
label = "Data process without Normalization",
result = function() dataProcess(output, featureSubset = "topN", normalization = "FALSE", n_top_feature = 20)
),
list(
label = "Data process without Normalization with MBImpute False",
result = function() dataProcess(output, featureSubset = "topN", normalization = "FALSE", n_top_feature = 20, MBimpute = FALSE)
),
list(
label = "Data process without Normalization and Imputation On for all features",
result = function() dataProcess(output, featureSubset = "all", normalization = "FALSE", MBimpute = FALSE)
),
list(
label = "Data process without Normalization and Imputation On for top3 features",
result = function() dataProcess(output, featureSubset = "top3", normalization = "FALSE", MBimpute = FALSE)
)
Comment on lines +47 to +65
⚠️ Potential issue

Correct MBimpute argument name and align “Imputation On” task behavior

Two issues:

  • Typo: MBImpute (capital I) is not a valid dataProcess() argument; it should be MBimpute.
  • Tasks labeled “…Imputation On…” currently pass FALSE, turning imputation off and contradicting the labels.

Apply this diff to fix both problems:

     list(
       label = "Data process with Normalization and MBImpute False",
-      result = function() dataProcess(output, featureSubset = "topN", n_top_feature = 20, MBimpute = FALSE)
+      result = function() dataProcess(output, featureSubset = "topN", n_top_feature = 20, MBimpute = FALSE)
     ),
@@
     list(
       label = "Data process without Normalization with MBImpute False",
-      result = function() dataProcess(output, featureSubset = "topN", normalization = "FALSE", n_top_feature = 20, MBimpute = FALSE)
+      result = function() dataProcess(output, featureSubset = "topN", normalization = "FALSE", n_top_feature = 20, MBimpute = FALSE)
     ),
     list(
       label = "Data process without Normalization and Imputation On for all features",
-      result = function() dataProcess(output, featureSubset = "all", normalization = "FALSE", MBImpute = FALSE)
+      result = function() dataProcess(output, featureSubset = "all", normalization = "FALSE", MBimpute = TRUE)
     ),
     list(
       label = "Data process without Normalization and Imputation On for top3 features",
-      result = function() dataProcess(output, featureSubset = "top3", normalization = "FALSE", MBImpute = FALSE)
+      result = function() dataProcess(output, featureSubset = "top3", normalization = "FALSE", MBimpute = TRUE)
     )

Note: The first two blocks already use the correct argument spelling (MBimpute). The changes above fix the last two blocks to both correct the spelling and enable imputation.

📝 Committable suggestion


Suggested change
label = "Data process with Normalization and MBImpute False",
result = function() dataProcess(output, featureSubset = "topN", n_top_feature = 20, MBimpute = FALSE)
),
list(
label = "Data process without Normalization",
result = function() dataProcess(output, featureSubset = "topN", normalization = "FALSE", n_top_feature = 20)
),
list(
label = "Data process without Normalization with MBImpute False",
result = function() dataProcess(output, featureSubset = "topN", normalization = "FALSE", n_top_feature = 20, MBimpute = FALSE)
),
list(
label = "Data process without Normalization and Imputation On for all features",
result = function() dataProcess(output, featureSubset = "all", normalization = "FALSE", MBimpute = FALSE)
),
list(
label = "Data process without Normalization and Imputation On for top3 features",
result = function() dataProcess(output, featureSubset = "top3", normalization = "FALSE", MBimpute = FALSE)
)
label = "Data process with Normalization and MBImpute False",
result = function() dataProcess(output, featureSubset = "topN", n_top_feature = 20, MBimpute = FALSE)
),
list(
label = "Data process without Normalization",
result = function() dataProcess(output, featureSubset = "topN", normalization = "FALSE", n_top_feature = 20)
),
list(
label = "Data process without Normalization with MBImpute False",
result = function() dataProcess(output, featureSubset = "topN", normalization = "FALSE", n_top_feature = 20, MBimpute = FALSE)
),
list(
label = "Data process without Normalization and Imputation On for all features",
result = function() dataProcess(output, featureSubset = "all", normalization = "FALSE", MBimpute = TRUE)
),
list(
label = "Data process without Normalization and Imputation On for top3 features",
result = function() dataProcess(output, featureSubset = "top3", normalization = "FALSE", MBimpute = TRUE)
)
🤖 Prompt for AI Agents
In benchmark/benchmark_Metamorpheus.R around lines 44 to 62, the dataProcess
calls have a typo (MBImpute with a capital I) and the two tasks labeled
"...Imputation On..." pass MBimpute = FALSE (disabling imputation) which
contradicts the labels; fix by renaming the argument to MBimpute (lowercase i)
where misspelled and set MBimpute = TRUE for the two "Imputation On" cases so
their behavior matches their labels.

)

start_time <- Sys.time()

num_cores <- detectCores() - 1

summarized_results <- mclapply(data_process_tasks, function(task) {
list(label = task$label, summarized = task$result())
}, mc.cores = num_cores)


results_list <- mclapply(summarized_results, function(res) {
calculate_Metrics(res$summarized, protein_mappings, res$label)
}, mc.cores = num_cores)

Comment on lines +70 to +80
💡 Verification agent

🧩 Analysis chain

Guard against mc.cores == 0 and provide a Windows-safe fallback

detectCores() - 1 can be zero; mclapply will error. Also, mclapply with mc.cores > 1 is unavailable on Windows. Add a safe core calculation and fall back to lapply when needed.

-  num_cores <- detectCores() - 1 
-
-  summarized_results <- mclapply(data_process_tasks, function(task) {
-    list(label = task$label, summarized = task$result())
-  }, mc.cores = num_cores)	
+  num_cores <- max(1L, as.integer(detectCores() - 1L))
+  if (.Platform$OS.type != "windows" && num_cores > 1L) {
+    summarized_results <- mclapply(
+      data_process_tasks,
+      function(task) list(label = task$label, summarized = task$result()),
+      mc.cores = num_cores
+    )
+  } else {
+    summarized_results <- lapply(
+      data_process_tasks,
+      function(task) list(label = task$label, summarized = task$result())
+    )
+  }
@@
-  results_list <- mclapply(summarized_results, function(res) {
-    calculate_Metrics(res$summarized, protein_mappings, res$label)
-  }, mc.cores = num_cores)
+  if (.Platform$OS.type != "windows" && num_cores > 1L) {
+    results_list <- mclapply(
+      summarized_results,
+      function(res) calculate_Metrics(res$summarized, protein_mappings, res$label),
+      mc.cores = num_cores
+    )
+  } else {
+    results_list <- lapply(
+      summarized_results,
+      function(res) calculate_Metrics(res$summarized, protein_mappings, res$label)
+    )
+  }

Optional verification: please run a small dataset on a 1-core environment to confirm no regression when num_cores == 1.


🏁 Script executed:

#!/bin/bash
set -euo pipefail
echo "Checking for benchmark/benchmark_Metamorpheus.R..."
if [ -f benchmark/benchmark_Metamorpheus.R ]; then
  echo "== File: benchmark/benchmark_Metamorpheus.R (first 220 lines) =="
  sed -n '1,220p' benchmark/benchmark_Metamorpheus.R
else
  echo "benchmark/benchmark_Metamorpheus.R not found"
fi

echo
echo "== Searching repo for parallel-related usages =="
rg -n --hidden -S 'detectCores|mclapply|library\\(parallel\\)|require\\(parallel\\)|\\.Platform' || true

echo
echo "== Showing matches in benchmark/ directory with context =="
rg -n --hidden -S --context 3 'detectCores|mclapply|\\.Platform' benchmark || true

Length of output: 8050


Guard against mc.cores == 0 and add a Windows-safe fallback (applies to multiple benchmark scripts)

detectCores() - 1 can be 0 (causing mclapply to error) and mclapply’s multicore mode is not available on Windows — fall back to lapply when multicore isn’t usable.

Files to update:

  • benchmark/benchmark_Metamorpheus.R (lines ~69–79)
  • benchmark/benchmark_Puyvelde2022-HYE5600735_LFQ.R (lines ~47–56)
  • benchmark/benchmark_Dowell2021-HEqe408_LFQ.R (lines ~48–56)

Suggested change (apply the same pattern to each file):

-  num_cores <- detectCores() - 1 
-
-  summarized_results <- mclapply(data_process_tasks, function(task) {
-    list(label = task$label, summarized = task$result())
-  }, mc.cores = num_cores)	
+  num_cores <- max(1L, as.integer(detectCores() - 1L))
+  if (.Platform$OS.type != "windows" && num_cores > 1L) {
+    summarized_results <- mclapply(
+      data_process_tasks,
+      function(task) list(label = task$label, summarized = task$result()),
+      mc.cores = num_cores
+    )
+  } else {
+    summarized_results <- lapply(
+      data_process_tasks,
+      function(task) list(label = task$label, summarized = task$result())
+    )
+  }
@@
-  results_list <- mclapply(summarized_results, function(res) {
-    calculate_Metrics(res$summarized, protein_mappings, res$label)
-  }, mc.cores = num_cores)
+  if (.Platform$OS.type != "windows" && num_cores > 1L) {
+    results_list <- mclapply(
+      summarized_results,
+      function(res) calculate_Metrics(res$summarized, protein_mappings, res$label),
+      mc.cores = num_cores
+    )
+  } else {
+    results_list <- lapply(
+      summarized_results,
+      function(res) calculate_Metrics(res$summarized, protein_mappings, res$label)
+    )
+  }

Optional: verify on a 1-core / Windows-like environment to ensure no regressions.

📝 Committable suggestion


Suggested change
num_cores <- detectCores() - 1
summarized_results <- mclapply(data_process_tasks, function(task) {
list(label = task$label, summarized = task$result())
}, mc.cores = num_cores)
results_list <- mclapply(summarized_results, function(res) {
calculate_Metrics(res$summarized, protein_mappings, res$label)
}, mc.cores = num_cores)
num_cores <- max(1L, as.integer(detectCores() - 1L))
if (.Platform$OS.type != "windows" && num_cores > 1L) {
summarized_results <- mclapply(
data_process_tasks,
function(task) list(label = task$label, summarized = task$result()),
mc.cores = num_cores
)
} else {
summarized_results <- lapply(
data_process_tasks,
function(task) list(label = task$label, summarized = task$result())
)
}
if (.Platform$OS.type != "windows" && num_cores > 1L) {
results_list <- mclapply(
summarized_results,
function(res) calculate_Metrics(res$summarized, protein_mappings, res$label),
mc.cores = num_cores
)
} else {
results_list <- lapply(
summarized_results,
function(res) calculate_Metrics(res$summarized, protein_mappings, res$label)
)
}
🤖 Prompt for AI Agents
In benchmark/benchmark_Metamorpheus.R around lines 69–79, detectCores() - 1 may
be 0 and mclapply is not available on Windows; change to compute num_cores <-
max(detectCores() - 1L, 1L) and then call either lapply when running on Windows
or when num_cores == 1, otherwise call mclapply with mc.cores = num_cores; apply
the same pattern to the other two files (benchmark_Puyvelde2022-HYE5600735_LFQ.R
lines ~47–56 and benchmark_Dowell2021-HEqe408_LFQ.R lines ~48–56) so multicore
is used only when supported and falls back to lapply safely.


final_results <- do.call(rbind, results_list)
end_time <- Sys.time()
total_time <- end_time - start_time
print(final_results)
print(paste("Total Execution Time:", total_time))

}



runBenchmarkForMetaMorpheusData("DDA-Solivais2024-Metamorpheus_MBR_LFQ", config)
runBenchmarkForMetaMorpheusData("DDA-Solivais2024-Metamorpheus_NoMBR_LFQ", config)
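For reference, the fields the script reads from scriptController.json (`config$datasets[[datasetPath]]` with `$name`, `$parent`, and `$data`) imply a shape along these lines; the values shown are illustrative placeholders, not taken from the repository:

```json
{
  "datasets": {
    "DDA-Solivais2024-Metamorpheus_MBR_LFQ": {
      "name": "Solivais2024 Metamorpheus (MBR)",
      "parent": "/projects/VitekLab/Data/Solivais2024",
      "data": "Metamorpheus_MBR"
    },
    "DDA-Solivais2024-Metamorpheus_NoMBR_LFQ": {
      "name": "Solivais2024 Metamorpheus (no MBR)",
      "parent": "/projects/VitekLab/Data/Solivais2024",
      "data": "Metamorpheus_NoMBR"
    }
  }
}
```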
59 changes: 36 additions & 23 deletions benchmark/config.slurm
@@ -1,38 +1,51 @@
#!/bin/bash
#SBATCH --job-name=msstats_benchmark_job_updated
#SBATCH --chdir=/work/VitekLab/Projects/Benchmarking/
#SBATCH --chdir=/projects/VitekLab/Projects/Benchmarking/benchmark
#SBATCH --output=job_output.txt
#SBATCH --error=job_error.txt
#SBATCH --open-mode=append
#SBATCH --time=01:00:00 # Set the maximum run time
#SBATCH --ntasks=1 # Number of tasks (one process)
#SBATCH --cpus-per-task=8 # Use 8 CPU cores for the task
#SBATCH --mem=256G # Request 256GB of memory
#SBATCH --partition=short # Use the 'short' partition (or change as needed)
#SBATCH --time=01:00:00
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=8
#SBATCH --mem=128G
#SBATCH --partition=short

module load R-geospatial

module load gcc/11.1.0
module load cmake/3.23.2
module load R
module load cmake/3.30.2

export LC_ALL=C
export R_LIBS_USER=/home/raina.ans/R/x86_64-pc-linux-gnu-library/4.2-geospatial
export R_LIBS_USER=/home/raina.ans/R/x86_64-pc-linux-gnu-library/4.4
mkdir -p $R_LIBS_USER

mkdir -p $HOME/lib_fix
ln -sf /shared/EL9/explorer/R/4.4.1/lib64/R/lib/libRlapack.so $HOME/lib_fix/libRlapack.so.3

mkdir -p $R_LIBS_USER
export LD_LIBRARY_PATH=$HOME/lib_fix:/shared/EL9/explorer/R/4.4.1/lib64/R/lib:/usr/lib64:$LD_LIBRARY_PATH

module load R
Rscript -e "if (!requireNamespace('remotes', quietly = TRUE)) install.packages('remotes', lib = Sys.getenv('R_LIBS_USER'), repos = 'https://cloud.r-project.org'); \
remotes::install_github('Vitek-Lab/MSstats', ref = 'devel', lib = Sys.getenv('R_LIBS_USER')); \
remotes::install_github('Vitek-Lab/MSstatsConvert', ref = 'master', lib = Sys.getenv('R_LIBS_USER')); \
install.packages(c('dplyr', 'stringr', 'ggplot2'), lib = Sys.getenv('R_LIBS_USER'), repos = 'https://cloud.r-project.org')"
Rscript -e '
.libPaths("/home/raina.ans/R/x86_64-pc-linux-gnu-library/4.4")
Sys.unsetenv("R_LIBS")
cat(".libPaths():\n"); print(.libPaths())

if (!requireNamespace("remotes", quietly = TRUE))
install.packages("remotes", repos = "https://cloud.r-project.org")

install.packages("nloptr", type = "source", repos = "https://cloud.r-project.org")

for (pkg in c("dplyr", "stringr", "ggplot2")) {
if (!requireNamespace(pkg, quietly = TRUE)) {
install.packages(pkg, repos = "https://cloud.r-project.org")
}
}
BiocManager::install(c("MSstatsConvert", "preprocessCore"), force = TRUE)
remotes::install_github("Vitek-Lab/MSstats", ref = "devel", force = TRUE)
'
Comment on lines +24 to +41
⚠️ Potential issue

This will fail if BiocManager is missing; install it before calling BiocManager::install

BiocManager::install errors if BiocManager isn’t installed. Add a guard to install BiocManager first.

Apply this diff:

 Rscript -e '
 .libPaths("/home/raina.ans/R/x86_64-pc-linux-gnu-library/4.4")
 Sys.unsetenv("R_LIBS")
 cat(".libPaths():\n"); print(.libPaths())
 
 if (!requireNamespace("remotes", quietly = TRUE))
     install.packages("remotes", repos = "https://cloud.r-project.org")
 
-install.packages("nloptr", type = "source", repos = "https://cloud.r-project.org")
+install.packages("nloptr", type = "source", repos = "https://cloud.r-project.org")
 
 for (pkg in c("dplyr", "stringr", "ggplot2")) {
     if (!requireNamespace(pkg, quietly = TRUE)) {
         install.packages(pkg, repos = "https://cloud.r-project.org")
     }
 }
-BiocManager::install(c("MSstatsConvert", "preprocessCore"), force = TRUE)
+if (!requireNamespace("BiocManager", quietly = TRUE)) {
+    install.packages("BiocManager", repos = "https://cloud.r-project.org")
+}
+BiocManager::install(c("MSstatsConvert", "preprocessCore"), force = TRUE)
 remotes::install_github("Vitek-Lab/MSstats", ref = "devel", force = TRUE)
 '
📝 Committable suggestion


Suggested change
Rscript -e '
.libPaths("/home/raina.ans/R/x86_64-pc-linux-gnu-library/4.4")
Sys.unsetenv("R_LIBS")
cat(".libPaths():\n"); print(.libPaths())
if (!requireNamespace("remotes", quietly = TRUE))
install.packages("remotes", repos = "https://cloud.r-project.org")
install.packages("nloptr", type = "source", repos = "https://cloud.r-project.org")
for (pkg in c("dplyr", "stringr", "ggplot2")) {
if (!requireNamespace(pkg, quietly = TRUE)) {
install.packages(pkg, repos = "https://cloud.r-project.org")
}
}
BiocManager::install(c("MSstatsConvert", "preprocessCore"), force = TRUE)
remotes::install_github("Vitek-Lab/MSstats", ref = "devel", force = TRUE)
'
Rscript -e '
.libPaths("/home/raina.ans/R/x86_64-pc-linux-gnu-library/4.4")
Sys.unsetenv("R_LIBS")
cat(".libPaths():\n"); print(.libPaths())
if (!requireNamespace("remotes", quietly = TRUE))
install.packages("remotes", repos = "https://cloud.r-project.org")
install.packages("nloptr", type = "source", repos = "https://cloud.r-project.org")
for (pkg in c("dplyr", "stringr", "ggplot2")) {
if (!requireNamespace(pkg, quietly = TRUE)) {
install.packages(pkg, repos = "https://cloud.r-project.org")
}
}
if (!requireNamespace("BiocManager", quietly = TRUE)) {
install.packages("BiocManager", repos = "https://cloud.r-project.org")
}
BiocManager::install(c("MSstatsConvert", "preprocessCore"), force = TRUE)
remotes::install_github("Vitek-Lab/MSstats", ref = "devel", force = TRUE)
'
🤖 Prompt for AI Agents
In benchmark/config.slurm around lines 24 to 41, the script calls
BiocManager::install without ensuring the BiocManager package is present; add a
guard to check for BiocManager with requireNamespace("BiocManager",
quietly=TRUE) and, if missing, install it (install.packages("BiocManager",
repos="https://cloud.r-project.org")) before calling BiocManager::install, then
proceed with the existing BiocManager::install call.


R_SCRIPTS=("benchmark_Dowell2021-HEqe408_LFQ.R" "benchmark_Puyvelde2022-HYE5600735_LFQ.R")
R_SCRIPTS=("benchmark_Dowell2021-HEqe408_LFQ.R" "benchmark_Puyvelde2022-HYE5600735_LFQ.R" "benchmark_Metamorpheus.R" )

for script in "${R_SCRIPTS[@]}"; do
-echo "Executing script: $script" >> job_output.txt
-Rscript "$script" >> job_output.txt 2>> job_error.txt
-wait
-echo "Finished executing script: $script" >> job_output.txt
+echo "Executing script: $script" >> job_output.txt
+stdbuf -oL -eL Rscript "$script" >> job_output.txt 2>> job_error.txt
+wait
+echo "Finished executing script: $script" >> job_output.txt
echo -e "\n\n"
done
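The loop above runs each benchmark sequentially but discards each script's exit status, so a failed benchmark looks the same as a successful one in job_output.txt. A minimal sketch of a variant that records the status is below; the Rscript call is commented out and replaced by a stand-in so the sketch runs anywhere, and the script names are placeholders rather than guaranteed filenames.

```shell
#!/bin/bash
# Variant of the benchmark loop that also records each script's exit status,
# so a failed benchmark is visible in job_output.txt rather than silent.
run_benchmarks() {
  for script in "$@"; do
    echo "Executing script: $script" >> job_output.txt
    # stdbuf -oL -eL Rscript "$script" >> job_output.txt 2>> job_error.txt
    true  # stand-in for the Rscript call so this sketch is self-contained
    status=$?
    echo "Finished executing script: $script (exit $status)" >> job_output.txt
    if [ "$status" -ne 0 ]; then
      echo "WARNING: $script failed with exit code $status" >> job_error.txt
    fi
  done
}

run_benchmarks "benchmark_Dowell2021-HEqe408_LFQ.R" "benchmark_Metamorpheus.R"
```

Capturing `$?` immediately after the command is the important detail: any intervening command (even an `echo`) would overwrite it.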
47 changes: 47 additions & 0 deletions benchmark/metamorpheus_Process.R
@@ -0,0 +1,47 @@
calculate_Metrics <- function(QuantData, protein_mappings, task_label, alpha = 0.05) {
comparison <- matrix(
c(-1,0,0,0,1, # E-A
-1,0,0,1,0, # D-A
-1,0,1,0,0, # C-A
-1,1,0,0,0), # B-A
nrow = 4, byrow = TRUE
)
rownames(comparison) <- c("E-A", "D-A", "C-A", "B-A")
groups <- levels(QuantData$ProteinLevelData$GROUP)
colnames(comparison) <- groups[order(as.numeric(groups))]

Comment on lines +10 to +12

🛠️ Refactor suggestion

Avoid fragile column ordering: stop coercing group labels to numeric

as.numeric(groups) will warn and is unreliable when GROUP levels are non-numeric (e.g., "A","B","C"...). You only need to align by names; just set the column names directly. Optionally validate the expected length.

Apply this diff:

-  groups <- levels(QuantData$ProteinLevelData$GROUP)
-  colnames(comparison) <- groups[order(as.numeric(groups))]
+  groups <- levels(QuantData$ProteinLevelData$GROUP)
+  if (length(groups) != ncol(comparison)) {
+    stop(sprintf("Contrast matrix expects %d groups but found %d in data: %s",
+                 ncol(comparison), length(groups), paste(groups, collapse = ", ")))
+  }
+  colnames(comparison) <- groups
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
-  groups <- levels(QuantData$ProteinLevelData$GROUP)
-  colnames(comparison) <- groups[order(as.numeric(groups))]
+  groups <- levels(QuantData$ProteinLevelData$GROUP)
+  if (length(groups) != ncol(comparison)) {
+    stop(sprintf("Contrast matrix expects %d groups but found %d in data: %s",
+                 ncol(comparison), length(groups), paste(groups, collapse = ", ")))
+  }
+  colnames(comparison) <- groups
🤖 Prompt for AI Agents
In benchmark/metamorpheus_Process.R around lines 10 to 12, the code coerces
group level labels to numeric via as.numeric(groups) which is fragile and warns
for non-numeric levels; instead assign column names directly using the factor
levels (or sorted/validated names if needed) and optionally verify the length
matches ncol(comparison) before setting colnames to avoid misalignment. Ensure
you remove the as.numeric() coercion and replace it with a direct assignment of
groups (or a deterministic name ordering) and add a simple length check to throw
a clear error if sizes differ.

model <- groupComparison(
contrast.matrix = comparison,
data = QuantData,
use_log_file = FALSE
)

ecoli_ids <- protein_mappings %>%
filter(Organism == "Escherichia coli (strain K12)") %>%
pull(`Protein Groups`)

filtered_comparison_result <- model$ComparisonResult %>%
mutate(ecoli = Protein %in% ecoli_ids) %>%
filter(is.na(issue))

labels <- unique(filtered_comparison_result$Label)
result_rows <- lapply(labels, function(lbl) {
df <- filtered_comparison_result %>% filter(Label == lbl)
sig <- df %>% filter(adj.pvalue < alpha)

tp <- sig %>% filter(ecoli) %>% nrow()
fp <- sig %>% filter(!ecoli) %>% nrow()
tot <- tp + fp
fdr <- if (tot > 0) fp / tot else NA_real_

data.frame(
Task = task_label,
Comparison = lbl,
FDR = fdr,
stringsAsFactors = FALSE
)
})

results <- do.call(rbind, result_rows)
return(results)
}
34 changes: 32 additions & 2 deletions benchmark/scriptController.json
@@ -2,7 +2,7 @@
"datasets": {
"DDA-Puyvelde2022-HYE5600735_LFQ": {
"name": "DDA-Puyvelde2022-HYE5600735_LFQ",
-      "file": "/work/VitekLab/Data/MS/Benchmarking/DDA-Puyvelde2022/DDA-Puyvelde2022-HYE5600735_LFQ/FragPipe/TOP0/MSstats_fixed.csv",
+      "file": "/projects/VitekLab/Data/MS/Benchmarking/DDA-Puyvelde2022/DDA-Puyvelde2022-HYE5600735_LFQ/FragPipe/TOP0/MSstats_fixed.csv",
"samples": {
"Human": {
"pattern": "_HUMAN$",
@@ -20,7 +20,37 @@
},
"DDA-Dowell2021-HEqe408_LFQ": {
"name": "DDA-Dowell2021-HEqe408_LFQ",
-      "file": "/work/VitekLab/Data/MS/Benchmarking/DDA-Dowell2021-HEqe408_LFQ/FragPipe/TOP0/MSstats.csv",
+      "file": "/projects/VitekLab/Data/MS/Benchmarking/DDA-Dowell2021-HEqe408_LFQ/FragPipe/TOP0/MSstats.csv",
"samples": {
"Human": {
"pattern": "_HUMAN$",
"type": "insignificant"
},
"Ecoli": {
"pattern": "_ECOLI$",
"type": "significant"
}
}
Comment on lines 22 to +33

💡 Verification agent

🧩 Analysis chain

Confirm consumer code reads new fields (parent and data)

Older datasets use a file field; the MetaMorpheus entries introduce parent + data. Ensure the benchmarking script branches correctly when these fields are present.

Run this to verify the harness reads parent and data and constructs file paths accordingly:


🏁 Script executed:

#!/bin/bash
# Inspect benchmark scripts for usage of scriptController.json
rg -n -C3 'scriptController\.json|parent|data"|samples' --type=r benchmark

Length of output: 4202


🏁 Script executed:

#!/bin/bash
set -euo pipefail

echo "Searching benchmark R scripts for dataset_config usage and path construction..."
rg -n -C3 'dataset_config|filePath|file.path\(|dataset_config\$file|dataset_config\$parent|dataset_config\$data' --type=r benchmark || true

echo
echo "----- Full contents of the three benchmark scripts referenced in previous output -----"
for f in benchmark/benchmark_Metamorpheus.R benchmark/benchmark_Dowell2021-HEqe408_LFQ.R benchmark/benchmark_Puyvelde2022-HYE5600735_LFQ.R; do
  echo "=== $f ==="
  sed -n '1,240p' "$f" || true
  echo
done

Length of output: 14136


Action: Make benchmark scripts accept both file and parent+data config shapes

Verified: benchmark/benchmark_Metamorpheus.R already constructs paths from dataset_config$parent + dataset_config$data; benchmark_Dowell2021-HEqe408_LFQ.R and benchmark_Puyvelde2022-HYE5600735_LFQ.R still assume dataset_config$file only. This will break if scriptController.json uses the new parent+data shape for non‑Metamorpheus datasets.

Files to update:

  • benchmark/benchmark_Metamorpheus.R — lines ~18–21: add a fallback to dataset_config$file when parent/data are absent.
  • benchmark/benchmark_Dowell2021-HEqe408_LFQ.R — lines ~14–21: resolve a data_file from either dataset_config$file or parent+data, then use it for fread and logging.
  • benchmark/benchmark_Puyvelde2022-HYE5600735_LFQ.R — lines ~14–21: same change as Dowell.

Suggested minimal patches (apply the same pattern to both Dowell and Puyvelde):

Before:

cat("Processing Dataset:", dataset_config$name, "\n")
cat("Dataset File Path:", dataset_config$file, "\n")
...
fragpipe_raw <- data.table::fread(dataset_config$file)

After:

data_file <- if (!is.null(dataset_config$file)) {
  dataset_config$file
} else if (!is.null(dataset_config$parent) && !is.null(dataset_config$data)) {
  file.path(dataset_config$parent, dataset_config$data)
} else {
  stop("Dataset path not specified in config (expected 'file' or 'parent' + 'data')")
}
cat("Processing Dataset:", dataset_config$name, "\n")
cat("Dataset File Path:", data_file, "\n")
...
fragpipe_raw <- data.table::fread(data_file)

Suggested change for Metamorpheus (keep existing parent+data behavior, add fallback to file):

Before:

filePath <- file.path(dataset_config$parent, dataset_config$data)
annotPath <- dataset_config$parent

After:

if (!is.null(dataset_config$file)) {
  # If a file path is supplied, use its directory as the dataset directory
  filePath <- dirname(dataset_config$file)
  annotPath <- dirname(filePath)
} else {
  filePath <- file.path(dataset_config$parent, dataset_config$data)
  annotPath <- dataset_config$parent
}

Reasoning: This makes each script robust to either config shape (old file or new parent+data) and fails fast with a clear error if neither is present.

Please apply these small changes to the three scripts (or ensure scriptController.json keeps the original file field for datasets consumed by the FragPipe-based scripts).

🤖 Prompt for AI Agents
In benchmark/scriptController.json around lines 22–33 and the three R scripts:
benchmark/benchmark_Metamorpheus.R (~lines 18–21),
benchmark/benchmark_Dowell2021-HEqe408_LFQ.R (~lines 14–21), and
benchmark/benchmark_Puyvelde2022-HYE5600735_LFQ.R (~lines 14–21), update each
script to accept either dataset_config$file OR dataset_config$parent +
dataset_config$data: for Dowell and Puyvelde, resolve a single data_file by
using dataset_config$file if present, otherwise build it with
file.path(dataset_config$parent, dataset_config$data), stop with a clear error
if neither, then use data_file for logging and fread; for Metamorpheus, keep
parent+data behavior but add a fallback where if dataset_config$file is
provided, set filePath to dirname(dataset_config$file) and annotPath to its
parent (dirname(filePath)); ensure all logging uses the resolved path and fail
fast with a descriptive message when no valid path is provided.
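The fallback logic the review proposes for the R scripts (prefer an explicit `file`, else join `parent` + `data`, else fail fast) can be sketched generically in shell. `DATASET_FILE`, `DATASET_PARENT`, and `DATASET_DATA` are hypothetical variables used for illustration, not fields the harness actually exports from scriptController.json.

```shell
#!/bin/bash
# Sketch of the 'file' vs 'parent'+'data' resolution described above.
resolve_dataset_path() {
  if [ -n "${DATASET_FILE:-}" ]; then
    printf '%s\n' "$DATASET_FILE"                       # old shape: explicit file
  elif [ -n "${DATASET_PARENT:-}" ] && [ -n "${DATASET_DATA:-}" ]; then
    printf '%s/%s\n' "$DATASET_PARENT" "$DATASET_DATA"  # new shape: parent + data
  else
    echo "Dataset path not specified (expected 'file' or 'parent' + 'data')" >&2
    return 1
  fi
}

DATASET_PARENT="/projects/VitekLab/Data/MS/Benchmarking/DDA-Solivais2024_Metamorpheus/Current"
DATASET_DATA="FlashLFQ_NoNormalization_NoPIP"
resolve_dataset_path
```

Checking the explicit path first keeps old config entries working unchanged, while the final branch fails fast with a clear message instead of passing an empty path to a downstream reader.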

},
"DDA-Solivais2024-Metamorpheus_NoMBR_LFQ": {
"name": "DDA-Solivais2024-Metamorpheus_NoMBR_LFQ",
"parent": "/projects/VitekLab/Data/MS/Benchmarking/DDA-Solivais2024_Metamorpheus/Current",
"data":"FlashLFQ_NoNormalization_NoPIP",
"samples": {
"Human": {
"pattern": "_HUMAN$",
"type": "insignificant"
},
"Ecoli": {
"pattern": "_ECOLI$",
"type": "significant"
}
}
},
"DDA-Solivais2024-Metamorpheus_MBR_LFQ": {
"name": "DDA-Solivais2024-Metamorpheus_MBR_LFQ",
"parent": "/projects/VitekLab/Data/MS/Benchmarking/DDA-Solivais2024_Metamorpheus/Current",
"data":"FlashLFQ_v1.0_NoNormalization_wPIP",
"samples": {
"Human": {
"pattern": "_HUMAN$",