sheynkman-lab · JTL-lab · Mar 12, 2026 · Mar 12, 2026
diff --git a/bin/novel_peptides.R b/bin/novel_peptides.R
@@ -52,6 +52,8 @@ option_list = list(
               help = "Search engine used: 'fragpipe' or 'metamorpheus' [default: fragpipe]"),
   make_option(c("--acquisition_type"), type = "character", default = NULL,
               help = "Fragpipe acquisition tpye: 'DIA' or 'DDA' [required for fragpipe]"),
+  make_option(c("--fragpipe_results_dir"), type = "character", default = NULL,
+              help = "Path to FragPipe results directory (e.g., results/S5_PROTEOMICS/M3_FRAGPIPE/sample_name)"),
   make_option(c("--lr_cds_gtf"), type = "character", default = NULL,
               help = "Path to LR transcript GTF with CDS entries (enables peptide-to-genome mapping)"),
   make_option(c("--lr_orf_fasta"), type = "character", default = NULL,
@@ -276,21 +278,28 @@ map_peptides_to_genome = function(peptides, aa_fasta_path, gtf_path) {
 
 # Fragpipe DIA
 if (opt$acquisition_type == "DIA" & opt$ms_search_software == "fragpipe") {
-
+
+  # Determine base path for FragPipe results
+  if (!is.null(opt$fragpipe_results_dir)) {
+    fragpipe_base = opt$fragpipe_results_dir
+  } else {
+    fragpipe_base = file.path("S5_PROTEOMICS/M2_FRAGPIPE", opt$sample_name)
+  }
+
   # peptides
-  peptides_path = file.path("S5_PROTEOMICS/M2_FRAGPIPE", opt$sample_name, "peptide.tsv")
+  peptides_path = file.path(fragpipe_base, "peptide.tsv")
   dia_peptides = read_tsv(peptides_path, show_col_types = FALSE)
-  
+
   peptides_df = dia_peptides %>%
     select(
       Sequence = `Peptide`,                   # Clean peptide sequence
       PSM = contains("Spectral Count"),       # PSM count
-      Protein, 
+      Protein,
       Additional_Proteins = `Mapped Proteins` # All protein matches (comma delimiter-separated)
     )
-  
+
   # get intensities
-  dia_path = file.path("S5_PROTEOMICS/M2_FRAGPIPE", opt$sample_name, "dia-quant-output", "report.tsv")
+  dia_path = file.path(fragpipe_base, "dia-quant-output", "report.tsv")
   dia_report = read_tsv(dia_path, show_col_types = FALSE)
 
   dia_intensity = dia_report %>%
@@ -306,8 +315,15 @@ if (opt$acquisition_type == "DIA" & opt$ms_search_software == "fragpipe") {
 
 # Fragpipe DDA
 if (opt$acquisition_type == "DDA" & opt$ms_search_software == "fragpipe") {
-
-  peptides_path = file.path("S5_PROTEOMICS/M2_FRAGPIPE", opt$sample_name, "combined_peptide.tsv")
+
+  # Determine base path for FragPipe results
+  if (!is.null(opt$fragpipe_results_dir)) {
+    fragpipe_base = opt$fragpipe_results_dir
+  } else {
+    fragpipe_base = file.path("S5_PROTEOMICS/M2_FRAGPIPE", opt$sample_name)
+  }
+
+  peptides_path = file.path(fragpipe_base, "combined_peptide.tsv")
   dda_peptides = read_tsv(peptides_path, show_col_types = FALSE)
 
   peptides_df = dda_peptides %>%
@@ -492,8 +508,9 @@ if (!is.null(opt$lr_cds_gtf) && !is.null(opt$lr_orf_fasta)) {
     distinct(Sequence, transcript_id, PSM) %>%
     group_by(Sequence, PSM) %>%
     slice(1) %>%
-    ungroup()
-
+    ungroup() %>%
+    mutate(transcript_id = as.character(transcript_id))
+
   # Map LR peptides
   lr_bed = map_peptides_to_genome(unique_lr_peptides, opt$lr_orf_fasta, opt$lr_cds_gtf)
 

diff --git a/modules/local/novel_peptides/main.nf b/modules/local/novel_peptides/main.nf
@@ -6,10 +6,11 @@ process NOVEL_PEPTIDES {
     container "docker://docker.io/jtllab/lrp2-lite:latest"
 
     input:
-    tuple val(meta), path(search_results)
+    tuple val(meta), val(fragpipe_results_dir)
     path novel_peptides_script
     path lr_cds_gtf
     path lr_orf_fasta
+    val gencode_version
 
     output:
     tuple val(meta), path("*_novel_peptides.tsv"), emit: novel_peptides
@@ -25,45 +26,32 @@ process NOVEL_PEPTIDES {
     def acquisition_type = meta.mass_spec_type ?: 'DDA'
 
     """
-    echo "=========================================================================="
-    echo "            NOVEL PEPTIDES CLASSIFICATION: ${meta.id}"
-    echo "=========================================================================="
+    echo "NOVEL PEPTIDES CLASSIFICATION: ${meta.id}"
     echo "Sample: ${prefix}"
     echo "Search software: ${ms_search_software}"
     echo "Acquisition type: ${acquisition_type}"
     echo "LR CDS GTF: ${lr_cds_gtf}"
     echo "LR ORF FASTA: ${lr_orf_fasta}"
-    echo "Input directory: \$(pwd)"
-    echo "=========================================================================="
+    echo "FragPipe results directory: ${fragpipe_results_dir}"
     echo ""
 
-    # Copy search results to expected directory structure
-    # The R script expects results in S5_PROTEOMICS/M2_FRAGPIPE/<sample_name>/ directory
-    mkdir -p S5_PROTEOMICS/M2_FRAGPIPE/${prefix}
-
-    # Copy all input files to the expected location
-    if [ -d "${search_results}" ]; then
-        cp -r ${search_results}/* S5_PROTEOMICS/M2_FRAGPIPE/${prefix}/ 2>/dev/null || true
+    if [ ! -d "${fragpipe_results_dir}" ]; then
+        echo "ERROR: FragPipe results directory not found: ${fragpipe_results_dir}"
+        echo "Please check that FragPipe completed successfully."
+        exit 1
     fi
 
-    # Also check if files are directly provided (not in subdirectory)
-    cp ${search_results}/*.tsv S5_PROTEOMICS/M2_FRAGPIPE/${prefix}/ 2>/dev/null || true
-
-    echo "Directory structure created:"
-    ls -lah S5_PROTEOMICS/M2_FRAGPIPE/${prefix}/ | head -20
-    echo ""
-
-    # Run novel peptides classification
     Rscript ${novel_peptides_script} \\
         --sample_name ${prefix} \\
         --ms_search_software ${ms_search_software} \\
         --acquisition_type ${acquisition_type} \\
+        --fragpipe_results_dir ${fragpipe_results_dir} \\
         --lr_cds_gtf ${lr_cds_gtf} \\
         --lr_orf_fasta ${lr_orf_fasta} \\
+        --gencode_version ${gencode_version} \\
         --outdir . \\
         $args
 
-    # Verify output was created
     if [ ! -f "${prefix}_novel_peptides.tsv" ]; then
         echo "ERROR: Novel peptides output file not created"
         exit 1
@@ -76,8 +64,8 @@ process NOVEL_PEPTIDES {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        r-base: \$(R --version | grep "R version" | sed 's/.*R version //g' | sed 's/ .*//g')
-        r-tidyverse: \$(Rscript -e "cat(as.character(packageVersion('tidyverse')))")
+        r-base: \$(R --version 2>/dev/null | grep "R version" | sed 's/.*R version //g' | sed 's/ .*//g' || echo "unknown")
+        r-tidyverse: \$(Rscript -e "cat(as.character(packageVersion('tidyverse')))" 2>/dev/null || echo "unknown")
     END_VERSIONS
     """
 

diff --git a/subworkflows/local/proteomics.nf b/subworkflows/local/proteomics.nf
@@ -33,6 +33,7 @@ workflow PROTEOMICS {
     // Novel peptides inputs (from PREDICTED_PROTEOME subworkflow)
     lr_cds_gtf                  // path: Long-read CDS GTF (*_corrected_filtered_CDS.gtf)
     lr_orf_fasta                // path: Long-read ORF FASTA (*_predicted.proteome.all_best_orfs.fa)
+    genome                      // val: Genome reference name (e.g., 'GRCh38.p14.v49')
 
     main:
     ch_versions = channel.empty()
@@ -162,15 +163,27 @@ workflow PROTEOMICS {
         ch_versions = ch_versions.mix(FRAGPIPE.out.versions)
         ch_psm_table = FRAGPIPE.out.psm_table
         ch_search_results = FRAGPIPE.out.results
-
+        
         // Step 4: Run NOVEL_PEPTIDES on FragPipe results
+        // Extract GENCODE version from genome parameter (e.g., 'GRCh38.p14.v49' -> '49')
+        def gencode_version = genome ? genome.tokenize('.')[-1].replaceAll(/[^0-9]/, '') : '49'
+
+        // Create channel with meta and path to published FragPipe results directory
+        ch_fragpipe_results_dir = ch_search_results.map { meta, _results ->
+            // Convert params.outdir to absolute path
+            def outdir_file = file(params.outdir)
+            def fragpipe_dir = "${outdir_file}/S5_PROTEOMICS/M3_FRAGPIPE/${meta.id}"
+            return [meta, fragpipe_dir]
+        }
+
         def novel_peptides_script = file("${projectDir}/bin/novel_peptides.R")
 
         NOVEL_PEPTIDES(
-            ch_search_results,
+            ch_fragpipe_results_dir,
             novel_peptides_script,
             lr_cds_gtf,
-            lr_orf_fasta
+            lr_orf_fasta,
+            gencode_version
         )
         ch_versions = ch_versions.mix(NOVEL_PEPTIDES.out.versions)
         ch_novel_peptides = NOVEL_PEPTIDES.out.novel_peptides

diff --git a/workflows/lrp2.nf b/workflows/lrp2.nf
@@ -350,7 +350,8 @@ workflow LRP2 {
             params.fragpipe_license_accept,
             // Novel peptides inputs
             ch_lr_cds_gtf,
-            ch_lr_orf_fasta
+            ch_lr_orf_fasta,
+            params.genome
         )
         ch_versions = ch_versions.mix(PROTEOMICS.out.versions.ifEmpty([]))
     }