Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 27 additions & 10 deletions bin/novel_peptides.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ option_list = list(
help = "Search engine used: 'fragpipe' or 'metamorpheus' [default: fragpipe]"),
make_option(c("--acquisition_type"), type = "character", default = NULL,
help = "Fragpipe acquisition tpye: 'DIA' or 'DDA' [required for fragpipe]"),
make_option(c("--fragpipe_results_dir"), type = "character", default = NULL,
help = "Path to FragPipe results directory (e.g., results/S5_PROTEOMICS/M3_FRAGPIPE/sample_name)"),
make_option(c("--lr_cds_gtf"), type = "character", default = NULL,
help = "Path to LR transcript GTF with CDS entries (enables peptide-to-genome mapping)"),
make_option(c("--lr_orf_fasta"), type = "character", default = NULL,
Expand Down Expand Up @@ -276,21 +278,28 @@ map_peptides_to_genome = function(peptides, aa_fasta_path, gtf_path) {

# Fragpipe DIA
if (opt$acquisition_type == "DIA" & opt$ms_search_software == "fragpipe") {


# Determine base path for FragPipe results
if (!is.null(opt$fragpipe_results_dir)) {
fragpipe_base = opt$fragpipe_results_dir
} else {
fragpipe_base = file.path("S5_PROTEOMICS/M2_FRAGPIPE", opt$sample_name)
}

# peptides
peptides_path = file.path("S5_PROTEOMICS/M2_FRAGPIPE", opt$sample_name, "peptide.tsv")
peptides_path = file.path(fragpipe_base, "peptide.tsv")
dia_peptides = read_tsv(peptides_path, show_col_types = FALSE)

peptides_df = dia_peptides %>%
select(
Sequence = `Peptide`, # Clean peptide sequence
PSM = contains("Spectral Count"), # PSM count
Protein,
Protein,
Additional_Proteins = `Mapped Proteins` # All protein matches (comma delimiter-separated)
)

# get intensities
dia_path = file.path("S5_PROTEOMICS/M2_FRAGPIPE", opt$sample_name, "dia-quant-output", "report.tsv")
dia_path = file.path(fragpipe_base, "dia-quant-output", "report.tsv")
dia_report = read_tsv(dia_path, show_col_types = FALSE)

dia_intensity = dia_report %>%
Expand All @@ -306,8 +315,15 @@ if (opt$acquisition_type == "DIA" & opt$ms_search_software == "fragpipe") {

# Fragpipe DDA
if (opt$acquisition_type == "DDA" & opt$ms_search_software == "fragpipe") {

peptides_path = file.path("S5_PROTEOMICS/M2_FRAGPIPE", opt$sample_name, "combined_peptide.tsv")

# Determine base path for FragPipe results
if (!is.null(opt$fragpipe_results_dir)) {
fragpipe_base = opt$fragpipe_results_dir
} else {
fragpipe_base = file.path("S5_PROTEOMICS/M2_FRAGPIPE", opt$sample_name)
}

peptides_path = file.path(fragpipe_base, "combined_peptide.tsv")
dda_peptides = read_tsv(peptides_path, show_col_types = FALSE)

peptides_df = dda_peptides %>%
Expand Down Expand Up @@ -492,8 +508,9 @@ if (!is.null(opt$lr_cds_gtf) && !is.null(opt$lr_orf_fasta)) {
distinct(Sequence, transcript_id, PSM) %>%
group_by(Sequence, PSM) %>%
slice(1) %>%
ungroup()

ungroup() %>%
mutate(transcript_id = as.character(transcript_id))

# Map LR peptides
lr_bed = map_peptides_to_genome(unique_lr_peptides, opt$lr_orf_fasta, opt$lr_cds_gtf)

Expand Down
36 changes: 12 additions & 24 deletions modules/local/novel_peptides/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ process NOVEL_PEPTIDES {
container "docker://docker.io/jtllab/lrp2-lite:latest"

input:
tuple val(meta), path(search_results)
tuple val(meta), val(fragpipe_results_dir)
path novel_peptides_script
path lr_cds_gtf
path lr_orf_fasta
val gencode_version

output:
tuple val(meta), path("*_novel_peptides.tsv"), emit: novel_peptides
Expand All @@ -25,45 +26,32 @@ process NOVEL_PEPTIDES {
def acquisition_type = meta.mass_spec_type ?: 'DDA'

"""
echo "=========================================================================="
echo " NOVEL PEPTIDES CLASSIFICATION: ${meta.id}"
echo "=========================================================================="
echo "NOVEL PEPTIDES CLASSIFICATION: ${meta.id}"
echo "Sample: ${prefix}"
echo "Search software: ${ms_search_software}"
echo "Acquisition type: ${acquisition_type}"
echo "LR CDS GTF: ${lr_cds_gtf}"
echo "LR ORF FASTA: ${lr_orf_fasta}"
echo "Input directory: \$(pwd)"
echo "=========================================================================="
echo "FragPipe results directory: ${fragpipe_results_dir}"
echo ""

# Copy search results to expected directory structure
# The R script expects results in S5_PROTEOMICS/M2_FRAGPIPE/<sample_name>/ directory
mkdir -p S5_PROTEOMICS/M2_FRAGPIPE/${prefix}

# Copy all input files to the expected location
if [ -d "${search_results}" ]; then
cp -r ${search_results}/* S5_PROTEOMICS/M2_FRAGPIPE/${prefix}/ 2>/dev/null || true
if [ ! -d "${fragpipe_results_dir}" ]; then
echo "ERROR: FragPipe results directory not found: ${fragpipe_results_dir}"
echo "Please check that FragPipe completed successfully."
exit 1
fi

# Also check if files are directly provided (not in subdirectory)
cp ${search_results}/*.tsv S5_PROTEOMICS/M2_FRAGPIPE/${prefix}/ 2>/dev/null || true

echo "Directory structure created:"
ls -lah S5_PROTEOMICS/M2_FRAGPIPE/${prefix}/ | head -20
echo ""

# Run novel peptides classification
Rscript ${novel_peptides_script} \\
--sample_name ${prefix} \\
--ms_search_software ${ms_search_software} \\
--acquisition_type ${acquisition_type} \\
--fragpipe_results_dir ${fragpipe_results_dir} \\
--lr_cds_gtf ${lr_cds_gtf} \\
--lr_orf_fasta ${lr_orf_fasta} \\
--gencode_version ${gencode_version} \\
--outdir . \\
$args

# Verify output was created
if [ ! -f "${prefix}_novel_peptides.tsv" ]; then
echo "ERROR: Novel peptides output file not created"
exit 1
Expand All @@ -76,8 +64,8 @@ process NOVEL_PEPTIDES {

cat <<-END_VERSIONS > versions.yml
"${task.process}":
r-base: \$(R --version | grep "R version" | sed 's/.*R version //g' | sed 's/ .*//g')
r-tidyverse: \$(Rscript -e "cat(as.character(packageVersion('tidyverse')))")
r-base: \$(R --version 2>/dev/null | grep "R version" | sed 's/.*R version //g' | sed 's/ .*//g' || echo "unknown")
r-tidyverse: \$(Rscript -e "cat(as.character(packageVersion('tidyverse')))" 2>/dev/null || echo "unknown")
END_VERSIONS
"""

Expand Down
19 changes: 16 additions & 3 deletions subworkflows/local/proteomics.nf
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ workflow PROTEOMICS {
// Novel peptides inputs (from PREDICTED_PROTEOME subworkflow)
lr_cds_gtf // path: Long-read CDS GTF (*_corrected_filtered_CDS.gtf)
lr_orf_fasta // path: Long-read ORF FASTA (*_predicted.proteome.all_best_orfs.fa)
genome // val: Genome reference name (e.g., 'GRCh38.p14.v49')

main:
ch_versions = channel.empty()
Expand Down Expand Up @@ -162,15 +163,27 @@ workflow PROTEOMICS {
ch_versions = ch_versions.mix(FRAGPIPE.out.versions)
ch_psm_table = FRAGPIPE.out.psm_table
ch_search_results = FRAGPIPE.out.results

// Step 4: Run NOVEL_PEPTIDES on FragPipe results
// Extract GENCODE version from genome parameter (e.g., 'GRCh38.p14.v49' -> '49')
def gencode_version = genome ? genome.tokenize('.')[-1].replaceAll(/[^0-9]/, '') : '49'

// Create channel with meta and path to published FragPipe results directory
ch_fragpipe_results_dir = ch_search_results.map { meta, _results ->
// Convert params.outdir to absolute path
def outdir_file = file(params.outdir)
def fragpipe_dir = "${outdir_file}/S5_PROTEOMICS/M3_FRAGPIPE/${meta.id}"
return [meta, fragpipe_dir]
}

def novel_peptides_script = file("${projectDir}/bin/novel_peptides.R")

NOVEL_PEPTIDES(
ch_search_results,
ch_fragpipe_results_dir,
novel_peptides_script,
lr_cds_gtf,
lr_orf_fasta
lr_orf_fasta,
gencode_version
)
ch_versions = ch_versions.mix(NOVEL_PEPTIDES.out.versions)
ch_novel_peptides = NOVEL_PEPTIDES.out.novel_peptides
Expand Down
3 changes: 2 additions & 1 deletion workflows/lrp2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,8 @@ workflow LRP2 {
params.fragpipe_license_accept,
// Novel peptides inputs
ch_lr_cds_gtf,
ch_lr_orf_fasta
ch_lr_orf_fasta,
params.genome
)
ch_versions = ch_versions.mix(PROTEOMICS.out.versions.ifEmpty([]))
}
Expand Down
Loading