Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Fixed`

- Fixed `SUMMARIZE_RESULTS` crash with `--quantify` caused by OpenMS 3.5.0 TextExporter phantom column bug ([OpenMS/OpenMS#9120](https://github.com/OpenMS/OpenMS/issues/9120)) [#444](https://github.com/nf-core/mhcquant/pull/444)
- Fixed an issue where stripping the sequence in `SUMMARIZE_RESULTS` did not work for complex modifications [#436](https://github.com/nf-core/mhcquant/pull/436)

### `Changed`
Expand Down
20 changes: 17 additions & 3 deletions bin/summarize_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,15 @@ def parse_multiTSV(file_path):
elif line.startswith("#UNASSIGNEDPEPTIDE"):
unassigned_peptide_cols = line.strip().split('\t')[1:]

# Workaround for OpenMS 3.5.0 TextExporter bug (https://github.com/OpenMS/OpenMS/issues/9120):
# consensusXML export writes a phantom column in data rows between 'end' and 'FFId_category'
# that is missing from the header. If the first data row is wider than the header, drop the
# value directly after 'end' from every row so that the data realigns with the header columns.
for rows, cols in [(peptide_rows, peptide_cols), (unassigned_peptide_rows, unassigned_peptide_cols)]:
if rows and len(rows[0]) > len(cols) and 'end' in cols:
extra_idx = cols.index('end') + 1
for i, row in enumerate(rows):
rows[i] = row[:extra_idx] + row[extra_idx + 1:]

peptide_df = pd.DataFrame(peptide_rows, columns=peptide_cols)
consensus_df = pd.DataFrame(consensus_rows, columns=consensus_cols)
# Concatenate CONSENSUS and PEPTIDE columns
Expand Down Expand Up @@ -195,6 +204,10 @@ def process_file(file, prefix, quantify, keep_cols):
header=False
)

# Add a column with unique protein accessions next to accessions
data.insert(data.columns.get_loc('accessions') + 1, 'unique_accessions',
data['accessions'].map(lambda x: ';'.join(dict.fromkeys(x.split(';')))))

# Filter the columns down to a user-defined subset of columns
if keep_cols:
missing_columns = set(keep_cols) - set(data.columns)
Expand All @@ -205,6 +218,10 @@ def process_file(file, prefix, quantify, keep_cols):
regex_patterns = [r'^rt_', r'^mz_', r'^intensity_', r'^charge_']
for pattern in regex_patterns:
keep_cols.extend([col for col in data.columns if re.match(pattern, col)])
# If accessions is kept, also keep unique_accessions directly after it (unless already listed)
if 'accessions' in keep_cols and 'unique_accessions' not in keep_cols:
idx = keep_cols.index('accessions') + 1
keep_cols.insert(idx, 'unique_accessions')
# Remove duplicates while retaining order
keep_cols = list(dict.fromkeys(keep_cols))
data = data.loc[:, keep_cols]
Expand All @@ -213,9 +230,6 @@ def process_file(file, prefix, quantify, keep_cols):
float_cols = data.select_dtypes(include=['float']).columns
data.loc[:, float_cols] = data.loc[:, float_cols].round(5)

# Add a column with unique protein accessions
data['unique_accessions'] = data['accessions'].map(lambda x: ';'.join(dict.fromkeys(x.split(';'))))

data.to_csv(f"{prefix}.tsv", sep='\t', index=False)


Expand Down
4 changes: 2 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,7 @@ process {
}

withName: 'OPENMS_TEXTEXPORTER' {
ext.args = '-id:peptides_only -id:add_hit_metavalues 0 -id:add_metavalues 0'
publishDir = [
enabled: false
]
Expand All @@ -473,8 +474,7 @@ process {
"observed_retention_time_best", "predicted_retention_time_best",
"spec_pearson",
"std_abs_diff",
"ccs_predicted_im2deep", "ccs_error_im2deep",
"ion_mobility"
"ccs_predicted_im2deep", "ccs_error_im2deep", "ion_mobility"
].join(',').trim(),
].join(' ').trim()
publishDir = [
Expand Down
3 changes: 0 additions & 3 deletions modules/local/openms/textexporter/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ process OPENMS_TEXTEXPORTER {
-in $file \\
-out ${prefix}_exported.tsv \\
-threads $task.cpus \\
-id:add_hit_metavalues 0 \\
-id:add_metavalues 0 \\
-id:peptides_only \\
$args
"""

Expand Down
Loading