Skip to content

Commit f761b2c

Browse files
committed
plot after rescoring PseudoROC
1 parent 0dcbe4a commit f761b2c

File tree

2 files changed

+120
-5
lines changed

2 files changed

+120
-5
lines changed

content/rescoring.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pathlib import Path
77
from src.result_files import *
88
from src.run_subprocess import *
9+
from src.view import plot_FDR_plot
910

1011
params = page_setup()
1112

@@ -89,7 +90,7 @@
8990
for f in Path(st.session_state.workspace, "result-files").iterdir()
9091
if (
9192
f.name.endswith(".idXML")
92-
and not any(x in f.name for x in ["0.0100", "0.1000", "1.0000","RT_feat", "RT_Int_feat", "updated_feat"])
93+
and not any(x in f.name for x in ["0.0100", "0.1000", "1.0000","RT_feat", "RT_Int_feat", "updated_feat", "_perc_", "_sse_perc_" ])
9394
)
9495
]
9596

@@ -105,7 +106,7 @@
105106
selected_id_file = st.selectbox("Choose a file for rescoring: ", session_idXML_files)
106107
idXML_file = str(Path(st.session_state.workspace, "result-files", selected_id_file))
107108
#st.info(f"Full path: {idXML_file}")
108-
109+
109110
protocol = st.selectbox(
110111
'Select the suitable protocol',
111112
['RNA_DEB', 'RNA_NM', 'RNA_4SU', 'RNA_UV', 'RNA_Other'],
@@ -116,6 +117,8 @@
116117

117118
Max_correlation_features = st.checkbox("Max correlation features", value=True, help="Check this box to use max correlation features during rescoring.")
118119

120+
plot_PseudROC = st.checkbox("plot pseudo-ROC", value=True, help="Check this for pseudo-ROC plot the comparison of rescoring.")
121+
119122
submit_button = st.form_submit_button("Run-Rescoring", type="primary")
120123

121124
# Create a dictionary to capture the output and status of the subprocess
@@ -177,27 +180,37 @@ def terminate_subprocess():
177180

178181
#st.write(model_path)
179182
#st.write(calibration_data)
183+
idXML_file_100_XLs = result_dir / Path(idXML_file).name.replace(".idXML", "_perc_1.0000_XLs.idXML")
180184

181185
# run the different combinations of features
186+
# RT_feat_
182187
if Retention_time_features and not Max_correlation_features:
183188
st.write("Using ONLY retention time features.")
184189
# Assume 'posix' for Linux and macOS
185190
args =["nuxl_rescore", "run", "-id", idXML_file, "-calibration", calibration_data,
186191
"-unimod", unimod, "-feat_config", feat_config, "-rt_model", "DeepLC", "-model_path", model_path, "-out", str(result_dir)]
192+
idXML_file_extra_100_XLs = result_dir / f"RT_feat_{Path(idXML_file).stem}_perc_1.0000_XLs.idXML"
187193

194+
# Int_feat_
188195
elif not Retention_time_features and Max_correlation_features:
189196
st.write("Using ONLY max correlation feature.")
190197
# Assume 'posix' for Linux and macOS
191198
args =["nuxl_rescore", "run", "-id", idXML_file,"-rt_model", "None", "-ms2pip",
192199
"-unimod", unimod, "-feat_config", feat_config, "-out", str(result_dir)]
200+
idXML_file_extra_100_XLs = result_dir / f"Int_feat_{Path(idXML_file).stem}_perc_1.0000_XLs.idXML"
193201

202+
# RT_Int_feat_
194203
elif Retention_time_features and Max_correlation_features:
195204
st.write("Using retention time and max correlation feature.")
196205
# Assume 'posix' for Linux and macOS
197206
args =["nuxl_rescore", "run", "-id", idXML_file, "-calibration", calibration_data,
198207
"-unimod", unimod, "-rt_model", "DeepLC", "-ms2pip", "-feat_config", feat_config, "-model_path", model_path, "-out", str(result_dir)]
208+
idXML_file_extra_100_XLs = result_dir / f"RT_Int_feat_{Path(idXML_file).stem}_perc_1.0000_XLs.idXML"
209+
210+
199211
else:
200212
st.error("Please select at least one feature to use for rescoring.")
213+
idXML_file_extra_100_XLs = result_dir / f"updated_{Path(idXML_file).stem}_perc_1.0000_XLs.idXML"
201214
st.stop()
202215

203216
# Add any additional variables needed for the subprocess (if any)
@@ -255,13 +268,12 @@ def terminate_subprocess():
255268
# show the command and its arguments before running it
256269
message = f"Running '{' '.join(args)}'"
257270
st.info(message)
258-
271+
st.info("check inputs plot: " + str(idXML_file_100_XLs)+' and '+ str(idXML_file_extra_100_XLs)+' '+ str(Path(idXML_file).stem))
259272
# run subprocess command
260273
run_subprocess(args, variables, result_dict)
261274

262275
# Check if the subprocess was successful
263276
if result_dict["success"]:
264-
st.info("⚡️ **Rescoring Completed Successfully!** ⚡️")
265277
# Here can add code here to handle the results, e.g., display them to the user
266278
extensions_to_remove = {
267279
".csv",
@@ -275,6 +287,21 @@ def terminate_subprocess():
275287
if f.is_file() and f.suffix in extensions_to_remove:
276288
f.unlink()
277289

290+
if plot_PseudROC:
291+
#ploting_pseudoROC()
292+
st.info("Generating Pseudo-ROC plot ...")
293+
fig = plot_FDR_plot(
294+
idXML_id=str(idXML_file_100_XLs),
295+
idXML_extra=str(idXML_file_extra_100_XLs),
296+
FDR_level=20,
297+
exp_name=str(Path(idXML_file).stem)
298+
)
299+
300+
#show figure
301+
show_fig(fig, f"{Path(idXML_file).stem}_PseudoROC_plot_rescoring")
302+
303+
st.success("⚡️ **Rescoring Completed Successfully!** ⚡️")
304+
278305
else:
279306
# Display error message
280307
st.error(

src/view.py

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import plotly.express as px
55
import plotly.graph_objects as go
66
import streamlit as st
7+
import matplotlib.pyplot as plt
8+
from pyopenms import *
79

810
@st.cache_resource
911
def plot_ms2_spectrum(spec, title, color):
@@ -161,4 +163,90 @@ def create_spectra(x, y, zero=0):
161163
fig.layout.template = "plotly_white"
162164
fig.update_yaxes(fixedrange=True)
163165

164-
return fig
166+
return fig
167+
168+
169+
def plot_FDR_plot(idXML_id, idXML_extra, exp_name="FileName", FDR_level=10):
    """Plot cumulative hit counts versus score threshold for two idXML files.

    For each file, every hit score (``hit.getScore()``) is collected and the
    number of hits with a score strictly below each threshold is drawn as a
    curve. The x axis is labelled "CSM-level q-value", so the stored score is
    presumably a q-value -- NOTE(review): confirm against the rescoring output.

    A vertical line marks 0.01, and the title reports how many hits in each
    file score below 0.01. The figure is also written as a PDF next to
    ``idXML_id`` (same path with ".idXML" removed and ".pdf" appended).

    Args:
        idXML_id: Path (str) of the idXML file without extra features.
        idXML_extra: Path (str) of the idXML file with extra features.
        exp_name: Label shown on the first line of the plot title.
        FDR_level: Selects the visible x range: 10 -> up to 0.1,
            20 -> up to 0.2, 100 -> up to 1.0. Any other value leaves the
            axes auto-scaled. For 10 and 20 the y axis is additionally
            capped at the "extra" curve's count at grid index 3000.

    Returns:
        The matplotlib figure. It is not rendered here; displaying it is
        left to the caller.
    """
    scores = _load_hit_scores(idXML_id)
    scores_extra = _load_hit_scores(idXML_extra)

    # Shared threshold grid for both curves: starts at -0.0002 and advances
    # in steps of 0.0001. It is built as a running sum so the values follow
    # the same step-by-step accumulation as a plain ``x += 0.0001`` loop.
    step_sizes = np.full(10001, 0.0001)
    step_sizes[0] = -0.0002
    thresholds = np.add.accumulate(step_sizes)

    counts = _count_below(scores, thresholds)
    counts_extra = _count_below(scores_extra, thresholds)

    # Count of "extra" hits below the threshold at grid index 3000; used as
    # the y-axis cap for the zoomed views.
    y_cap = int(counts_extra[3000])

    n_below_1pct = int(np.count_nonzero(scores < 0.01))
    n_below_1pct_extra = int(np.count_nonzero(scores_extra < 0.01))

    # ---------- Plot ----------
    fig, ax = plt.subplots(figsize=(8, 7))

    ax.plot(thresholds, counts,
            color="red", label="no extra feat", linewidth=1.0)
    ax.plot(thresholds, counts_extra,
            color="blue", label="extra feat", linewidth=1.0)

    ax.axvline(x=0.01, color="green", linewidth=1.0)
    ax.set_title(f"{exp_name}\nNuXL: {n_below_1pct} +extra: {n_below_1pct_extra} CSMs at 1% CSM-level FDR", fontsize=12)
    ax.set_xlabel("CSM-level q-value", fontsize=12)
    ax.set_ylabel("no. of CSMs", fontsize=12)

    x_upper = {10: 0.1, 20: 0.2, 100: 1.0}.get(FDR_level)
    if x_upper is not None:
        ax.set_xlim(-0.01, x_upper)
    # Skip the cap when it is 0: identical lower/upper limits would give a
    # degenerate axis, so fall back to auto-scaling instead.
    if FDR_level in (10, 20) and y_cap > 0:
        ax.set_ylim(0, y_cap)

    ax.legend()

    # ---------- Save figure ----------
    output_pdf = idXML_id.replace(".idXML", "") + ".pdf"
    fig.savefig(output_pdf, format="pdf", bbox_inches="tight")

    return fig


def _load_hit_scores(idxml_path):
    """Return the scores of all hits in an idXML file as a float array."""
    protein_ids = []
    peptide_ids = []
    IdXMLFile().load(idxml_path, protein_ids, peptide_ids)
    return np.array(
        [float(hit.getScore()) for pep_id in peptide_ids for hit in pep_id.getHits()],
        dtype=float,
    )


def _count_below(scores, thresholds):
    """Return, per threshold, how many scores are strictly below it."""
    # side="left" on the sorted scores yields the number of elements that
    # are strictly smaller than each threshold.
    return np.searchsorted(np.sort(scores), thresholds, side="left")
252+

0 commit comments

Comments
 (0)