plot after rescoring PseudoROC

Arslan-Siraj · Arslan-Siraj · commit f761b2c1df57 · 2025-12-27T15:32:14.000+01:00
diff --git a/content/rescoring.py b/content/rescoring.py
@@ -6,6 +6,7 @@
 from pathlib import Path
 from src.result_files import *
 from src.run_subprocess import *
+from src.view import plot_FDR_plot
 
 params = page_setup()
 
@@ -89,7 +90,7 @@
         for f in Path(st.session_state.workspace, "result-files").iterdir()
         if (
             f.name.endswith(".idXML")
-            and not any(x in f.name for x in ["0.0100", "0.1000", "1.0000","RT_feat", "RT_Int_feat", "updated_feat"])
+            and not any(x in f.name for x in ["0.0100", "0.1000", "1.0000","RT_feat", "RT_Int_feat", "updated_feat", "_perc_", "_sse_perc_" ])
         )
     ]
 
@@ -105,7 +106,7 @@
         selected_id_file = st.selectbox("Choose a file for rescoring: ", session_idXML_files)
         idXML_file = str(Path(st.session_state.workspace, "result-files", selected_id_file))
         #st.info(f"Full path: {idXML_file}")
-
+        
         protocol = st.selectbox(
             'Select the suitable protocol',
             ['RNA_DEB', 'RNA_NM', 'RNA_4SU', 'RNA_UV', 'RNA_Other'],
@@ -116,6 +117,8 @@
 
         Max_correlation_features = st.checkbox("Max correlation features", value=True, help="Check this box to use max correlation features during rescoring.")
         
+        plot_PseudROC = st.checkbox("plot pseudo-ROC", value=True, help="Check this for pseudo-ROC plot the comparison of rescoring.")
+
         submit_button = st.form_submit_button("Run-Rescoring", type="primary")
 
     # Create a dictionary to capture the output and status of the subprocess
@@ -177,27 +180,37 @@ def terminate_subprocess():
                 
                 #st.write(model_path)
                 #st.write(calibration_data)
+                idXML_file_100_XLs = result_dir / Path(idXML_file).name.replace(".idXML", "_perc_1.0000_XLs.idXML")
 
                 # run the different combinations of features
+                # RT_feat_
                 if Retention_time_features and not Max_correlation_features:
                     st.write("Using ONLY retention time features.")
                     # Assume 'posix' for Linux and macOS
                     args =["nuxl_rescore", "run", "-id", idXML_file, "-calibration", calibration_data,
                         "-unimod", unimod, "-feat_config", feat_config, "-rt_model", "DeepLC", "-model_path", model_path, "-out", str(result_dir)] 
+                    idXML_file_extra_100_XLs = result_dir / f"RT_feat_{Path(idXML_file).stem}_perc_1.0000_XLs.idXML"
                 
+                # Int_feat_
                 elif not Retention_time_features and Max_correlation_features:
                     st.write("Using ONLY max correlation feature.")
                     # Assume 'posix' for Linux and macOS
                     args =["nuxl_rescore", "run", "-id", idXML_file,"-rt_model", "None", "-ms2pip", 
                         "-unimod", unimod, "-feat_config", feat_config, "-out", str(result_dir)] 
+                    idXML_file_extra_100_XLs = result_dir / f"Int_feat_{Path(idXML_file).stem}_perc_1.0000_XLs.idXML"
 
+                # RT_Int_feat_
                 elif Retention_time_features and Max_correlation_features:
                     st.write("Using retention time and max correlation feature.")
                     # Assume 'posix' for Linux and macOS
                     args =["nuxl_rescore", "run", "-id", idXML_file, "-calibration", calibration_data,
                         "-unimod", unimod, "-rt_model", "DeepLC", "-ms2pip", "-feat_config", feat_config, "-model_path", model_path, "-out", str(result_dir)]
+                    idXML_file_extra_100_XLs = result_dir / f"RT_Int_feat_{Path(idXML_file).stem}_perc_1.0000_XLs.idXML"
+
+
                 else:
                     st.error("Please select at least one feature to use for rescoring.")
+                    idXML_file_extra_100_XLs = result_dir / f"updated_{Path(idXML_file).stem}_perc_1.0000_XLs.idXML"
                     st.stop()
 
             # Add any additional variables needed for the subprocess (if any)
@@ -255,13 +268,12 @@ def terminate_subprocess():
             # want to see the command values and argues
             message = f"Running '{' '.join(args)}'"
             st.info(message)
-
+            st.info("check inputs plot: " + str(idXML_file_100_XLs)+' and '+ str(idXML_file_extra_100_XLs)+' '+ str(Path(idXML_file).stem))
             # run subprocess command
             run_subprocess(args, variables, result_dict)
 
         # Check if the subprocess was successful
         if result_dict["success"]:
-            st.info("⚡️ **Rescoring Completed Successfully!** ⚡️")
             # Here can add code here to handle the results, e.g., display them to the user
             extensions_to_remove = {
                 ".csv",
@@ -275,6 +287,21 @@ def terminate_subprocess():
                 if f.is_file() and f.suffix in extensions_to_remove:
                    f.unlink()
 
+            if plot_PseudROC:
+               #ploting_pseudoROC()
+                st.info("Generating Pseudo-ROC plot ...")
+                fig = plot_FDR_plot(
+                    idXML_id=str(idXML_file_100_XLs),
+                    idXML_extra=str(idXML_file_extra_100_XLs),
+                    FDR_level=20,
+                    exp_name=str(Path(idXML_file).stem)
+                )
+
+                #show figure
+                show_fig(fig,  f"{Path(idXML_file).stem}_PseudoROC_plot_rescoring")
+
+            st.success("⚡️ **Rescoring Completed Successfully!** ⚡️")
+
         else:
             # Display error message
             st.error(
diff --git a/src/view.py b/src/view.py
@@ -4,6 +4,8 @@
 import plotly.express as px
 import plotly.graph_objects as go
 import streamlit as st
+import matplotlib.pyplot as plt
+from pyopenms import *
 
 @st.cache_resource
 def plot_ms2_spectrum(spec, title, color):
@@ -161,4 +163,90 @@ def create_spectra(x, y, zero=0):
     fig.layout.template = "plotly_white"
     fig.update_yaxes(fixedrange=True)
 
-    return fig
+    return fig
+
+
+def plot_FDR_plot(idXML_id, idXML_extra, exp_name="FileName", FDR_level=10):
+    """Plot a pseudo-ROC (CSM count vs. q-value) for two idXML files.
+
+    The number of hits whose score lies strictly below a threshold is drawn
+    for every threshold on a 0.0001-spaced grid, once per input file, and
+    the figure is also written as a PDF next to ``idXML_id``.
+
+    NOTE(review): the axis labels and the 0.01 marker treat hit scores as
+    q-values; presumably both inputs are Percolator outputs -- confirm.
+
+    Args:
+        idXML_id: path (str) to the idXML file without extra features.
+        idXML_extra: path (str) to the idXML file with extra features.
+        exp_name: experiment name shown on the first title line.
+        FDR_level: x-axis zoom; 10 -> q <= 0.1, 20 -> q <= 0.2,
+            100 -> q <= 1.0. Any other value leaves the axes autoscaled.
+
+    Returns:
+        The matplotlib figure holding both curves.
+    """
+    step = 0.0001
+    # Thresholds are derived from the index instead of a running ``x += step``
+    # so floating-point error does not pile up along the grid. Both curves
+    # share one grid covering thresholds from -0.0002 up to ~1.0.
+    q_grid = -0.0002 + step * np.arange(10001)
+
+    scores = _sorted_hit_scores(idXML_id)
+    scores_extra = _sorted_hit_scores(idXML_extra)
+
+    # searchsorted(side="left") on sorted scores == count of scores < threshold,
+    # replacing a Python-level rescan of every score for every threshold.
+    counts = np.searchsorted(scores, q_grid, side="left")
+    counts_extra = np.searchsorted(scores_extra, q_grid, side="left")
+
+    # Upper y-limit of the zoomed views: extra-feature count at grid index 3000
+    # (threshold ~0.2998).
+    y_top = int(counts_extra[3000])
+
+    n_at_1pct = int(np.count_nonzero(scores < 0.01))
+    n_at_1pct_extra = int(np.count_nonzero(scores_extra < 0.01))
+
+    # ---------- Plot ----------
+    fig, ax = plt.subplots(figsize=(8, 7))
+    ax.plot(q_grid, counts, color="red", label="no extra feat", linewidth=1.0)
+    ax.plot(q_grid, counts_extra, color="blue", label="extra feat", linewidth=1.0)
+
+    ax.axvline(x=0.01, color="green", linewidth=1.0)
+    title = f"{exp_name}\nNuXL: {n_at_1pct} +extra: {n_at_1pct_extra} CSMs at 1% CSM-level FDR"
+    ax.set_title(title, fontsize=12)
+    ax.set_xlabel("CSM-level q-value", fontsize=12)
+    ax.set_ylabel("no. of CSMs", fontsize=12)
+
+    x_max = {10: 0.1, 20: 0.2, 100: 1.0}.get(FDR_level)
+    if x_max is not None:
+        ax.set_xlim(-0.01, x_max)
+    # Skip the y-limit when it would be 0: identical limits make matplotlib warn.
+    if FDR_level in (10, 20) and y_top > 0:
+        ax.set_ylim(0, y_top)
+
+    ax.legend()
+
+    # ---------- Save figure (no os used) ----------
+    output_pdf = idXML_id.replace(".idXML", "") + ".pdf"
+    fig.savefig(output_pdf, format="pdf", bbox_inches="tight")
+
+    # Rendering is left to the caller.
+    return fig
+
+
+def _sorted_hit_scores(idXML_path):
+    """Collect every hit score of an idXML file as a sorted float array.
+
+    Sorting lets the caller count scores below a threshold by binary search.
+    """
+    protein_ids = []
+    peptide_ids = []
+    IdXMLFile().load(idXML_path, protein_ids, peptide_ids)
+    hit_scores = [
+        float(hit.getScore())
+        for pep_id in peptide_ids
+        for hit in pep_id.getHits()
+    ]
+    return np.sort(np.asarray(hit_scores, dtype=float))
+