From 509f24b306868a7ca793bd012c98583388bf6ea0 Mon Sep 17 00:00:00 2001
From: yueqixuan <yueqx@foxmail.com>
Date: Wed, 10 Jun 2026 19:47:25 +0800
Subject: [PATCH 1/2] fix: keep unmatched LucXor rows when merging algorithm results

---
 onsite/onsitec.py | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/onsite/onsitec.py b/onsite/onsitec.py
index ef8572b..a69e386 100644
--- a/onsite/onsitec.py
+++ b/onsite/onsitec.py
@@ -277,7 +277,7 @@ def _get_metas_dict(df, row_idx: int) -> Dict[str, str]:
     return {str(m["name"]): str(m["value"]) for m in items if isinstance(m, dict)}
 
 
-def merge_algorithm_results(ascore_file, phosphors_file, lucxor_file, output_file):
+def merge_algorithm_results(ascore_file, phosphors_file, lucxor_file, output_file, input_idparquet):
     """Merge results from all three algorithms into a single idparquet directory."""
     ascore_df, _, _, _ = load_dataframes(ascore_file)
     phosphors_df, _, _, _ = load_dataframes(phosphors_file)
@@ -291,7 +291,6 @@ def merge_algorithm_results(ascore_file, phosphors_file, lucxor_file, output_fil
         click.echo(f"  Warning: {stats['seq_mismatch']} PSM(s) skipped (seq mismatch)")
 
     merged_rows = []
-    merged_pep_idx = 0
 
     for ai, pi, li in triples:
         a_metas = _get_metas_dict(ascore_df, ai)
@@ -369,17 +368,37 @@ def merge_algorithm_results(ascore_file, phosphors_file, lucxor_file, output_fil
                 "spectrum_reference": spec_ref,
                 "reference_file_name": ref_file,
                 "hit_index": hit_idx,
-                "peptide_identification_index": merged_pep_idx,
+                "peptide_identification_index": li,
                 "psm_metavalues": np.array(merged_metas, dtype=object),
                 "modifications": np.array([], dtype=object),
                 "protein_accessions": np.array([], dtype=object),
                 "additional_scores": np.array([], dtype=object),
                 "run_identifier": str(l_hit.get("run_identifier", "")),
             })
-        merged_pep_idx += 1
 
     out_df = pd.DataFrame(merged_rows)
-    save_dataframes(output_file, out_df, proteins_df, template_df=lucxor_df)
+
+    full_df = lucxor_df.copy()
+
+    out_df = out_df.set_index(["peptide_identification_index", "hit_index"])
+    full_df = full_df.set_index(["peptide_identification_index", "hit_index"])
+
+    for col in out_df.columns:
+        if col in full_df.columns and out_df[col].dtype != full_df[col].dtype:
+            try:
+                out_df[col] = out_df[col].astype(full_df[col].dtype)
+            except Exception:
+                pass
+
+    full_df.update(out_df)
+
+    missing_mask = ~full_df.index.isin(out_df.index)
+    full_df.loc[missing_mask, "score"] = np.nan
+    full_df.loc[missing_mask, "score_type"] = "onsite_combined_score"
+
+    out_df = full_df.reset_index()
+
+    save_dataframes(output_file, out_df, proteins_df, template_df=lucxor_df, source_idparquet=input_idparquet)
     click.echo(f"Successfully merged {stats['merged']} peptide identifications")
     click.echo("Each peptide contains scores from all three algorithms")
 
@@ -447,7 +466,7 @@ def run_all_algorithms_from_single_cli(
             if exit_code != 0:
                 raise RuntimeError(f"LucXor failed with exit code {exit_code}")
 
-            merge_algorithm_results(ascore_out, phosphors_out, lucxor_out, out_file)
+            merge_algorithm_results(ascore_out, phosphors_out, lucxor_out, out_file, id_file)
 
         elapsed = time.time() - start_time
         click.echo(f"All algorithms completed in {elapsed:.2f}s")

From bd3d40a4ccbb6b8bb8d5400a5e22d2403ac6d824 Mon Sep 17 00:00:00 2001
From: yueqixuan <yueqx@foxmail.com>
Date: Wed, 10 Jun 2026 19:54:30 +0800
Subject: [PATCH 2/2] fix: report failed dtype conversions in merge instead of ignoring them

---
 onsite/onsitec.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/onsite/onsitec.py b/onsite/onsitec.py
index a69e386..e404e67 100644
--- a/onsite/onsitec.py
+++ b/onsite/onsitec.py
@@ -387,8 +387,10 @@ def merge_algorithm_results(ascore_file, phosphors_file, lucxor_file, output_fil
         if col in full_df.columns and out_df[col].dtype != full_df[col].dtype:
             try:
                 out_df[col] = out_df[col].astype(full_df[col].dtype)
-            except Exception:
-                pass
+            except Exception as e:
+                click.echo(
+                    f"Could not convert column '{col}' from {out_df[col].dtype} to {full_df[col].dtype}: {e}"
+                )
 
     full_df.update(out_df)