Loosen evaluate-mode rules (skip amplicon length filter, treat all targets as on-target); add more descriptive, sequence-derived filenames and primer pair IDs

kbryanhsu · kbryanhsu · commit f8471134a084 · 2026-03-11T15:10:03.000Z
diff --git a/gui/app.py b/gui/app.py
@@ -672,7 +672,25 @@ def _tab_run():
         st.divider()
         if rc == 0:
             st.success("Pipeline finished successfully!")
-            if FINAL_DIR.exists():
+            mode = st.session_state.get("mode", "Singleplex")
+            if mode == "Evaluate" and EVALUATE_DIR.exists():
+                xlsx_files = sorted(EVALUATE_DIR.glob("**/*.xlsx"))
+                if xlsx_files:
+                    import pandas as pd
+
+                    st.subheader("Evaluation results preview")
+                    for xf in xlsx_files:
+                        try:
+                            df = pd.read_excel(xf, sheet_name="detail")
+                            st.write(f"**{xf.name}** — {len(df)} target alignments")
+                            st.dataframe(df.head(10), use_container_width=True)
+                        except Exception as exc:
+                            st.warning(f"Could not read {xf.name}: {exc}")
+                    st.caption("See the **Results** tab for full details and downloads.")
+                else:
+                    st.warning("Pipeline completed but no evaluation reports were generated. "
+                               "Check the log for warnings.")
+            elif FINAL_DIR.exists():
                 csvs = sorted(FINAL_DIR.glob("*.csv"))
                 if csvs:
                     import pandas as pd
@@ -747,28 +765,41 @@ def _tab_results():
     st.subheader("Evaluation reports")
 
     if EVALUATE_DIR.exists():
-        reports = sorted(
-            p for p in EVALUATE_DIR.iterdir()
-            if p.is_dir()
-        )
         xlsx_files = sorted(EVALUATE_DIR.glob("**/*.xlsx"))
     else:
-        reports = []
         xlsx_files = []
 
-    if reports:
-        st.write("Report directories: " + ", ".join(f"`{r.name}`" for r in reports))
-
     if xlsx_files:
-        for xf in xlsx_files:
+        import pandas as pd
+
+        selected_xlsx = st.selectbox(
+            "Select report to view",
+            options=[xf.name for xf in xlsx_files],
+            key="result_xlsx",
+        )
+        if selected_xlsx:
+            xf = next(x for x in xlsx_files if x.name == selected_xlsx)
+            try:
+                # Show summary sheet
+                df_summary = pd.read_excel(xf, sheet_name="summary", header=None)
+                st.write("**Summary**")
+                st.dataframe(df_summary, use_container_width=True, hide_index=True)
+
+                # Show detail sheet
+                df_detail = pd.read_excel(xf, sheet_name="detail")
+                st.write(f"**Detail** — {len(df_detail)} target alignments")
+                st.dataframe(df_detail, use_container_width=True)
+            except Exception as exc:
+                st.error(f"Error reading {selected_xlsx}: {exc}")
+
             st.download_button(
-                f"Download {xf.name}",
+                f"Download {selected_xlsx}",
                 data=xf.read_bytes(),
-                file_name=xf.name,
+                file_name=selected_xlsx,
                 mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
-                key=f"dl_{xf.name}",
+                key=f"dl_{selected_xlsx}",
             )
-    elif not reports:
+    else:
         st.info("No evaluation reports found.")
 
     st.divider()
diff --git a/src/qprimer_designer/commands/prepare_input.py b/src/qprimer_designer/commands/prepare_input.py
@@ -32,6 +32,7 @@ def register(subparsers):
     parser.add_argument("--reftype", dest="reftype", required=True, choices=["on", "off"], help="on-target or off-target")
     parser.add_argument("--features", dest="pri_features", required=True, help="Primer features CSV")
     parser.add_argument("--prev", default="", help="Previous evaluation file (for off-target restriction)")
+    parser.add_argument("--skip-length-filter", action="store_true", help="Skip amplicon length constraints")
     parser.set_defaults(func=run)
 
 
@@ -47,10 +48,16 @@ def _ensure_list(x):
 def run(args):
     """Run the prepare-input command."""
     params = parse_params(args.param_file)
-    min_amp_len = int(params.get("AMPLEN_MIN", 60))
-    max_amp_len = int(params.get("AMPLEN_MAX", 200))
-    min_off_len = int(params.get("OFFLEN_MIN", 60))
-    max_off_len = int(params.get("OFFLEN_MAX", 2000))
+    if args.skip_length_filter:
+        min_amp_len = 0
+        max_amp_len = float('inf')
+        min_off_len = 0
+        max_off_len = float('inf')
+    else:
+        min_amp_len = int(params.get("AMPLEN_MIN", 60))
+        max_amp_len = int(params.get("AMPLEN_MAX", 200))
+        min_off_len = int(params.get("OFFLEN_MIN", 60))
+        max_off_len = int(params.get("OFFLEN_MAX", 2000))
     num_select = int(params.get("NUM_TOP_SENSITIVITY", 100))
 
     print(f"Preparing ML input from {args.mapped}...")
@@ -108,7 +115,14 @@ def run(args):
 
     drop_cols = ['orientation', 'forrev']
 
-    if args.reftype == 'on':
+    skip = args.skip_length_filter
+
+    if skip:
+        fors = maptbl[maptbl['forrev'] == 'f'].drop(columns=drop_cols)
+        revs = maptbl[maptbl['forrev'] == 'r'].drop(columns=drop_cols)
+        minl, maxl = 0, float('inf')
+        lfunc = max
+    elif args.reftype == 'on':
         fors = maptbl[(maptbl['orientation'] == 0) & (maptbl['forrev'] == 'f')].drop(columns=drop_cols)
         revs = maptbl[(maptbl['orientation'] == 16) & (maptbl['forrev'] == 'r')].drop(columns=drop_cols)
         minl, maxl = min_amp_len, max_amp_len
@@ -164,7 +178,7 @@ def run(args):
                 # Use the reverse primer's actual length from features
                 r_len = int(revs.loc[r_id, 'len'])
                 ampseq = tarseqs[t_f][st_f - 1: st_r + r_len]
-                if minl <= len(ampseq) <= maxl:
+                if len(ampseq) > 0 and minl <= len(ampseq) <= maxl:
                     targets_by_r[r_id].append(t_f)
                     starts_by_r[r_id].append(st_f)
                     amplens_by_r[r_id].add(len(ampseq))
diff --git a/workflows/Snakefile.example b/workflows/Snakefile.example
@@ -5,7 +5,7 @@
 ############################################
 
 # SINGLEPLEX REQUIRED: user must fill these for singleplex, ignored if not multiplex
-TARGETS = ['TEST']
+TARGETS = ['H3']
 CROSS   = []
 
 # shared option for multiplex and singleplex
@@ -146,7 +146,10 @@ if EVALUATE:
 
         PSET_NAME = PSET_PATH.stem
     else:
-        PSET_NAME = "ps2eval"
+        # Build a readable name from primer sequences
+        f_tag = (FOR[:5] + FOR[-5:]).upper() if len(FOR) >= 10 else FOR.upper()
+        r_tag = (REV[:5] + REV[-5:]).upper() if len(REV) >= 10 else REV.upper()
+        PSET_NAME = f"eval_f_{f_tag}_r_{r_tag}"
 
 ############################################
 # Final targets
@@ -381,8 +384,13 @@ rule prepare_pset_fasta:
             comp = str.maketrans("ACGTacgt", "TGCAtgca")
             rev_seq_rc = rev_seq.translate(comp)[::-1]
 
+            # Use a readable primer pair ID from the sequences
+            f_tag = (for_seq[:5] + for_seq[-5:]).upper() if len(for_seq) >= 10 else for_seq.upper()
+            r_tag = (rev_seq[:5] + rev_seq[-5:]).upper() if len(rev_seq) >= 10 else rev_seq.upper()
+            pair_id = f"f_{f_tag}_r_{r_tag}"
+
             with open(out, "w") as f:
-                f.write(f">1_for\n{for_seq}\n>1_rev\n{rev_seq_rc}\n")
+                f.write(f">{pair_id}_for\n{for_seq}\n>{pair_id}_rev\n{rev_seq_rc}\n")
 
         ids = check_ids(out)
 
@@ -593,16 +601,17 @@ rule prepare_input:
     output:
         "inputs/{virus}.{target}.input"
     params:
-        ref_type=lambda wc: "on" if wc.virus == wc.target else "off",
+        ref_type=lambda wc: "on" if (wc.virus == wc.target or EVALUATE) else "off",
         prev_arg=lambda wc: (
             f"--prev outputs/{wc.virus}.{wc.virus}.eval"
             if wc.virus != wc.target and not EVALUATE else ""
-        )
+        ),
+        skip_len=lambda wc: "--skip-length-filter" if EVALUATE else ""
     shell:
         "qprimer prepare-input "
         "--in {input.mapped} {params.prev_arg} --out {output} "
         "--ref {input.ref} --reftype {params.ref_type} "
-        "--features {input.features} --params {PARAMS}"
+        "--features {input.features} --params {PARAMS} {params.skip_len}"
 
 
 rule evaluate:
@@ -619,7 +628,7 @@ rule evaluate:
     resources:
         gpu=1
     params:
-        ref_type=lambda wc: "on" if wc.virus == wc.target else "off"
+        ref_type=lambda wc: "on" if (wc.virus == wc.target or EVALUATE) else "off"
     shell:
         "qprimer evaluate "
         "--in {input.inp} --out {output} "
diff --git a/workflows/Snakefile.template b/workflows/Snakefile.template
@@ -146,7 +146,10 @@ if EVALUATE:
 
         PSET_NAME = PSET_PATH.stem
     else:
-        PSET_NAME = "ps2eval"
+        # Build a readable name from primer sequences
+        f_tag = (FOR[:5] + FOR[-5:]).upper() if len(FOR) >= 10 else FOR.upper()
+        r_tag = (REV[:5] + REV[-5:]).upper() if len(REV) >= 10 else REV.upper()
+        PSET_NAME = f"eval_f_{f_tag}_r_{r_tag}"
 
 ############################################
 # Final targets
@@ -381,8 +384,13 @@ rule prepare_pset_fasta:
             comp = str.maketrans("ACGTacgt", "TGCAtgca")
             rev_seq_rc = rev_seq.translate(comp)[::-1]
 
+            # Use a readable primer pair ID from the sequences
+            f_tag = (for_seq[:5] + for_seq[-5:]).upper() if len(for_seq) >= 10 else for_seq.upper()
+            r_tag = (rev_seq[:5] + rev_seq[-5:]).upper() if len(rev_seq) >= 10 else rev_seq.upper()
+            pair_id = f"f_{f_tag}_r_{r_tag}"
+
             with open(out, "w") as f:
-                f.write(f">1_for\n{for_seq}\n>1_rev\n{rev_seq_rc}\n")
+                f.write(f">{pair_id}_for\n{for_seq}\n>{pair_id}_rev\n{rev_seq_rc}\n")
 
         ids = check_ids(out)
 
@@ -593,16 +601,17 @@ rule prepare_input:
     output:
         "inputs/{virus}.{target}.input"
     params:
-        ref_type=lambda wc: "on" if wc.virus == wc.target else "off",
+        ref_type=lambda wc: "on" if (wc.virus == wc.target or EVALUATE) else "off",
         prev_arg=lambda wc: (
             f"--prev outputs/{wc.virus}.{wc.virus}.eval"
             if wc.virus != wc.target and not EVALUATE else ""
-        )
+        ),
+        skip_len=lambda wc: "--skip-length-filter" if EVALUATE else ""
     shell:
         "qprimer prepare-input "
         "--in {input.mapped} {params.prev_arg} --out {output} "
         "--ref {input.ref} --reftype {params.ref_type} "
-        "--features {input.features} --params {PARAMS}"
+        "--features {input.features} --params {PARAMS} {params.skip_len}"
 
 
 rule evaluate:
@@ -619,7 +628,7 @@ rule evaluate:
     resources:
         gpu=1
     params:
-        ref_type=lambda wc: "on" if wc.virus == wc.target else "off"
+        ref_type=lambda wc: "on" if (wc.virus == wc.target or EVALUATE) else "off"
     shell:
         "qprimer evaluate "
         "--in {input.inp} --out {output} "