fix: resolve all code review issues (Critical+Important+Minor)

ImL1s · ImL1s · commit 55666c5acf53 · 2026-03-21T21:43:46.000+08:00
- fix(variants): AttributeError on list.get() → list[10] with bounds check
- fix(pyproject): build-backend setuptools.backends → setuptools.build_meta
- fix(pyproject): move pysam/pyvcf3 to optional [bio], remove unused langchain-core
- fix(pyproject): correct GitHub URLs to ImL1s/dogneo
- fix(cli/rank): replace placeholder with generate_peptides/build_candidates pipeline (protein DB loading and binding predictions still TODO)
- fix(cli/report): wire up ReportGenerator with pre_rendered_candidates support
- fix(ranking): agretopicity now uses _score_agretopicity() when WT binding available
- feat(report): add pre_rendered_candidates param to generate_html/generate_markdown
- feat(ranking): add wt_binding_nm field to NeoantigenCandidate dataclass
diff --git a/dogneo/cli.py b/dogneo/cli.py
@@ -130,15 +130,42 @@ def rank(
     # Step 3: Generate peptides
     click.echo("🔬 Generating mutant peptides...")
     mhci_lens = [int(x) for x in mhci_lengths.split(",")]
-    # (Simplified — would use protein DB in full pipeline)
     click.echo(f"   Peptide lengths: MHC-I {mhci_lens}")
 
-    # Step 4: Ranking (placeholder candidates for direct VCF mode)
+    # NOTE: full peptide generation requires a canine protein FASTA database.
+    # In the full pipeline (Snakemake), this is handled by the alignment steps.
+    from dogneo.core.peptides import ProteinDatabase, generate_peptides
+    from dogneo.core.binding import BindingPrediction
+    from dogneo.core.ranking import build_candidates
+
+    protein_db = ProteinDatabase()
+    # TODO: accept --protein-db CLI flag for standalone usage
+    peptides_by_variant: dict[str, list] = {}
+    predictions_by_peptide: dict[str, list] = {}
+
+    for v in coding:
+        peps = generate_peptides(v, protein_db, lengths=mhci_lens)
+        if peps:
+            peptides_by_variant[v.variant_id] = peps
+
+    if not peptides_by_variant:
+        click.secho(
+            "⚠️  No peptides generated — this likely means no canine protein DB "
+            "was loaded. Use the full Snakemake pipeline (dogneo run) or provide "
+            "a pre-built candidates JSON to the report command.",
+            fg="yellow",
+        )
+
+    # Step 4: Ranking
     click.echo("📊 Scoring and ranking candidates...")
-    # In a full implementation, this would chain through binding prediction
-    # For now, we create candidates from variant data
-    candidates: list[NeoantigenCandidate] = []
-    click.echo(f"   {len(candidates)} candidates ranked")
+    candidates = build_candidates(coding, peptides_by_variant, predictions_by_peptide)
+
+    if allele_list and candidates:
+        ranked = rank_candidates(candidates)
+        click.echo(f"   {len(ranked)} candidates ranked")
+    else:
+        ranked = candidates
+        click.echo(f"   {len(ranked)} candidates (unranked — no alleles or binding data)")
 
     # Step 5: Export
     if "tsv" in format_list:
@@ -183,9 +210,41 @@ def report(input_path: str, fmt: str, output: str, llm_tier: str) -> None:
     with open(input_path) as f:
         data = _json.load(f)
 
-    # Reconstruct candidates (simplified)
-    click.echo(f"📄 Generating {fmt} report...")
-    click.echo(f"   Input: {data.get('metadata', {}).get('total_candidates', '?')} candidates")
+    total = data.get("metadata", {}).get("total_candidates", "?")
+    sample_id = data.get("metadata", {}).get("sample_id", "UNKNOWN")
+    click.echo(f"📄 Generating {fmt} report from {total} candidates...")
+
+    from dogneo.report.generator import ReportGenerator
+    from dogneo.config import LLMConfig
+    from dogneo.llm.router import LLMRouter
+
+    llm_router = None
+    if llm_tier != "none":
+        llm_config = LLMConfig(default_tier=llm_tier)
+        llm_router = LLMRouter(config=llm_config)
+
+    gen = ReportGenerator(llm_router=llm_router)
+    output_path = Path(output)
+
+    # The JSON's "candidates" list already has serialized candidate dicts
+    candidate_dicts = data.get("candidates", [])
+
+    if fmt == "html":
+        gen.generate_html(
+            [], sample_id,
+            parameters=data.get("metadata", {}).get("parameters", {}),
+            alleles=data.get("metadata", {}).get("alleles", []),
+            output_path=output_path,
+            pre_rendered_candidates=candidate_dicts,
+        )
+    else:
+        gen.generate_markdown(
+            [], sample_id,
+            parameters=data.get("metadata", {}).get("parameters", {}),
+            output_path=output_path,
+            pre_rendered_candidates=candidate_dicts,
+        )
+
     click.secho(f"✅ Report written to: {output}", fg="green")
 
 
diff --git a/dogneo/core/ranking.py b/dogneo/core/ranking.py
@@ -35,6 +35,7 @@ class NeoantigenCandidate:
     peptide: MutantPeptide
     binding: BindingPrediction
     expression_tpm: float = 0.0
+    wt_binding_nm: float = 0.0  # WT binding affinity for agretopicity (0 = unknown)
     composite_score: float = 0.0
     rank: int = 0
     score_components: dict[str, float] = field(default_factory=dict)
@@ -189,8 +190,13 @@ def rank_candidates(
             candidate.peptide.mut_sequence,
         )
 
-        # Agretopicity (placeholder: would need WT binding prediction)
-        components["agretopicity"] = 0.5  # Default neutral
+        # Agretopicity: compare WT vs mutant binding affinity
+        if candidate.wt_binding_nm > 0:
+            components["agretopicity"] = _score_agretopicity(
+                candidate.wt_binding_nm, candidate.binding.affinity_nm,
+            )
+        else:
+            components["agretopicity"] = 0.5  # Neutral when WT binding unknown
 
         # Caller agreement
         components["caller_agreement"] = _score_caller_agreement(
diff --git a/dogneo/core/variants.py b/dogneo/core/variants.py
@@ -124,7 +124,7 @@ def _extract_vep_annotation(csq_str: str) -> dict[str, str]:
         "effect": parts[1],
         "gene": parts[3],
         "transcript_id": parts[6],
-        "hgvs_c": parts.get(10, "") if len(parts) > 10 else "",
+        "hgvs_c": parts[10] if len(parts) > 10 else "",
         "hgvs_p": parts[11] if len(parts) > 11 else "",
     }
 
diff --git a/dogneo/report/generator.py b/dogneo/report/generator.py
@@ -142,6 +142,7 @@ def generate_html(
         alleles: list[str] | None = None,
         output_path: str | Path | None = None,
         top_n: int = 50,
+        pre_rendered_candidates: list[dict] | None = None,
     ) -> str:
         """Generate an HTML report from ranked candidates.
 
@@ -159,7 +160,10 @@ def generate_html(
         from jinja2 import Template
 
         # Prepare candidate dicts for template
-        candidate_dicts = [c.to_dict() for c in candidates[:top_n]]
+        if pre_rendered_candidates is not None:
+            candidate_dicts = pre_rendered_candidates[:top_n]
+        else:
+            candidate_dicts = [c.to_dict() for c in candidates[:top_n]]
 
         # Generate AI summary if router available
         ai_summary = ""
@@ -195,6 +199,9 @@ def generate_markdown(
         candidates: list[NeoantigenCandidate],
         sample_id: str,
         top_n: int = 20,
+        parameters: dict[str, Any] | None = None,
+        output_path: str | Path | None = None,
+        pre_rendered_candidates: list[dict] | None = None,
     ) -> str:
         """Generate a Markdown summary of top candidates.
 
@@ -220,14 +227,25 @@ def generate_markdown(
             "|---|------|----------|---------|--------|----------|-----|-------|",
         ]
 
-        for c in candidates[:top_n]:
-            d = c.to_dict()
+        if pre_rendered_candidates is not None:
+            candidate_dicts = pre_rendered_candidates[:top_n]
+        else:
+            candidate_dicts = [c.to_dict() for c in candidates[:top_n]]
+
+        for d in candidate_dicts:
             lines.append(
-                f"| {d['rank']} | {d['gene']} | {d['mutation']} | "
-                f"`{d['mutant_peptide']}` | {d['allele']} | "
-                f"{d['binding_affinity_nm']:.1f} | {d['expression_tpm']:.1f} | "
-                f"{d['composite_score']:.4f} |"
+                f"| {d.get('rank', '-')} | {d.get('gene', '')} | {d.get('mutation', '')} | "
+                f"`{d.get('mutant_peptide', '')}` | {d.get('allele', '')} | "
+                f"{float(d.get('binding_affinity_nm', 0)):.1f} | {float(d.get('expression_tpm', 0)):.1f} | "
+                f"{float(d.get('composite_score', 0)):.4f} |"
             )
 
-        lines.extend(["", f"*Total candidates: {len(candidates)}*"])
-        return "\n".join(lines)
+        total = len(pre_rendered_candidates) if pre_rendered_candidates else len(candidates)
+        lines.extend(["", f"*Total candidates: {total}*"])
+        md = "\n".join(lines)
+
+        if output_path:
+            Path(output_path).write_text(md, encoding="utf-8")
+            logger.info("Markdown report written to: %s", output_path)
+
+        return md
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [build-system]
 requires = ["setuptools>=68.0", "wheel"]
-build-backend = "setuptools.backends"
+build-backend = "setuptools.build_meta"
 
 [project]
 name = "dogneo"
@@ -25,10 +25,8 @@ classifiers = [
 ]
 
 dependencies = [
-    "pysam>=0.22.0",
     "pandas>=2.0",
     "biopython>=1.82",
-    "pyvcf3>=1.0.3",
     "click>=8.1",
     "jinja2>=3.1",
     "pyyaml>=6.0",
@@ -37,12 +35,15 @@ dependencies = [
 
 [project.optional-dependencies]
 llm = [
-    "langchain-core>=0.2",
     "llama-cpp-python>=0.2.50",
     "openai>=1.10",
     "anthropic>=0.25",
     "google-generativeai>=0.5",
 ]
+bio = [
+    "pysam>=0.22.0",
+    "pyvcf3>=1.0.3",
+]
 pipeline = [
     "snakemake>=8.0",
 ]
@@ -52,15 +53,15 @@ dev = [
     "ruff>=0.3",
     "mypy>=1.8",
 ]
-all = ["dogneo[llm,pipeline,dev]"]
+all = ["dogneo[llm,bio,pipeline,dev]"]
 
 [project.scripts]
 dogneo = "dogneo.cli:main"
 
 [project.urls]
-Homepage = "https://github.com/dog-mrna-sos/dogneo"
-Documentation = "https://github.com/dog-mrna-sos/dogneo#readme"
-Issues = "https://github.com/dog-mrna-sos/dogneo/issues"
+Homepage = "https://github.com/ImL1s/dogneo"
+Documentation = "https://github.com/ImL1s/dogneo#readme"
+Issues = "https://github.com/ImL1s/dogneo/issues"
 
 [tool.setuptools.packages.find]
 include = ["dogneo*"]

| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | | `@@ -124,7 +124,7 @@ def _extract_vep_annotation(csq_str: str) -> dict[str, str]:` |
| `124` | `124` | `"effect": parts[1],` |
| `125` | `125` | `"gene": parts[3],` |
| `126` | `126` | `"transcript_id": parts[6],` |
| `127` | | `- "hgvs_c": parts.get(10, "") if len(parts) > 10 else "",` |
| | `127` | `+ "hgvs_c": parts[10] if len(parts) > 10 else "",` |
| `128` | `128` | `"hgvs_p": parts[11] if len(parts) > 11 else "",` |
| `129` | `129` | `}` |
| `130` | `130` | |