
Commit 1051658

Clarify program flow for map sharpening and map to model
1 parent 761ff65 commit 1051658

8 files changed: 118 additions, 9 deletions

libtbx/langchain/agent/best_files_tracker.py

Lines changed: 7 additions & 0 deletions
@@ -1277,6 +1277,8 @@ def _is_intermediate_file(self, path):
       'superposed_predicted_untrimmed',  # Intermediate predictions
       '_ELBOW.',  # Elbow geometry files (not fitted ligands)
       'ELBOW.',  # Elbow geometry files
+      '/tnb/',  # trace_and_build intermediate directory
+      '/trace_and_build/',  # trace_and_build intermediate directory
     ]

     path_check = path.lower()
@@ -1287,6 +1289,11 @@ def _is_intermediate_file(self, path):
       if pattern_lower in path_check or pattern_lower in basename_check:
         return True

+    # Basename-specific patterns (regex)
+    import re
+    if re.match(r'path_\d+\.pdb$', basename_check):
+      return True  # trace-and-build fragment files
+
     return False

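As a quick check of the new basename rule, a standalone sketch (the file names below are hypothetical, not taken from this commit):

import os
import re

# Mirrors the basename regex added to _is_intermediate_file(): only the
# path_N.pdb trace fragments are flagged, ordinary model names are not.
for name in ["/run1/tnb/path_54.pdb", "path_1.pdb", "refine_001_model.pdb"]:
    basename_check = os.path.basename(name).lower()
    print(name, "->", bool(re.match(r'path_\d+\.pdb$', basename_check)))
# -> True, True, False (the first would also be caught by the new '/tnb/' path pattern)
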
libtbx/langchain/agent/directive_extractor.py

Lines changed: 49 additions & 1 deletion
@@ -115,12 +115,15 @@
 - Resolution is the diffraction limit (d_min), typically 1.5-4.0 Å. Only extract as resolution if explicitly stated as resolution/d_min.
 - If the text says "Resolution limit: Not mentioned" or similar, do NOT extract a resolution value from anywhere else in the text.

-**CRITICAL: max_refine_cycles vs after_program**
+**CRITICAL: max_refine_cycles vs after_program vs after_cycle**
 - max_refine_cycles=N: Limits the NUMBER of refinement jobs to N. The workflow continues normally until refinement, then stops after N refinement jobs.
 - after_program="X": FORCES program X to be run IMMEDIATELY, bypassing normal workflow. Only use when user wants to skip directly to a specific program.
+- after_cycle=N: Stops after N AGENT CYCLES (each cycle = one program execution). ONLY use when user says "stop after N cycles" with an explicit number.
 - "maximum of one refinement" or "at most one refinement" → ONLY set max_refine_cycles=1, do NOT set after_program
 - "solve the structure with one refinement" → max_refine_cycles=1 (workflow proceeds normally: xtriage → model → refine)
+- "stop after refinement" or "stop after one refinement" → max_refine_cycles=1, skip_validation=true
 - "just run refinement" or "only refinement" → after_program="phenix.refine" (skip to refinement immediately)
+- Do NOT use after_cycle for "stop after refinement" — that would stop after the first agent cycle (e.g., xtriage), not after refinement.

 **CRITICAL: skip_validation RULE**
 If the user specifies ANY explicit stop condition (like "stop after X" or "Stop Condition: ..."),
@@ -851,6 +854,10 @@ def _log(msg):
     # e.g., after_program=phenix.map_symmetry but constraints say "build a model" → don't stop
     validated = _fix_multi_step_workflow_conflict(validated, _log)

+    # Fix after_cycle=1 when max_refine_cycles is set
+    # LLM often confuses "one refinement cycle" with "one agent cycle"
+    validated = _fix_after_cycle_refinement_conflict(validated, _log)
+
     return validated

@@ -1023,6 +1030,47 @@ def _fix_multi_step_workflow_conflict(directives, log):
     return directives


+def _fix_after_cycle_refinement_conflict(directives, log):
+    """
+    Fix conflict where LLM sets after_cycle=1 alongside max_refine_cycles.
+
+    When user says "stop after refinement" or "one refinement cycle", the LLM
+    sometimes produces both max_refine_cycles=1 AND after_cycle=1. The after_cycle=1
+    is wrong — it would stop after the first agent cycle (e.g., xtriage), not after
+    the refinement program completes.
+
+    Rules:
+    - after_cycle=1 + max_refine_cycles=N → remove after_cycle (keep max_refine_cycles)
+    - after_cycle=1 alone (no max_refine_cycles, no after_program) → suspicious,
+      only keep if no workflow programs would run before refinement
+    """
+    stop_conditions = directives.get("stop_conditions", {})
+    if not stop_conditions:
+        return directives
+
+    after_cycle = stop_conditions.get("after_cycle")
+    max_refine = stop_conditions.get("max_refine_cycles")
+    after_program = stop_conditions.get("after_program")
+
+    # Case 1: after_cycle + max_refine_cycles — the after_cycle is redundant/wrong
+    if after_cycle is not None and max_refine is not None:
+        log("DIRECTIVES: Removing after_cycle=%d (conflicts with max_refine_cycles=%d)" %
+            (after_cycle, max_refine))
+        log("DIRECTIVES: (LLM confused 'one refinement' with 'one agent cycle')")
+        del directives["stop_conditions"]["after_cycle"]
+
+    # Case 2: after_cycle=1 alone (no after_program, no max_refine)
+    # This almost always means the LLM misinterpreted "stop after refinement"
+    # as "stop after 1 cycle". Convert to max_refine_cycles=1.
+    elif after_cycle == 1 and after_program is None and max_refine is None:
+        log("DIRECTIVES: Converting after_cycle=1 → max_refine_cycles=1")
+        log("DIRECTIVES: (after_cycle=1 would stop after xtriage, not after refinement)")
+        del directives["stop_conditions"]["after_cycle"]
+        directives["stop_conditions"]["max_refine_cycles"] = 1
+
+    return directives
+
+
 def _fix_program_name(name):
     """
     Try to fix common variations in program names.

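A minimal usage sketch of the new conflict fix (assuming the module imports under the path its file location suggests; the directive dicts are made-up examples):

from libtbx.langchain.agent.directive_extractor import _fix_after_cycle_refinement_conflict

def log(msg):
    print(msg)

# Case 1: both are set -> after_cycle is dropped, max_refine_cycles is kept
d = {"stop_conditions": {"after_cycle": 1, "max_refine_cycles": 1}}
print(_fix_after_cycle_refinement_conflict(d, log)["stop_conditions"])  # {'max_refine_cycles': 1}

# Case 2: a lone after_cycle=1 is converted to max_refine_cycles=1
d = {"stop_conditions": {"after_cycle": 1}}
print(_fix_after_cycle_refinement_conflict(d, log)["stop_conditions"])  # {'max_refine_cycles': 1}
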
libtbx/langchain/agent/sanity_checker.py

Lines changed: 21 additions & 1 deletion
@@ -211,6 +211,10 @@ def _check_model_for_refine(self, context: Dict) -> Optional[SanityIssue]:

         If user has only search_model but requests refinement, give a specific
         error explaining they need to run Phaser/docking first.
+
+        EXCEPTION: If Phaser/dock_in_map haven't run yet, this is a normal
+        workflow progression — the agent should be allowed to proceed so it
+        can choose the positioning program.
         """
         state = context.get("state", "")

@@ -234,9 +238,25 @@ def _check_model_for_refine(self, context: Dict) -> Optional[SanityIssue]:

         if not has_model:
             if has_search_model:
-                # User has templates but no positioned model - common mistake!
+                # Check if model-positioning programs haven't been attempted yet.
+                # If so, this is normal workflow progression — the agent should
+                # be allowed to proceed and choose Phaser/dock_in_map.
+                history = context.get("history", [])
+                programs_run = {h.get("program", "").lower() for h in history if isinstance(h, dict)}
                 exp_type = context.get("experiment_type", "unknown")

+                positioning_programs = {"phenix.phaser"} if exp_type == "xray" else {"phenix.dock_in_map"}
+                positioning_attempted = any(
+                    any(pos_prog in prog for pos_prog in positioning_programs)
+                    for prog in programs_run
+                )
+
+                if not positioning_attempted:
+                    # Model positioning hasn't been tried yet — let the agent proceed
+                    # so it can choose the appropriate positioning program
+                    return None
+
+                # Positioning was attempted but model still not placed
                 if exp_type == "xray":
                     suggestion = (
                         "You have a search model (predicted structure or template) but no positioned model. "

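The new exception comes down to a membership test over the session history; a standalone sketch with hypothetical history entries:

# Hypothetical session history: xtriage and phaser have already run
history = [{"program": "phenix.xtriage"}, {"program": "phenix.phaser"}]
programs_run = {h.get("program", "").lower() for h in history if isinstance(h, dict)}

exp_type = "xray"
positioning_programs = {"phenix.phaser"} if exp_type == "xray" else {"phenix.dock_in_map"}
positioning_attempted = any(
    any(pos_prog in prog for pos_prog in positioning_programs)
    for prog in programs_run
)
print(positioning_attempted)
# True  -> positioning was already tried, so the "run Phaser/docking first" error is raised
# False -> the checker returns None and the agent picks the positioning program itself
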
libtbx/langchain/agent/workflow_engine.py

Lines changed: 7 additions & 3 deletions
@@ -337,10 +337,14 @@ def _has_placed_model(self, files, history_info, directives=None):
         return False

     def _has_refined_model(self, files, history_info):
-        """Check if model has been refined."""
+        """Check if model has been refined IN THIS SESSION.
+
+        IMPORTANT: Only trust history-based evidence, not file names.
+        User-provided input files may start with 'refine_' (e.g., refine_001_model.pdb)
+        without any actual refinement having been done in this session. Relying on
+        files.get('refined') would incorrectly skip to validation/STOP.
+        """
         return bool(
-            files.get("refined") or
-            files.get("rsr_output") or
            history_info.get("refine_count", 0) > 0 or
            history_info.get("rsr_count", 0) > 0
        )

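Put differently, a user-supplied refine_001_model.pdb no longer counts as evidence of refinement; only the session history does. A small sketch with hypothetical inputs:

files = {"refined": "refine_001_model.pdb"}         # user-provided input file, nothing refined yet
history_info = {"refine_count": 0, "rsr_count": 0}  # no refinement run in this session

has_refined = bool(
    history_info.get("refine_count", 0) > 0 or
    history_info.get("rsr_count", 0) > 0
)
print(has_refined)  # False -> the workflow will not skip straight to validation/STOP
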
libtbx/langchain/knowledge/file_categories.yaml

Lines changed: 11 additions & 0 deletions
@@ -428,6 +428,9 @@ unclassified_pdb:
     - "*chainsaw*"        # Chainsaw output (processed search models)
     - "lig*"
     - "*ligand*"
+    - "path_[0-9]*.pdb"   # trace-and-build fragments (intermediate)
+    - "*TEMP*"            # Temporary files (intermediate)
+    - "*reference*"       # Reference models (intermediate)
   notes: "Default category for generic PDB files - assume positioned model ready for refinement"

 # =============================================================================
@@ -509,6 +512,14 @@ carryover_temp:
     - "*/CarryOn/*"
   notes: "NEVER use - predict_and_build intermediate files"

+trace_fragment:
+  description: "Trace-and-build fragment files from predict_and_build"
+  parent_category: intermediate
+  extensions: [".pdb"]
+  patterns:
+    - "path_[0-9]*.pdb"   # trace_and_build path fragments (path_1.pdb, path_54.pdb, etc.)
+  notes: "NEVER use - these are intermediate trace fragments, not complete models"
+
 # =============================================================================
 # MAP SUBCATEGORIES
 # =============================================================================

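Assuming the YAML globs are applied with fnmatch-style semantics (an assumption; the matching code is not part of this diff), the new patterns would classify basenames like this:

from fnmatch import fnmatch

# Hypothetical basenames
for name in ["path_54.pdb", "model_TEMP_1.pdb", "overall_best.pdb"]:
    is_intermediate = fnmatch(name, "path_[0-9]*.pdb") or fnmatch(name, "*TEMP*")
    print(name, "->", "intermediate" if is_intermediate else "regular model")
# path_54.pdb -> intermediate, model_TEMP_1.pdb -> intermediate, overall_best.pdb -> regular model
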
libtbx/langchain/knowledge/programs.yaml

Lines changed: 2 additions & 2 deletions
@@ -876,7 +876,7 @@ phenix.process_predicted_model:
   outputs:
     files:
       - pattern: "*processed*.pdb"
-        type: model
+        type: processed_predicted  # NOT 'model' - this is still a search model

   command: "phenix.process_predicted_model {model}"

@@ -1243,7 +1243,7 @@ phenix.pdbtools:
 # -----------------------------------------------------------------------------

 phenix.resolve_cryo_em:
-  description: "Optimize cryo-EM map using density modification"
+  description: "Density modification of cryo-EM map (map optimization, NOT model building)"
   category: map_optimization
   experiment_types: [cryoem]

libtbx/langchain/knowledge/prompts_hybrid.py

Lines changed: 19 additions & 0 deletions
@@ -659,6 +659,25 @@ def is_half_map(filepath):
             "predict_and_build" in valid_list):
            workflow_section += "NOTE: Use predict_and_build with strategy: {\"stop_after_predict\": true}\n"

+        # Add program descriptions so LLM understands each program's purpose
+        try:
+            from libtbx.langchain.knowledge.yaml_loader import get_program
+        except ImportError:
+            try:
+                from knowledge.yaml_loader import get_program
+            except ImportError:
+                get_program = None
+        if get_program:
+            desc_lines = []
+            for prog in workflow_state.get("valid_programs", []):
+                if prog == "STOP":
+                    continue
+                prog_def = get_program(prog)
+                if prog_def and prog_def.get("description"):
+                    desc_lines.append(" - %s: %s" % (prog, prog_def["description"]))
+            if desc_lines:
+                workflow_section += "\nProgram descriptions:\n" + "\n".join(desc_lines) + "\n"
+
         # Add recommendations section if available
         recommendations = format_recommendations_for_prompt(workflow_state)
         if recommendations:

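A self-contained sketch of what the new block appends to the prompt, with get_program stubbed out; the single description shown is the one this commit puts in programs.yaml:

# Stub standing in for knowledge.yaml_loader.get_program (illustration only)
def get_program(name):
    known = {"phenix.resolve_cryo_em": {
        "description": "Density modification of cryo-EM map (map optimization, NOT model building)"}}
    return known.get(name)

workflow_section = ""
desc_lines = []
for prog in ["phenix.resolve_cryo_em", "STOP"]:
    if prog == "STOP":
        continue
    prog_def = get_program(prog)
    if prog_def and prog_def.get("description"):
        desc_lines.append(" - %s: %s" % (prog, prog_def["description"]))
if desc_lines:
    workflow_section += "\nProgram descriptions:\n" + "\n".join(desc_lines) + "\n"
print(workflow_section)
# Program descriptions:
#  - phenix.resolve_cryo_em: Density modification of cryo-EM map (map optimization, NOT model building)
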
libtbx/langchain/knowledge/workflows.yaml

Lines changed: 2 additions & 2 deletions
@@ -325,7 +325,7 @@ cryoem:
       - has: half_map
       - not_has: full_map
       - not_done: resolve_cryo_em   # Map optimization should only run once
-    hint: "Create optimized map from half-maps"
+    hint: "Density modification to create optimized map from half-maps (NOT model building)"

   - program: phenix.map_sharpening
     conditions:
@@ -357,7 +357,7 @@ cryoem:
       - has: half_map
       - not_has: full_map
      - not_done: resolve_cryo_em
-    hint: "Create optimized full map from half-maps before docking"
+    hint: "Density modification: create optimized full map from half-maps before docking (NOT model building)"

   - program: phenix.map_sharpening
     conditions:
