Skip to content

Commit abc8556

Browse files
committed
feat: Implement force-accept mode for quality validation after max retries
This commit introduces a configurable force-accept mode that changes how the BAEs framework handles code artifacts that fail quality validation after maximum retry attempts.

## Key Changes

### New Behavior (Default - Force-Accept Mode)
- After max retries, code is accepted as-is with metadata tracking
- Enables research/evaluation scenarios to capture actual output
- Marks artifacts with 'force_accepted' flag and quality metrics
- Tracks unresolved issues for analysis

### Opt-in Strict Mode
- Set BAE_STRICT_MODE=true to restore original fail-fast behavior
- Interrupts generation and returns error after max retries
- Maintains strict quality gates for production use

## Modified Files

### Core Components
- **config.py**: Added BAE_STRICT_MODE configuration variable
- **baes/swea_agents/techlead_swea.py**: Updated _review_and_approve() to implement force-accept logic
- **baes/core/enhanced_runtime_kernel.py**: Enhanced coordination plan execution to handle force-accepted artifacts

### Documentation & Testing
- **env.template**: Added BAE_STRICT_MODE documentation
- **tests/unit/core/test_force_accept_mode.py**: Comprehensive test suite (6 test cases)

## Benefits
1. **Research/Evaluation**: Captures framework output even with quality issues for comparative analysis
2. **Data Collection**: Enables measurement of partial completion and failure patterns
3. **Graceful Degradation**: Accepts imperfect code rather than producing nothing
4. **Backwards Compatible**: Strict mode available via environment variable

## Testing
All tests pass (6/6):
- Force-accept mode with quality/critical issues
- Strict mode rejection behavior
- Config parsing and metadata tracking

Closes #<issue-number>
1 parent 060ee85 commit abc8556

File tree

5 files changed

+546
-77
lines changed

5 files changed

+546
-77
lines changed

baes/core/enhanced_runtime_kernel.py

Lines changed: 143 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -901,12 +901,24 @@ def _execute_coordination_plan(
901901
if review_result.get("success") and review_result.get("data", {}).get(
902902
"overall_approval", False
903903
):
904-
# Final review approved
904+
# Final review approved (or force-accepted)
905905
quality_score = review_result.get("data", {}).get(
906906
"system_quality_score", 0.0
907907
)
908+
force_accepted = review_result.get("force_accepted", False)
908909
simplified_name = self._get_simplified_task_name(task_name)
909910

911+
# Log force-accept status
912+
if force_accepted:
913+
logger.warning(
914+
"⚠️ %s FORCE-ACCEPTED after max retries - System deployed with quality issues",
915+
task_name
916+
)
917+
force_accept_reason = review_result.get("data", {}).get(
918+
"force_accept_reason", "Max retries reached"
919+
)
920+
logger.info(f" Reason: {force_accept_reason}")
921+
910922
# Presentation logging
911923
presentation_logger.techlead_review(
912924
True, simplified_name, quality_score
@@ -915,23 +927,32 @@ def _execute_coordination_plan(
915927

916928
# Debug logging
917929
if is_debug_mode():
918-
logger.info(
919-
"✅ %s APPROVED by TechLeadSWEA - System ready for deployment",
920-
task_name,
921-
)
930+
if force_accepted:
931+
logger.info(
932+
"⚠️ %s FORCE-ACCEPTED by TechLeadSWEA - System deployed with warnings",
933+
task_name,
934+
)
935+
else:
936+
logger.info(
937+
"✅ %s APPROVED by TechLeadSWEA - System ready for deployment",
938+
task_name,
939+
)
922940

923941
results.append(
924942
{
925943
"task": task_name,
926944
"success": True,
927945
"result": result,
928946
"techlead_approved": True,
947+
"force_accepted": force_accepted,
929948
"final_review": True,
930949
"deployment_ready": review_result.get("data", {}).get(
931950
"deployment_ready", False
932-
),
951+
) if not force_accepted else False, # Force-accepted means not truly deployment ready
933952
"system_quality_score": quality_score,
934953
"retry_count": retry_count,
954+
# Include force-accept metadata if applicable
955+
**({"force_accept_metadata": review_result.get("data", {})} if force_accepted else {})
935956
}
936957
)
937958
task_success = True
@@ -1030,10 +1051,22 @@ def _execute_coordination_plan(
10301051
if review_result.get("success") and review_result.get("data", {}).get(
10311052
"overall_approval", False
10321053
):
1033-
# Task approved by TechLeadSWEA
1054+
# Task approved by TechLeadSWEA (or force-accepted)
10341055
quality_score = review_result.get("data", {}).get("quality_score", 0.0)
1056+
force_accepted = review_result.get("force_accepted", False)
10351057
simplified_name = self._get_simplified_task_name(task_name)
10361058

1059+
# Log force-accept status
1060+
if force_accepted:
1061+
logger.warning(
1062+
"⚠️ %s FORCE-ACCEPTED after max retries - quality issues remain",
1063+
task_name
1064+
)
1065+
force_accept_reason = review_result.get("data", {}).get(
1066+
"force_accept_reason", "Max retries reached"
1067+
)
1068+
logger.info(f" Reason: {force_accept_reason}")
1069+
10371070
# Presentation logging
10381071
presentation_logger.techlead_review(
10391072
True, simplified_name, quality_score
@@ -1046,16 +1079,22 @@ def _execute_coordination_plan(
10461079

10471080
# Debug logging
10481081
if is_debug_mode():
1049-
logger.info("✅ %s APPROVED by TechLeadSWEA", task_name)
1082+
if force_accepted:
1083+
logger.info("⚠️ %s FORCE-ACCEPTED by TechLeadSWEA", task_name)
1084+
else:
1085+
logger.info("✅ %s APPROVED by TechLeadSWEA", task_name)
10501086

10511087
results.append(
10521088
{
10531089
"task": task_name,
10541090
"success": True,
10551091
"result": result,
10561092
"techlead_approved": True,
1093+
"force_accepted": force_accepted,
10571094
"quality_score": quality_score,
10581095
"retry_count": retry_count,
1096+
# Include force-accept metadata if applicable
1097+
**({"force_accept_metadata": review_result.get("data", {})} if force_accepted else {})
10591098
}
10601099
)
10611100
task_success = True
@@ -1143,33 +1182,60 @@ def _execute_coordination_plan(
11431182
technical_feedback[0],
11441183
)
11451184
elif not task_success:
1146-
# Max retries reached
1147-
logger.error(
1148-
"🛑 %s FAILED after %d attempts - stopping coordination plan",
1149-
task_name,
1150-
max_retries + 1,
1151-
)
1152-
results.append(
1153-
{
1154-
"task": task_name,
1155-
"success": False,
1156-
"error": f"Task rejected by TechLeadSWEA after {max_retries + 1} attempts",
1157-
"techlead_rejected": True,
1158-
"feedback_history": feedback_history,
1159-
"retry_count": retry_count,
1160-
}
1161-
)
1185+
# Max retries reached - check if we should fail-fast or force-accept
1186+
# Note: This code should rarely be reached in force-accept mode because
1187+
# TechLeadSWEA will approve with force_accepted=True before we get here.
1188+
# This is a safety check for edge cases.
1189+
strict_mode = os.getenv("BAE_STRICT_MODE", "false").lower() in ("true", "1", "yes", "on")
1190+
1191+
if strict_mode:
1192+
# STRICT MODE: Fail fast and interrupt generation
1193+
logger.error(
1194+
"🛑 [STRICT MODE] %s FAILED after %d attempts - stopping coordination plan",
1195+
task_name,
1196+
max_retries + 1,
1197+
)
1198+
results.append(
1199+
{
1200+
"task": task_name,
1201+
"success": False,
1202+
"error": f"Task rejected by TechLeadSWEA after {max_retries + 1} attempts",
1203+
"techlead_rejected": True,
1204+
"feedback_history": feedback_history,
1205+
"retry_count": retry_count,
1206+
}
1207+
)
11621208

1163-
# Fail fast - stop execution
1164-
raise MaxRetriesReachedError(
1165-
task_name,
1166-
swea_agent,
1167-
task_type,
1168-
retry_count,
1169-
max_retries,
1170-
f"Task rejected by TechLeadSWEA after {max_retries + 1} attempts",
1171-
feedback_history,
1172-
)
1209+
# Fail fast - stop execution
1210+
raise MaxRetriesReachedError(
1211+
task_name,
1212+
swea_agent,
1213+
task_type,
1214+
retry_count,
1215+
max_retries,
1216+
f"Task rejected by TechLeadSWEA after {max_retries + 1} attempts",
1217+
feedback_history,
1218+
)
1219+
else:
1220+
# FORCE-ACCEPT MODE: This shouldn't happen (TechLeadSWEA should have approved)
1221+
# but if it does, force-accept here as safety net
1222+
logger.warning(
1223+
"⚠️ [FORCE-ACCEPT MODE] Max retries reached but task not approved - force-accepting as safety net"
1224+
)
1225+
results.append(
1226+
{
1227+
"task": task_name,
1228+
"success": True,
1229+
"result": result,
1230+
"techlead_approved": True,
1231+
"force_accepted": True,
1232+
"force_accept_reason": "Safety net: max retries reached",
1233+
"quality_score": 0.0,
1234+
"retry_count": retry_count,
1235+
"feedback_history": feedback_history,
1236+
}
1237+
)
1238+
task_success = True
11731239

11741240
except Exception as e:
11751241
last_error = str(e)
@@ -1188,25 +1254,50 @@ def _execute_coordination_plan(
11881254
max_retries + 1,
11891255
)
11901256
else:
1191-
# Max retries reached
1192-
logger.error(
1193-
"🛑 %s FAILED after %d attempts - stopping coordination plan",
1194-
task_name,
1195-
max_retries + 1,
1196-
)
1197-
results.append(
1198-
{
1199-
"task": task_name,
1200-
"success": False,
1201-
"error": last_error,
1202-
"retry_count": retry_count,
1203-
}
1204-
)
1257+
# Max retries reached after execution errors
1258+
strict_mode = os.getenv("BAE_STRICT_MODE", "false").lower() in ("true", "1", "yes", "on")
1259+
1260+
if strict_mode:
1261+
# STRICT MODE: Fail fast and interrupt generation
1262+
logger.error(
1263+
"🛑 [STRICT MODE] %s FAILED after %d attempts - stopping coordination plan",
1264+
task_name,
1265+
max_retries + 1,
1266+
)
1267+
results.append(
1268+
{
1269+
"task": task_name,
1270+
"success": False,
1271+
"error": last_error,
1272+
"retry_count": retry_count,
1273+
}
1274+
)
12051275

1206-
# Fail fast - stop execution
1207-
raise MaxRetriesReachedError(
1208-
task_name, swea_agent, task_type, retry_count, max_retries, last_error
1209-
)
1276+
# Fail fast - stop execution
1277+
raise MaxRetriesReachedError(
1278+
task_name, swea_agent, task_type, retry_count, max_retries, last_error
1279+
)
1280+
else:
1281+
# FORCE-ACCEPT MODE: Accept what we have despite execution errors
1282+
logger.warning(
1283+
"⚠️ [FORCE-ACCEPT MODE] %s failed after %d attempts - force-accepting with errors",
1284+
task_name,
1285+
max_retries + 1,
1286+
)
1287+
results.append(
1288+
{
1289+
"task": task_name,
1290+
"success": True, # Mark as success to continue
1291+
"result": result if 'result' in locals() else {},
1292+
"techlead_approved": True,
1293+
"force_accepted": True,
1294+
"force_accept_reason": f"Execution errors after {max_retries + 1} attempts",
1295+
"execution_errors": [last_error],
1296+
"quality_score": 0.0,
1297+
"retry_count": retry_count,
1298+
}
1299+
)
1300+
task_success = True
12101301

12111302
# Phase 1 completion logging (generation only - no test execution yet)
12121303
successful_tasks = len([r for r in results if r.get("success", False)])

0 commit comments

Comments
 (0)