Skip to content

Commit 4da5487

Browse files
authored
feat: Workforce support configurable fail handling (#3488)
1 parent 763194b commit 4da5487

File tree

6 files changed

+401
-98
lines changed

6 files changed

+401
-98
lines changed

camel/societies/workforce/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
from .role_playing_worker import RolePlayingWorker
1616
from .single_agent_worker import SingleAgentWorker
17-
from .utils import PipelineTaskBuilder
17+
from .utils import FailureHandlingConfig, PipelineTaskBuilder, RecoveryStrategy
1818
from .workflow_memory_manager import WorkflowSelectionMethod
1919
from .workforce import Workforce, WorkforceMode
2020

@@ -25,4 +25,6 @@
2525
"SingleAgentWorker",
2626
"RolePlayingWorker",
2727
"WorkflowSelectionMethod",
28+
"FailureHandlingConfig",
29+
"RecoveryStrategy",
2830
]

camel/societies/workforce/prompts.py

Lines changed: 49 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -333,58 +333,9 @@
333333
334334
**STEP 2: DETERMINE RECOVERY STRATEGY (if quality insufficient)**
335335
336-
If the task quality is insufficient, select the best recovery strategy:
336+
If the task quality is insufficient, select the best recovery strategy from the ENABLED strategies below:
337337
338-
**Available Strategies:**
339-
340-
1. **retry** - Retry with the same worker and task content
341-
- **Best for**:
342-
* Network errors, connection timeouts, temporary API issues
343-
* Random failures that are likely temporary
344-
* Minor quality issues that may resolve on retry
345-
- **Not suitable for**:
346-
* Fundamental task misunderstandings
347-
* Worker capability gaps
348-
* Persistent quality problems
349-
350-
2. **reassign** - Assign to a different worker
351-
- **Best for**:
352-
* Current worker lacks required skills/expertise
353-
* Worker-specific quality issues
354-
* Task requires different specialization
355-
- **Not suitable for**:
356-
* Task description is unclear (use replan instead)
357-
* Task is too complex (use decompose instead)
358-
- **Note**: Only available for quality issues, not failures
359-
360-
3. **replan** - Modify task content with clearer instructions
361-
- **Best for**:
362-
* Unclear or ambiguous requirements
363-
* Missing context or information
364-
* Task description needs improvement
365-
- **Requirements**:
366-
* Provide modified_task_content with enhanced, clear instructions
367-
* Modified task must be actionable for an AI agent
368-
* Address the root cause identified in issues
369-
370-
4. **decompose** - Break into smaller, manageable subtasks
371-
- **Best for**:
372-
* Task is too complex for a single worker
373-
* Multiple distinct sub-problems exist
374-
* Persistent failures despite retries
375-
* Capability mismatches that need specialization
376-
- **Consider**:
377-
* Task depth (avoid if depth > 2)
378-
* Whether subtasks can run in parallel
379-
380-
5. **create_worker** - Create new specialized worker
381-
- **Best for**:
382-
* No existing worker has required capabilities
383-
* Need specialized skills not currently available
384-
- **Consider**:
385-
* Whether decomposition could work instead
386-
* Cost of creating new worker vs alternatives
387-
- **Note**: Only available for task failures, not quality issues
338+
{available_strategies}
388339
389340
**DECISION GUIDELINES:**
390341
@@ -403,6 +354,7 @@
403354
- No explanations or text outside the JSON structure
404355
- Ensure all required fields are included
405356
- Use null for optional fields when not applicable
357+
- ONLY use strategies listed above as ENABLED
406358
"""
407359
)
408360

@@ -423,6 +375,52 @@
423375
"modified_task_content": "new content if replan, else null"
424376
}"""
425377

378+
# Strategy descriptions for dynamic prompt generation
379+
STRATEGY_DESCRIPTIONS = {
380+
"retry": """**retry** - Retry with the same worker and task content
381+
- **Best for**:
382+
* Network errors, connection timeouts, temporary API issues
383+
* Random failures that are likely temporary
384+
* Minor quality issues that may resolve on retry
385+
- **Not suitable for**:
386+
* Fundamental task misunderstandings
387+
* Worker capability gaps
388+
* Persistent quality problems""",
389+
"reassign": """**reassign** - Assign to a different worker
390+
- **Best for**:
391+
* Current worker lacks required skills/expertise
392+
* Worker-specific issues or errors
393+
* Task requires different specialization
394+
- **Not suitable for**:
395+
* Task description is unclear (use replan instead)
396+
* Task is too complex (use decompose instead)""",
397+
"replan": """**replan** - Modify task content with clearer instructions
398+
- **Best for**:
399+
* Unclear or ambiguous requirements
400+
* Missing context or information
401+
* Task description needs improvement
402+
- **Requirements**:
403+
* Provide modified_task_content with enhanced, clear instructions
404+
* Modified task must be actionable for an AI agent
405+
* Address the root cause identified in issues""",
406+
"decompose": """**decompose** - Break into smaller, manageable subtasks
407+
- **Best for**:
408+
* Task is too complex for a single worker
409+
* Multiple distinct sub-problems exist
410+
* Persistent failures despite retries
411+
* Capability mismatches that need specialization
412+
- **Consider**:
413+
* Task depth (avoid if depth > 2)
414+
* Whether subtasks can run in parallel""",
415+
"create_worker": """**create_worker** - Create new specialized worker
416+
- **Best for**:
417+
* No existing worker has required capabilities
418+
* Need specialized skills not currently available
419+
- **Consider**:
420+
* Whether decomposition could work instead
421+
* Cost of creating new worker vs alternatives""",
422+
}
423+
426424
TASK_AGENT_SYSTEM_MESSAGE = """You are an intelligent task management assistant responsible for planning, analyzing, and quality control.
427425
428426
Your responsibilities include:

camel/societies/workforce/utils.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,110 @@ def __repr__(self):
224224
return f"RecoveryStrategy.{self.name}"
225225

226226

227+
class FailureHandlingConfig(BaseModel):
    r"""Configuration for failure handling behavior in Workforce.

    This configuration lets users customize how the Workforce handles task
    failures, for example by disabling reassignment or other recovery
    strategies as needed.

    Args:
        max_retries (int): Maximum number of retry attempts before giving up
            on a task. Must be at least 1. (default: :obj:`3`)
        enabled_strategies (Optional[List[RecoveryStrategy]]): List of recovery
            strategies that are allowed to be used. Can be specified as
            RecoveryStrategy enums or strings (e.g., ["retry", "replan"]).
            Strings are matched case-insensitively, surrounding whitespace
            is ignored, and duplicate entries are collapsed (first
            occurrence wins).
            If None, all strategies are enabled (with LLM analysis).
            If an empty list, no recovery strategies are applied and failed
            tasks are marked as failed immediately. If only ["retry"] is
            specified, simple retry is used without LLM analysis.
            (default: :obj:`None` - all strategies enabled)
        halt_on_max_retries (bool): Whether to halt the entire workforce
            when a task exceeds max retries. If False, the task is marked
            as failed and the workflow continues (similar to PIPELINE mode
            behavior). (default: :obj:`True` for AUTO_DECOMPOSE mode behavior)

    Example:
        >>> # Using string list (simple)
        >>> config = FailureHandlingConfig(
        ...     enabled_strategies=["retry", "replan", "decompose"],
        ... )
        >>>
        >>> # Using enum list
        >>> config = FailureHandlingConfig(
        ...     enabled_strategies=[
        ...         RecoveryStrategy.RETRY,
        ...         RecoveryStrategy.REPLAN,
        ...     ]
        ... )
        >>>
        >>> # Simple retry only
        >>> config = FailureHandlingConfig(
        ...     enabled_strategies=["retry"],
        ...     max_retries=2,
        ... )
        >>>
        >>> # No recovery - failed tasks are immediately marked as failed
        >>> config = FailureHandlingConfig(
        ...     enabled_strategies=[],
        ... )
        >>>
        >>> # Allow failures without halting
        >>> config = FailureHandlingConfig(
        ...     halt_on_max_retries=False,
        ... )
    """

    max_retries: int = Field(
        default=3,
        ge=1,
        description="Maximum retry attempts before giving up on a task",
    )

    enabled_strategies: Optional[List[RecoveryStrategy]] = Field(
        default=None,
        description="List of enabled recovery strategies. None means all "
        "enabled. Empty list means no recovery (immediate failure). "
        "Can be strings like ['retry', 'replan'] or RecoveryStrategy enums.",
    )

    halt_on_max_retries: bool = Field(
        default=True,
        description="Whether to halt workforce when max retries exceeded",
    )

    @field_validator("enabled_strategies", mode="before")
    @classmethod
    def validate_enabled_strategies(
        cls, v
    ) -> Optional[List[RecoveryStrategy]]:
        r"""Normalize ``enabled_strategies`` to a list of RecoveryStrategy.

        Runs before pydantic's own validation so that plain strings can be
        accepted alongside enum members.

        Args:
            v: The raw value supplied for ``enabled_strategies``; expected
                to be None or a list of strings / RecoveryStrategy members.

        Returns:
            Optional[List[RecoveryStrategy]]: None if ``v`` is None,
                otherwise the strategies in input order with duplicates
                removed.

        Raises:
            ValueError: If ``v`` is neither None nor a list, if a string
                does not name a valid strategy, or if an item is neither
                a string nor a RecoveryStrategy.
        """
        if v is None:
            return None
        if not isinstance(v, list):
            raise ValueError("enabled_strategies must be a list or None")

        result: List[RecoveryStrategy] = []
        for item in v:
            if isinstance(item, RecoveryStrategy):
                strategy = item
            elif isinstance(item, str):
                try:
                    # Tolerate stray whitespace and any letter case.
                    strategy = RecoveryStrategy(item.strip().lower())
                except ValueError:
                    valid = [s.value for s in RecoveryStrategy]
                    # `from None`: the enum's own lookup error adds nothing
                    # beyond the message below, so suppress the chaining.
                    raise ValueError(
                        f"Invalid strategy '{item}'. "
                        f"Valid options: {valid}"
                    ) from None
            else:
                raise ValueError(
                    f"Strategy must be string or RecoveryStrategy, "
                    f"got {type(item).__name__}"
                )
            # The field is semantically a set of enabled strategies; keep
            # only the first occurrence while preserving input order.
            if strategy not in result:
                result.append(strategy)
        return result
329+
330+
227331
class FailureContext(BaseModel):
228332
r"""Context information about a task failure."""
229333

0 commit comments

Comments
 (0)