feat(robot-server): start logic for dynamic error recovery policy (#15707)

aaron-kulkarni · web-flow · commit e4ff49aea575 · 2024-07-24T13:12:25.000-04:00
# Overview Create a robot-server function that turns a list of ErrorRecoveryRules into a full ErrorRecoveryPolicy. A future PR will implement the HTTP calls that actually create the list of rules. EXEC-589  # Test Plan  # Changelog  # Review requests  # Risk assessment
diff --git a/api/src/opentrons/protocol_engine/error_recovery_policy.py b/api/src/opentrons/protocol_engine/error_recovery_policy.py
@@ -28,10 +28,8 @@ class ErrorRecoveryType(enum.Enum):
     WAIT_FOR_RECOVERY = enum.auto()
     """Stop and wait for the error to be recovered from manually."""
 
-    # TODO(mm, 2023-03-18): Add something like this for
-    # https://opentrons.atlassian.net/browse/EXEC-302.
-    # CONTINUE = enum.auto()
-    # """Continue with the run, as if the command never failed."""
+    IGNORE_AND_CONTINUE = enum.auto()
+    """Continue with the run, as if the command never failed."""
 
 
 class ErrorRecoveryPolicy(Protocol):
diff --git a/api/src/opentrons/protocol_engine/state/commands.py b/api/src/opentrons/protocol_engine/state/commands.py
@@ -337,7 +337,10 @@ def _handle_fail_command_action(self, action: FailCommandAction) -> None:
                 other_command_ids_to_fail = list(
                     self._state.command_history.get_queue_ids()
                 )
-            elif action.type == ErrorRecoveryType.WAIT_FOR_RECOVERY:
+            elif (
+                action.type == ErrorRecoveryType.WAIT_FOR_RECOVERY
+                or action.type == ErrorRecoveryType.IGNORE_AND_CONTINUE
+            ):
                 other_command_ids_to_fail = []
             else:
                 assert_never(action.type)
diff --git a/robot-server/robot_server/runs/error_recovery_mapping.py b/robot-server/robot_server/runs/error_recovery_mapping.py
@@ -0,0 +1,46 @@
+"""Functions used for managing error recovery policy."""
+from typing import Optional
+from opentrons.protocol_engine.state.config import Config
+from robot_server.runs.error_recovery_models import ErrorRecoveryRule, ReactionIfMatch
+from opentrons.protocol_engine.commands.command_unions import (
+    Command,
+    CommandDefinedErrorData,
+)
+from opentrons.protocol_engine.error_recovery_policy import (
+    ErrorRecoveryPolicy,
+    ErrorRecoveryType,
+    standard_run_policy,
+)
+
+
+def create_error_recovery_policy_from_rules(
+    rules: list[ErrorRecoveryRule],
+) -> ErrorRecoveryPolicy:
+    """Return a policy where the first matching rule wins, else the standard policy."""
+
+    def _policy(
+        config: Config,
+        failed_command: Command,
+        defined_error_data: Optional[CommandDefinedErrorData],
+    ) -> ErrorRecoveryType:
+        for rule in rules:
+            # zip() pairs each criteria with its reaction and, unlike indexing
+            # ifMatch[i], cannot raise IndexError if the lists differ in length.
+            for criteria, reaction in zip(rule.matchCriteria, rule.ifMatch):
+                matcher = criteria.command
+                if (
+                    defined_error_data is None
+                    or failed_command.commandType != matcher.commandType
+                    or defined_error_data.public.errorType != matcher.error.errorType
+                ):
+                    continue
+                if reaction == ReactionIfMatch.IGNORE_AND_CONTINUE:
+                    raise NotImplementedError  # No protocol engine support for this yet. It's in EXEC-302.
+                elif reaction == ReactionIfMatch.FAIL_RUN:
+                    return ErrorRecoveryType.FAIL_RUN
+                elif reaction == ReactionIfMatch.WAIT_FOR_RECOVERY:
+                    return ErrorRecoveryType.WAIT_FOR_RECOVERY
+
+        return standard_run_policy(config, failed_command, defined_error_data)
+
+    return _policy
diff --git a/robot-server/robot_server/runs/error_recovery_models.py b/robot-server/robot_server/runs/error_recovery_models.py
@@ -0,0 +1,69 @@
+"""Request and response models for dealing with error recovery policies."""
+from enum import Enum
+from pydantic import BaseModel, Field
+
+
+class ReactionIfMatch(Enum):
+    """How to react when an error recovery rule matches.
+
+    * `"ignoreAndContinue"`: Ignore this error and future errors of the same type.
+    * `"failRun"`: Errors of this type should fail the run.
+    * `"waitForRecovery"`: Instances of this error should initiate a recovery operation.
+
+    """
+
+    IGNORE_AND_CONTINUE = "ignoreAndContinue"
+    FAIL_RUN = "failRun"
+    WAIT_FOR_RECOVERY = "waitForRecovery"
+
+
+# There are a lot of nested classes here. This is the JSON schema this code models.
+# "ErrorRecoveryRule": {
+#     "matchCriteria": {
+#         "command": {
+#             "commandType": "foo",
+#             "error": {
+#                 "errorType": "bar"
+#             }
+#         }
+#     },
+#     "ifMatch": "baz"
+# }
+
+
+class ErrorMatcher(BaseModel):
+    """Matches an error by its `errorType`."""
+
+    errorType: str = Field(..., description="The error type that this rule applies to.")
+
+
+class CommandMatcher(BaseModel):
+    """Matches a failed command by its `commandType` and its error's `errorType`."""
+
+    commandType: str = Field(
+        ..., description="The command type that this rule applies to."
+    )
+    error: ErrorMatcher = Field(
+        ..., description="The error details that this rule applies to."
+    )
+
+
+class MatchCriteria(BaseModel):
+    """The criteria a failed command must meet for a rule's reaction to apply."""
+
+    command: CommandMatcher = Field(
+        ..., description="The command and error types that this rule applies to."
+    )
+
+
+class ErrorRecoveryRule(BaseModel):
+    """Request/Response model for a rule; `matchCriteria[i]` pairs with `ifMatch[i]`."""
+
+    matchCriteria: list[MatchCriteria] = Field(
+        default_factory=list,
+        description="The criteria that must be met for this rule to be applied.",
+    )
+    ifMatch: list[ReactionIfMatch] = Field(
+        default_factory=list,
+        description="The specific recovery setting that will be in use if the type parameters match.",
+    )
diff --git a/robot-server/robot_server/service/errors.py b/robot-server/robot_server/service/errors.py
@@ -1,5 +1,6 @@
 # TODO(mc, 2021-05-10): delete this file; these models have been moved to
 # robot_server/errors/error_responses.py and robot_server/errors/global_errors.py
+# Note: (2024-07-18): this file does not actually seem to be safe to delete
 from dataclasses import dataclass, asdict
 from enum import Enum
 from typing import Any, Dict, Optional, Sequence, Tuple
diff --git a/robot-server/tests/runs/test_error_recovery_mapping.py b/robot-server/tests/runs/test_error_recovery_mapping.py
@@ -0,0 +1,118 @@
+"""Unit tests for `error_recovery_mapping`."""
+import pytest
+from decoy import Decoy
+
+
+from opentrons.protocol_engine.commands.pipetting_common import (
+    LiquidNotFoundError,
+    LiquidNotFoundErrorInternalData,
+)
+from opentrons.protocol_engine.commands.command import (
+    DefinedErrorData,
+)
+from opentrons.protocol_engine.commands.command_unions import CommandDefinedErrorData
+from opentrons.protocol_engine.commands.liquid_probe import LiquidProbe
+from opentrons.protocol_engine.error_recovery_policy import ErrorRecoveryType
+from opentrons.protocol_engine.state.config import Config
+from opentrons.protocol_engine.types import DeckType
+from robot_server.runs.error_recovery_mapping import (
+    create_error_recovery_policy_from_rules,
+)
+from robot_server.runs.error_recovery_models import (
+    ErrorRecoveryRule,
+    MatchCriteria,
+    CommandMatcher,
+    ErrorMatcher,
+    ReactionIfMatch,
+)
+
+
+@pytest.fixture
+def mock_command(decoy: Decoy) -> LiquidProbe:
+    """Get a mock LiquidProbe command whose commandType is "liquidProbe"."""
+    mock = decoy.mock(cls=LiquidProbe)
+    decoy.when(mock.commandType).then_return("liquidProbe")
+    return mock
+
+
+@pytest.fixture
+def mock_error_data(decoy: Decoy) -> CommandDefinedErrorData:
+    """Get mock defined error data whose public error is a LiquidNotFoundError."""
+    mock = decoy.mock(
+        cls=DefinedErrorData[LiquidNotFoundError, LiquidNotFoundErrorInternalData]
+    )
+    mock_lnfe = decoy.mock(cls=LiquidNotFoundError)
+    decoy.when(mock.public).then_return(mock_lnfe)
+    decoy.when(mock_lnfe.errorType).then_return("liquidNotFound")
+    return mock
+
+
+@pytest.fixture
+def mock_criteria(decoy: Decoy) -> MatchCriteria:
+    """Get mock MatchCriteria matching "liquidProbe" + "liquidNotFound"."""
+    mock = decoy.mock(cls=MatchCriteria)
+    mock_command = decoy.mock(cls=CommandMatcher)
+    decoy.when(mock_command.commandType).then_return("liquidProbe")
+    mock_error_matcher = decoy.mock(cls=ErrorMatcher)
+    decoy.when(mock_error_matcher.errorType).then_return("liquidNotFound")
+    decoy.when(mock.command).then_return(mock_command)
+    decoy.when(mock_command.error).then_return(mock_error_matcher)
+    return mock
+
+
+@pytest.fixture
+def mock_rule(decoy: Decoy, mock_criteria: MatchCriteria) -> ErrorRecoveryRule:
+    """Get a mock ErrorRecoveryRule whose one criteria maps to IGNORE_AND_CONTINUE."""
+    mock = decoy.mock(cls=ErrorRecoveryRule)
+    decoy.when(mock.ifMatch).then_return([ReactionIfMatch.IGNORE_AND_CONTINUE])
+    decoy.when(mock.matchCriteria).then_return([mock_criteria])
+    return mock
+
+
+def test_create_error_recovery_policy_with_rules(
+    decoy: Decoy,
+    mock_command: LiquidProbe,
+    mock_error_data: CommandDefinedErrorData,
+    mock_rule: ErrorRecoveryRule,
+) -> None:
+    """Should raise NotImplementedError if the matching rule is IGNORE_AND_CONTINUE."""
+    policy = create_error_recovery_policy_from_rules([mock_rule])
+    exampleConfig = Config(
+        robot_type="OT-3 Standard",
+        deck_type=DeckType.OT3_STANDARD,
+    )
+    with pytest.raises(NotImplementedError):
+        policy(exampleConfig, mock_command, mock_error_data)
+
+
+def test_create_error_recovery_policy_undefined_error(
+    decoy: Decoy, mock_command: LiquidProbe
+) -> None:
+    """Should fall back to FAIL_RUN when no rule matches and the error is undefined."""
+    rule1 = ErrorRecoveryRule(matchCriteria=[], ifMatch=[])
+
+    policy = create_error_recovery_policy_from_rules([rule1])
+    exampleConfig = Config(
+        robot_type="OT-3 Standard",
+        deck_type=DeckType.OT3_STANDARD,
+    )
+
+    assert policy(exampleConfig, mock_command, None) == ErrorRecoveryType.FAIL_RUN
+
+
+def test_create_error_recovery_policy_defined_error(
+    decoy: Decoy, mock_command: LiquidProbe, mock_error_data: CommandDefinedErrorData
+) -> None:
+    """Should fall back to WAIT_FOR_RECOVERY when no rule matches a defined error."""
+    rule1 = ErrorRecoveryRule(matchCriteria=[], ifMatch=[])
+
+    policy = create_error_recovery_policy_from_rules([rule1])
+    exampleConfig = Config(
+        robot_type="OT-3 Standard",
+        deck_type=DeckType.OT3_STANDARD,
+    )
+
+    assert (
+        policy(exampleConfig, mock_command, mock_error_data)
+        == ErrorRecoveryType.WAIT_FOR_RECOVERY
+    )