-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodels.py
More file actions
91 lines (71 loc) · 3.99 KB
/
models.py
File metadata and controls
91 lines (71 loc) · 3.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from __future__ import annotations
from typing import Any, Literal
from openenv.core.env_server.types import Action, Observation, State
from pydantic import BaseModel, Field
# Closed string vocabularies shared by the models in this module.

# Difficulty bucket a task can be assigned to.
Difficulty = Literal["easy", "medium", "hard"]

# Three-step scale used for a task's operational urgency.
Urgency = Literal["low", "medium", "high"]

# Three-step scale used for a task's compliance-risk and business-impact ratings.
RiskLevel = Literal["low", "medium", "high"]

# Every action value the models accept as a ground-truth, offered, or
# partial-credit action.
ActionChoice = Literal[
    "route_it",
    "route_finance",
    "escalate",
    "reply_with_template",
    "resolve",
]
# Static description of one gradable task: identity, prompt text, risk
# ratings, the ground-truth action, and how much reward it can yield.
#
# `id` duplicates `task_id` and `name` duplicates `title` (both are declared
# as aliases "for spec compliance"); nothing in this model keeps the pairs in
# sync, so callers must supply matching values themselves.
class TaskMetadata(BaseModel):
    # --- identity -----------------------------------------------------------
    task_id: str = Field(
        ...,
        description="Stable task identifier",
    )
    id: str = Field(
        ...,
        description="Alias for task_id for spec compliance",
    )
    difficulty: Difficulty = Field(
        ...,
        description="Task difficulty bucket",
    )
    title: str = Field(
        ...,
        description="Short task name",
    )
    name: str = Field(
        ...,
        description="Alias for title for spec compliance",
    )

    # --- content shown to the agent -----------------------------------------
    prompt: str = Field(
        ...,
        description="Task prompt shown to the agent",
    )

    # --- operational ratings -------------------------------------------------
    urgency: Urgency = Field(
        ...,
        description="Operational urgency of the task",
    )
    compliance_risk: RiskLevel = Field(
        ...,
        description="Risk of violating policy or legal process",
    )
    business_impact: RiskLevel = Field(
        ...,
        description="Business impact if the task is mishandled",
    )
    # Optional; every instance gets its own fresh empty list.
    tags: list[str] = Field(
        description="Extra descriptors for analytics and demos",
        default_factory=list,
    )

    # --- grading -------------------------------------------------------------
    expected_action: ActionChoice = Field(
        ...,
        description="Ground-truth action for grading",
    )
    max_reward: float = Field(
        ...,
        description="Maximum reward available for this task",
    )
    grader: str | None = Field(
        description="Optional grader entrypoint for validator task discovery",
        default=None,
    )
    # Maps an action to its partial-credit score. The 0.0-1.0 range named in
    # the description is documentation only; no constraint here enforces it.
    partial_credit: dict[ActionChoice, float] = Field(
        description="Optional deterministic partial-credit actions scored between 0.0 and 1.0",
        default_factory=dict,
    )
# Action payload submitted by the agent: a single required `choice` string.
#
# NOTE(review): `choice` is typed as plain `str`, not `ActionChoice`, so this
# model itself does not reject values outside the known action set —
# presumably unknown choices are handled by the caller; confirm.
class InboxOpsAction(Action):
    choice: str = Field(
        ...,
        description="Action selected by the agent",
    )
# Grading outcome for the most recent action. All five fields are required.
class InboxOpsReward(BaseModel):
    # Score awarded, and the ceiling it is measured against.
    value: float = Field(
        ...,
        description="Reward returned for the last action",
    )
    max_value: float = Field(
        ...,
        description="Maximum reward available for the task",
    )

    # Verdict plus the context needed to interpret it.
    correct: bool = Field(
        ...,
        description="Whether the selected action was correct",
    )
    difficulty: Difficulty = Field(
        ...,
        description="Difficulty level for the graded task",
    )
    reason: str = Field(
        ...,
        description="Short deterministic grading explanation",
    )
# What the agent is shown for the current task. Every field has a default
# (empty string, None, empty list, or 0), so an observation can be built
# with no task loaded.
class InboxOpsObservation(Observation):
    # Identity of the task currently on offer.
    task_id: str = Field(
        description="Current task identifier",
        default="",
    )
    difficulty: Difficulty | None = Field(
        description="Current task difficulty",
        default=None,
    )
    title: str = Field(
        description="Current task title",
        default="",
    )

    # Text and options presented to the agent.
    prompt: str = Field(
        description="Task prompt shown to the agent",
        default="",
    )
    choices: list[ActionChoice] = Field(
        description="Available actions",
        default_factory=list,
    )

    # Episode progress.
    remaining_tasks: int = Field(
        description="Tasks remaining in the episode",
        default=0,
    )
# Bundle of an observation, a reward, a `done` flag, and a free-form `info`
# mapping. Only `info` is optional.
class InboxOpsStepResult(BaseModel):
    observation: InboxOpsObservation
    reward: InboxOpsReward
    done: bool
    # Arbitrary extra data; each instance gets its own empty dict by default.
    info: dict[str, Any] = Field(
        default_factory=dict,
    )
# Episode-level bookkeeping. Every field is optional and starts at a neutral
# value (0, 0.0, or None).
class InboxOpsState(State):
    # --- progress counters ---------------------------------------------------
    current_task_index: int = Field(
        description="Zero-based active task index",
        default=0,
    )
    total_tasks: int = Field(
        description="Total number of tasks in the episode",
        default=0,
    )
    completed_tasks: int = Field(
        description="Tasks already graded",
        default=0,
    )
    total_reward: float = Field(
        description="Accumulated episode reward",
        default=0.0,
    )

    # --- snapshot of the active task and the latest step -----------------------
    active_task_id: str | None = Field(
        description="Current active task id",
        default=None,
    )
    # Stored as a plain string, matching the type of `InboxOpsAction.choice`.
    last_action: str | None = Field(
        description="Most recent action value",
        default=None,
    )
    last_reward: float = Field(
        description="Most recent scalar reward",
        default=0.0,
    )
    last_error: str | None = Field(
        description="Most recent safe error message",
        default=None,
    )
# One row of a counterfactual analysis: an action together with a reward, a
# correctness flag, and a reason string. All four fields are required.
#
# NOTE(review): presumably this records what grading would have returned had
# `action` been chosen — confirm against the code that builds these rows.
class CounterfactualActionScore(BaseModel):
    action: ActionChoice  # must be one of the ActionChoice literals
    reward: float
    correct: bool
    reason: str
# Per-task collection of CounterfactualActionScore rows, carried alongside
# the task's id, title, and expected action. All four fields are required.
class CounterfactualAnalysis(BaseModel):
    # Which task the rows belong to.
    task_id: str
    title: str
    expected_action: ActionChoice

    # The scored rows themselves; this model imposes no ordering or
    # completeness requirement on the list.
    scores: list[CounterfactualActionScore]