forked from aniemerg/atropos
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtype_definitions.py
More file actions
109 lines (76 loc) · 2.97 KB
/
type_definitions.py
File metadata and controls
109 lines (76 loc) · 2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from typing import Any, Dict, List, Literal, Optional
from openai.types.chat import ChatCompletionContentPartParam
from typing_extensions import TypedDict
# Message content: either a plain string or a list of OpenAI chat
# content parts (e.g. for multimodal messages).
Content = str | list[ChatCompletionContentPartParam]
# Deliberately unconstrained placeholder; no structure is imposed here.
Item = Any
# A numeric value that may be integral or floating point.
number = int | float
# UUIDs are passed around as plain strings, not uuid.UUID objects.
UUID = str
class Message(TypedDict):
    """A single chat message in a conversation with an LLM.

    Attributes:
        role: Which party produced the message.
        content: The message payload (plain string or structured content parts).
        reward: Per-message reward, or None when no reward applies.
            NOTE(review): this TypedDict is total, so ``reward`` is a
            *required* key whose value may be None — confirm callers always
            supply it, or consider ``typing_extensions.NotRequired``.
    """

    role: Literal["system", "user", "assistant", "tool"]
    content: Content
    reward: Optional[float]
class AgentStep(TypedDict, total=False):
    """Represents a single step in an agent's history.

    Declared with ``total=False``, so every key is optional and partial
    steps may omit any of them.

    Attributes:
        step: The step number.
        messages: A list of messages exchanged during the step.
        reward: The reward received at this step.
    """

    step: int
    messages: List[Message]
    reward: float
# AgentHistory maps agent ids (e.g. "Player 1", "Player 2") to their respective list of steps.
# Each value is that agent's sequence of AgentStep entries.
AgentHistory = Dict[str, List[AgentStep]]
class Observation(TypedDict):
    """Represents an observation in a game history.

    Attributes:
        raw: The raw observation data (as a dictionary).
        rendered: The rendered form of the observation, suitable for input
            into an LLM (a string or structured content parts; see Content).
    """

    raw: Dict[str, Any]
    rendered: Content
class GameStep(TypedDict):
    """Represents a single step in a game history. Essentially an (s,a,r) triple with metadata.

    Attributes:
        step: The step number.
        agent_id: Identifier of the agent who took the action.
            NOTE(review): earlier docs called this "agent" and said it was
            optional for final steps, but this TypedDict is total, so the
            key is required — confirm producers always set it.
        observation: The observation at this step.
        action: The action taken by the agent.
            NOTE(review): typed as a required str even though earlier docs
            said "(if any)" — verify what producers emit for no-action steps.
        reward: The reward received; either a single float or a dictionary
            mapping agent names to their rewards.
        done: A flag indicating whether the game has ended after this step.
        info: Additional information related to the step.
    """

    step: int
    agent_id: str
    observation: Observation
    action: str
    reward: float | Dict[str, float]
    done: bool
    info: Dict[str, Any]
# GameHistory is represented as a list of game steps, in the order they occurred.
GameHistory = List[GameStep]
class EvaluationConfigGeneral(TypedDict):
    """Configuration section of evaluation results.

    Attributes:
        total_evaluation_time_secondes: Total wall-clock evaluation time.
            NOTE(review): "secondes" is a typo, but it is a serialized dict
            key — renaming it would break every consumer of existing result
            files, so it is kept as-is. Also note the value is a str, not a
            float — presumably pre-formatted by the writer; confirm.
        model_name: Name of the evaluated model, if known.
        generation_parameters: Free-form generation settings used for the run.
    """

    total_evaluation_time_secondes: str
    model_name: Optional[str]
    generation_parameters: Dict[str, Any]
class EvaluationResults(TypedDict):
    """Results section containing metrics for tasks and aggregated results.

    Attributes:
        all: Metric name -> value, aggregated across all tasks.
    """

    all: Dict[str, float]  # Aggregated metrics across all tasks
class EvaluationMetrics(TypedDict):
    """Complete evaluation metrics JSON structure.

    Attributes:
        config_general: Run configuration (see EvaluationConfigGeneral).
        results: Aggregated metric values (see EvaluationResults).
    """

    config_general: EvaluationConfigGeneral
    results: EvaluationResults
class EvaluationSample(TypedDict, total=False):
    """Individual sample data written to JSONL files.

    All fields are optional (``total=False``) to accommodate different
    evaluation scenarios; each value may additionally be None.

    Attributes:
        messages: Chat transcript for the sample, as role/content dicts.
        question: The question posed to the model.
        gold_answer: The reference answer.
        gold_parsed: The parsed form of the reference answer.
        model_parsed: The parsed form of the model's answer.
        score: Integer score assigned to the sample.
        correct: Whether the model's answer was judged correct.
        finish_reason: Why generation stopped (provider-reported).
        response_after_think: Model response with any reasoning/"think"
            section removed — presumably; confirm against the writer.
    """

    messages: Optional[List[Dict[str, str]]]
    question: Optional[str]
    gold_answer: Optional[str]
    gold_parsed: Optional[str]
    model_parsed: Optional[str]
    score: Optional[int]
    correct: Optional[bool]
    finish_reason: Optional[str]
    response_after_think: Optional[str]