InboxOps/inference.py at main · Rajal-ui/InboxOps · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
from __future__ import annotations

import os
import sys
from typing import List

from openai import OpenAI
from dotenv import load_dotenv

from client import build_openai_client
from server.environment import InboxOpsEnvironment
from server.tasks import TASKS, is_valid_action


# Environment label printed as ``env=...`` in every ``[START]`` log line.
BENCHMARK = "inboxops"
# Not referenced anywhere in the visible part of this file.
# NOTE(review): presumably consumed by external tooling — confirm before removing.
TASK_NAME = "ops_triage"


# Load variables from a local .env file (python-dotenv) at import time, so the
# os.getenv() lookups further down can pick up values defined there.
load_dotenv()


def _bool_text(value: bool) -> str:
    return "true" if value else "false"


def _clamp(value: float) -> float:
    # Hackathon requirement: 0.0 and 1.0 are invalid.
    return max(0.01, min(0.99, value))


def _format_reward(value: float) -> str:
    """Return *value*, clamped via ``_clamp``, formatted with two decimals."""
    bounded = _clamp(value)
    return format(bounded, ".2f")


def _episode_score(total_reward: float) -> float:
    """Average *total_reward* over the number of entries in ``TASKS``, clamped.

    When ``TASKS`` is empty the raw score is taken as ``0.0`` before clamping.
    """
    n_tasks = len(TASKS)
    if not n_tasks:
        return _clamp(0.0)
    return _clamp(total_reward / n_tasks)


def _select_action(observation: dict) -> str:
    task_id = observation.get("task_id", "")
    policy = {
        "task_easy": "route_it",
        "task_medium": "escalate",
        "task_hard": "reply_with_template",
    }
    return policy.get(task_id, "resolve")


def _llm_enabled() -> bool:
    return os.getenv("NO_LLM", "").strip().lower() not in {"1", "true", "yes"}


def _build_client() -> OpenAI:
    api_base_url = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
    model_name = os.getenv("MODEL_NAME", "gpt-4o-mini")
    api_key = os.getenv("HF_TOKEN")
    if not api_key:
        raise RuntimeError("HF_TOKEN is required")

    return build_openai_client(
        api_base_url=api_base_url,
        model_name=model_name,
        api_key=api_key,
    )


def _build_decision_prompt(observation: dict) -> str:
    metadata = observation.get("metadata", {})
    tags = ", ".join(metadata.get("tags", [])) or "none"
    choices = ", ".join(observation.get("choices", [])) or "none"
    return (
        "Choose the single best next action for this inbox-operations task.\n"
        f"Task ID: {observation.get('task_id', '')}\n"
        f"Title: {observation.get('title', '')}\n"
        f"Difficulty: {observation.get('difficulty', '')}\n"
        f"Prompt: {observation.get('prompt', '')}\n"
        f"Urgency: {metadata.get('urgency', 'unknown')}\n"
        f"Compliance risk: {metadata.get('compliance_risk', 'unknown')}\n"
        f"Business impact: {metadata.get('business_impact', 'unknown')}\n"
        f"Tags: {tags}\n"
        f"Allowed actions: {choices}\n"
        "Return exactly one action string from the allowed actions and nothing else."
    )


def _select_action_with_llm(client: OpenAI, observation: dict) -> str:
    """Ask the chat model for the next action and validate its answer.

    Args:
        client: Chat client exposing ``chat.completions.create``.
        observation: Current environment observation; rendered into the user
            prompt by ``_build_decision_prompt``.

    Returns:
        The lowercased action string accepted by ``is_valid_action``.

    Raises:
        RuntimeError: if the response has no choices, or the reply is not a
            valid action even after stripping wrapper punctuation.
    """
    model_name = os.getenv("MODEL_NAME", "gpt-4o-mini")
    completion = client.chat.completions.create(
        model=model_name,
        messages=[
            {
                "role": "system",
                "content": (
                    "You are an operations triage agent. "
                    "Respond with exactly one allowed action string."
                ),
            },
            {"role": "user", "content": _build_decision_prompt(observation)},
        ],
        temperature=0,
        max_tokens=16,
        stream=False,
    )
    # Report a response without choices with the same error type as any other
    # unusable reply instead of a bare IndexError.
    if not completion.choices:
        raise RuntimeError("invalid llm action: empty")

    action = (completion.choices[0].message.content or "").strip().lower()
    if is_valid_action(action):
        return action

    # Models frequently wrap the answer in quotes/backticks or append a
    # period. Retry once with that decoration removed; the raw reply was
    # checked first so previously accepted answers behave exactly as before.
    cleaned = action.strip(" \t\r\n\"'`.,:;")
    if cleaned and cleaned != action and is_valid_action(cleaned):
        return cleaned

    raise RuntimeError(f"invalid llm action: {action or 'empty'}")


def _emit_warning(message: str) -> None:
    print(f"[WARN] {message}", file=sys.stderr, flush=True)


def main() -> int:
    """Run every task in ``TASKS`` once and print the structured episode log.

    For each task this prints one ``[START]`` line, one ``[STEP]`` line per
    environment step and exactly one ``[END]`` line to stdout; warnings go to
    stderr. Actions come from the LLM when a client could be built and
    ``NO_LLM`` does not disable it; otherwise, or after the first LLM
    failure, the deterministic ``_select_action`` policy is used for the rest
    of the run.

    Every reward and score that is printed is clamped via ``_clamp``. This
    includes the score of an episode that produced no rewards and the
    placeholder reward printed when an episode fails before its first step.

    Returns:
        Always ``0``; per-task failures are reported through
        ``success=false`` on the ``[END]`` line rather than the exit code.
    """
    model_name = os.getenv("MODEL_NAME", "gpt-4o-mini")

    env = InboxOpsEnvironment()
    client: OpenAI | None = None
    try:
        client = _build_client() if _llm_enabled() else None
    except Exception as exc:
        # A missing token or broken client must not abort the run; continue
        # with the deterministic policy instead.
        _emit_warning(f"Failed to build client: {exc}")
        client = None

    for task in TASKS:
        rewards: List[float] = []
        steps = 0
        success = False

        print(f"[START] task={task.task_id} env={BENCHMARK} model={model_name}", flush=True)

        try:
            observation, _info = env.reset(seed=0, task_id=task.task_id)
            done = bool(observation.get("done", False))

            while not done:
                if client is None:
                    action = _select_action(observation)
                else:
                    try:
                        action = _select_action_with_llm(client, observation)
                    except Exception as exc:
                        _emit_warning(f"llm action selection failed, falling back to deterministic policy: {exc}")
                        # Drop the client so later steps and tasks do not
                        # keep retrying a failing endpoint.
                        client = None
                        action = _select_action(observation)

                observation, reward, done, info = env.step(action)

                clamped_reward = _clamp(reward)
                steps += 1
                rewards.append(clamped_reward)

                last_error = info.get("error")
                error_text = str(last_error) if last_error else "null"
                print(
                    f"[STEP] step={steps} action={action} reward={clamped_reward:.2f} "
                    f"done={_bool_text(done)} error={error_text}",
                    flush=True,
                )

            success = True
        except Exception as exc:
            _emit_warning(f"inference loop failed for task {task.task_id}: {exc}")

        # Clamp the mean as well: with no rewards the raw value would be 0.0,
        # which the scoring rules treat as invalid.
        mean_reward = sum(rewards) / len(rewards) if rewards else 0.0
        final_score = _clamp(mean_reward)

        if rewards:
            rewards_str = ",".join(f"{r:.2f}" for r in rewards)
        elif success:
            rewards_str = ""
        else:
            # Failed before the first step: emit one clamped placeholder so
            # the rewards field is neither empty nor an invalid 0.0.
            rewards_str = _format_reward(0.0)

        print(
            f"[END] success={_bool_text(success)} steps={steps} "
            f"score={final_score:.3f} rewards={rewards_str}",
            flush=True,
        )

    return 0


# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())