Skip to content

Commit 48ac304

Browse files
Merge pull request #31 from comet-ml/feat/opik-environment-support
feat: add OPIK_ENVIRONMENT support for dev/test/prod trace separation
2 parents 54950d9 + 863462b commit 48ac304

9 files changed

Lines changed: 29 additions & 15 deletions

File tree

.env.example

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,14 @@ ISSUE_NUMBER=
2121
OPIK_API_KEY=
2222
OPIK_WORKSPACE=
2323

24+
# Opik environment tag — separates traces by context in the Opik UI.
25+
# Convention:
26+
# dev — local development and offline eval runs
27+
# test — test suite runs (set automatically by run_test_suite.py)
28+
# staging — UAT / pre-production validation
29+
# prod — GitHub Action (production triage)
30+
OPIK_ENVIRONMENT=dev
31+
2432
# Opik project for eval traces — defaults to scout:{SCOUT_GITHUB_REPO_OWNER}/{SCOUT_GITHUB_REPO_NAME}
2533
# SCOUT_EVAL_OPIK_PROJECT=
2634

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ jobs:
7373
SCOUT_GITHUB_REPO_NAME: ${{ vars.SCOUT_GITHUB_REPO_NAME }}
7474
OPIK_API_KEY: ${{ secrets.OPIK_API_KEY }}
7575
OPIK_WORKSPACE: ${{ vars.OPIK_WORKSPACE }}
76+
OPIK_ENVIRONMENT: prod
7677
ISSUE_NUMBER: ${{ github.event.issue.number || github.event.inputs.issue_number }}
7778
```
7879
@@ -93,6 +94,7 @@ The GitHub App must have these permissions:
9394
| `SCOUT_ESCALATION_TAG` | no | Label for escalated issues (default: `Escalated request`) |
9495
| `OPIK_API_KEY` | **yes** | Opik API key. Opik is required — Scout sources its system prompt from Opik and traces every run there. |
9596
| `OPIK_WORKSPACE` | **yes** | Opik workspace name |
97+
| `OPIK_ENVIRONMENT` | no | Tags traces by environment in the Opik UI. Convention: `dev` (local), `test` (test suite — set automatically), `staging` (UAT), `prod` (GitHub Action). |
9698
| `SCOUT_FEEDBACK_SINCE_DAYS` | no | Feedback sync only: how many days back to scan issues for 👍/👎 reactions (default: `7`) |
9799
| `ISSUE_NUMBER` | no | Override issue number (auto-detected from event payload) |
98100
| `SCOUT_MODEL` | no | Anthropic model ID (default: `claude-sonnet-4-6`) |

evals/README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@ ANTHROPIC_API_KEY= # required — Scout agent and LLM judge both use Claud
1818
GITHUB_TOKEN= # required — fetching issues and real-GitHub file mode during evals
1919
OPIK_API_KEY= # required — logging traces and reading datasets
2020
OPIK_WORKSPACE= # required — your Opik workspace name
21+
OPIK_ENVIRONMENT=dev # optional — dev (local + offline evals), test (test suite — auto-set), staging (UAT), prod (GitHub Action)
2122
SCOUT_GITHUB_REPO_OWNER= # required — repo to fetch issues from
2223
SCOUT_GITHUB_REPO_NAME= # required — repo to fetch issues from
23-
SCOUT_EVAL_OPIK_PROJECT= # Optional — defaults to scout:{SCOUT_GITHUB_REPO_OWNER}/{SCOUT_GITHUB_REPO_NAME}
24+
SCOUT_EVAL_OPIK_PROJECT= # optional — defaults to scout:{SCOUT_GITHUB_REPO_OWNER}/{SCOUT_GITHUB_REPO_NAME}
2425
```
2526

2627
---
@@ -99,6 +100,8 @@ python evals/run_test_suite.py
99100

100101
Pass rate is printed on completion and visible in the Opik dashboard.
101102

103+
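> **Environment convention:** `run_test_suite.py` defaults `OPIK_ENVIRONMENT` to `test` whenever the variable is not already set in the shell — no manual setup needed. The default is applied before `.env` is loaded, and `.env` values no longer override existing environment variables, so the `dev` entry in `.env` does not replace it. Offline eval runs use `dev` from `.env`. To override either, set `OPIK_ENVIRONMENT` in the shell before running (e.g. `OPIK_ENVIRONMENT=staging` for UAT). The GitHub Action sets `prod`.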
104+
102105
---
103106

104107
## Simulation modes

evals/run_offline_eval.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
from opik.evaluation.metrics import Usefulness
3737
from dotenv import load_dotenv
3838

39-
load_dotenv(override=True)
39+
load_dotenv()
4040

4141
from scout.agent import make_client, run_agent # noqa: E402
4242
from scout.providers.scenarios import build # noqa: E402
@@ -83,7 +83,7 @@ def eval_task(item: dict) -> dict:
8383
sim = build(scenario, spec)
8484
logger.info("scenario=%s target=#%d", scenario, target)
8585

86-
comment, _trace_id = run_agent(
86+
comment, _ = run_agent(
8787
sim,
8888
target,
8989
client=client,

evals/run_test_suite.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@
3030
import opik
3131
from dotenv import load_dotenv
3232

33-
load_dotenv(override=True)
33+
os.environ.setdefault("OPIK_ENVIRONMENT", "test")
34+
load_dotenv()
3435

3536
from scout.agent import make_client, run_agent # noqa: E402
3637
from scout.providers.scenarios import build # noqa: E402

evals/utils/fetch_github_issues.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
from dotenv import load_dotenv
3434
from github import Github
3535

36-
load_dotenv(override=True)
36+
load_dotenv()
3737

3838

3939
def fetch_issue(issue_obj) -> dict:

evals/utils/seed_dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
import opik
3131
from dotenv import load_dotenv
3232

33-
load_dotenv(override=True)
33+
load_dotenv()
3434

3535
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
3636
logger = logging.getLogger(__name__)

evals/utils/seed_test_suite.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
import opik
3434
from dotenv import load_dotenv
3535

36-
load_dotenv(override=True)
36+
load_dotenv()
3737

3838
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
3939
logger = logging.getLogger(__name__)
@@ -77,7 +77,7 @@ def items_from_github(path: str) -> tuple[list[dict], str, str]:
7777
return items, owner, name
7878

7979

80-
def items_from_starter() -> list[dict]:
80+
def items_from_starter() -> tuple[list[dict], list[str], dict]:
8181
from evals.utils.starter_scenarios import GLOBAL_ASSERTIONS, GLOBAL_EXECUTION_POLICY, STARTER_SCENARIOS
8282
items = []
8383
for s in STARTER_SCENARIOS:
@@ -91,11 +91,11 @@ def items_from_starter() -> list[dict]:
9191

9292
def _get_or_create_suite(client: opik.Opik, global_assertions, global_execution_policy):
9393
try:
94-
suite = client.get_test_suite(name=SUITE_NAME, project_name=SUITE_PROJECT)
95-
logger.info("Found existing test suite %r — reusing.", SUITE_NAME)
96-
return suite
94+
suite = client.get_test_suite(name=SUITE_NAME, project_name=SUITE_PROJECT)
95+
logger.info("Found existing test suite %r — reusing.", SUITE_NAME)
96+
return suite
9797
except Exception as e:
98-
logger.info("Test suite %r not found (%s) — creating it.", SUITE_NAME, type(e).__name__)
98+
logger.info("Test suite %r not found (%s) — creating it.", SUITE_NAME, type(e).__name__)
9999

100100
return client.create_test_suite(
101101
name=SUITE_NAME,
@@ -131,8 +131,8 @@ def main() -> None:
131131
all_items: list[dict] = []
132132

133133
if args.from_github:
134-
items, _owner, _name = items_from_github(args.from_github)
135-
all_items.extend(items)
134+
github_items, *_ = items_from_github(args.from_github)
135+
all_items.extend(github_items)
136136

137137
if args.from_starter:
138138
items, global_assertions, global_execution_policy = items_from_starter()

src/scout/triage.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ def load_system_prompt() -> str:
281281
try:
282282
chat = client.get_chat_prompt(
283283
name=SCOUT_OPIK_PROMPT_NAME,
284-
version=version,
284+
commit=version,
285285
project_name=OPIK_PROJECT,
286286
)
287287
except PromptTemplateStructureMismatch:

0 commit comments

Comments
 (0)