6 changes: 6 additions & 0 deletions connector_builder_agents/pyproject.toml
@@ -10,9 +10,15 @@ dependencies = [
    "openai-agents>=0.2.11",
    "mcp-agent>=0.1.15", # Transitive dependency of openai-agents
    "pydantic>=2.0.0", # For phase data models
    "openai>=1.0.0", # For OpenAI evals integration
]

[project.scripts]
airbyte-connector-builder-agents = "connector_builder_agents.src.cli:main"
airbyte-connector-evals = "connector_builder_agents.src.eval_cli:main"

[dependency-groups]
dev = [
    "poethepoet>=0.37.0",
    "pytest>=7.0.0",
]
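The new `[project.scripts]` entry wires the `airbyte-connector-evals` command to `connector_builder_agents.src.eval_cli:main`. A minimal sketch of what that console-script mapping amounts to once the package is installed (a restatement of how `[project.scripts]` entries resolve, not code from this PR):

# Rough equivalent of the generated `airbyte-connector-evals` launcher
# (sketch only; the real launcher script is produced by the installer).
import sys

from connector_builder_agents.src.eval_cli import main

if __name__ == "__main__":
    sys.exit(main())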
43 changes: 33 additions & 10 deletions connector_builder_agents/src/constants.py
@@ -6,13 +6,32 @@
import time
from pathlib import Path

from agents import (
    set_default_openai_api,
    set_default_openai_client,
    set_tracing_disabled,
)

try:
    from agents import (
        set_default_openai_api,
        set_default_openai_client,
        set_tracing_disabled,
    )
except ImportError:

    def set_default_openai_api(*args, **kwargs):
        pass

    def set_default_openai_client(*args, **kwargs):
        pass

    def set_tracing_disabled(*args, **kwargs):
        pass


from dotenv import load_dotenv
from openai import AsyncOpenAI


try:
    from openai import AsyncOpenAI
except ImportError:
    AsyncOpenAI = None


# Initialize env vars:
@@ -80,10 +99,14 @@ def initialize_models() -> None:
    os.environ["OPENAI_API_KEY"] = openai_api_key

    print(f"ℹ️ Using Custom OpenAI-Compatible LLM Endpoint: {OPENAI_BASE_URL}")
    github_models_client = AsyncOpenAI(
        base_url=OPENAI_BASE_URL,
        api_key=os.environ.get("OPENAI_API_KEY", None),
    )
    if AsyncOpenAI is not None:
        github_models_client = AsyncOpenAI(
            base_url=OPENAI_BASE_URL,
            api_key=os.environ.get("OPENAI_API_KEY", None),
        )
    else:
        print("⚠️ OpenAI package not available. Skipping client initialization.")
        return
    set_default_openai_client(
        github_models_client,
        use_for_tracing=False,
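The try/except fallbacks above let `constants` be imported even when the optional `agents` and `openai` packages are absent: the setters become no-ops and `initialize_models()` returns early with a warning instead of raising at import time. A minimal call-site sketch, assuming the required environment variables (e.g. `OPENAI_API_KEY`, `OPENAI_BASE_URL`) are already set:

from connector_builder_agents.src import constants

# Safe whether or not the optional SDKs are installed: with them, the custom
# OpenAI-compatible client is registered; without them, this prints a warning
# and returns without configuring a client.
constants.initialize_models()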
129 changes: 129 additions & 0 deletions connector_builder_agents/src/eval_cli.py
@@ -0,0 +1,129 @@
"""CLI commands for managing OpenAI evals integration."""

import argparse
import json
import sys

from .eval_workflow_integration import EvalWorkflowManager
from .evals_integration import (
DEFAULT_GOLDEN_EXAMPLES,
ConnectorReadinessEvaluator,
create_test_data_jsonl,
)


def create_eval_definition_command(args):
"""Create an OpenAI eval definition."""
evaluator = ConnectorReadinessEvaluator()

try:
eval_id = evaluator.create_eval_definition(
eval_name=args.name, description=args.description
)
print(f"✅ Created eval definition: {eval_id}")
return 0
except Exception as e:
print(f"❌ Failed to create eval definition: {e}")
return 1


def create_test_data_command(args):
"""Create JSONL test data from golden examples."""
try:
create_test_data_jsonl(golden_examples=DEFAULT_GOLDEN_EXAMPLES, output_path=args.output)
print(f"✅ Created test data: {args.output}")
return 0
except Exception as e:
print(f"❌ Failed to create test data: {e}")
return 1


def run_evaluation_command(args):
"""Run evaluation on a readiness report."""
manager = EvalWorkflowManager()

try:
evaluation_result = manager.run_evaluation(api_name=args.api_name, trace_id=args.trace_id)

if evaluation_result:
summary = manager.generate_evaluation_summary(evaluation_result)
print(summary)

if args.output:
with open(args.output, "w") as f:
json.dump(evaluation_result, f, indent=2, default=str)
print(f"\n📄 Detailed results saved to: {args.output}")

return 0
else:
print("❌ Evaluation failed - no readiness report found or evaluation error")
return 1

except Exception as e:
print(f"❌ Evaluation failed: {e}")
return 1


def list_golden_examples_command(args):
"""List available golden examples."""
print("📋 Available Golden Examples:")
print("=" * 50)

for i, golden in enumerate(DEFAULT_GOLDEN_EXAMPLES, 1):
print(f"\n{i}. {golden.api_name}")
print(f" Description: {golden.description}")
print(f" Expected Streams: {', '.join(golden.expected_streams)}")
print(f" Min Records: {golden.min_records_per_stream}")
print(f" Max Warnings: {golden.max_acceptable_warnings}")

return 0


def main():
"""Main CLI entry point."""
parser = argparse.ArgumentParser(
description="OpenAI Evals integration for connector readiness evaluation"
)
subparsers = parser.add_subparsers(dest="command", help="Available commands")

create_eval_parser = subparsers.add_parser(
"create-eval", help="Create an OpenAI eval definition"
)
create_eval_parser.add_argument(
"--name", default="connector-readiness-evaluation", help="Name for the eval definition"
)
create_eval_parser.add_argument(
"--description",
default="Evaluates connector readiness reports against golden examples",
help="Description for the eval definition",
)
create_eval_parser.set_defaults(func=create_eval_definition_command)

test_data_parser = subparsers.add_parser(
"create-test-data", help="Create JSONL test data from golden examples"
)
test_data_parser.add_argument(
"--output", default="eval_test_data.jsonl", help="Output path for JSONL test data"
)
test_data_parser.set_defaults(func=create_test_data_command)

eval_parser = subparsers.add_parser("evaluate", help="Run evaluation on readiness report")
eval_parser.add_argument("--api-name", help="API name for golden example selection")
eval_parser.add_argument("--trace-id", help="Trace ID for correlation")
eval_parser.add_argument("--output", help="Output path for detailed results JSON")
eval_parser.set_defaults(func=run_evaluation_command)

list_parser = subparsers.add_parser("list-golden", help="List available golden examples")
list_parser.set_defaults(func=list_golden_examples_command)

args = parser.parse_args()

if not args.command:
parser.print_help()
return 1

return args.func(args)


if __name__ == "__main__":
sys.exit(main())
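A hedged usage sketch for the new CLI, driving `main()` through `sys.argv` the same way the installed console script would; the API name and output path below are illustrative placeholders, not values from this PR:

import sys

from connector_builder_agents.src import eval_cli

# Shell equivalent: airbyte-connector-evals list-golden
sys.argv = ["airbyte-connector-evals", "list-golden"]
eval_cli.main()

# Shell equivalent: airbyte-connector-evals evaluate --api-name example-api --output results.json
# ("example-api" and "results.json" are hypothetical placeholders.)
sys.argv = [
    "airbyte-connector-evals",
    "evaluate",
    "--api-name", "example-api",
    "--output", "results.json",
]
exit_code = eval_cli.main()  # returns 0 on success, 1 on failure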