-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathpyproject.toml
More file actions
127 lines (114 loc) · 4.31 KB
/
pyproject.toml
File metadata and controls
127 lines (114 loc) · 4.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
[project]
name = "llm-evaluation-system"
# The version is not hard-coded here; it is filled in at build time
# (see [tool.setuptools_scm]).
dynamic = ["version"]
description = "MCP server for agentic LLM evaluation: jury scoring, agent tracing via OpenTelemetry, document-grounded QA generation, PDF reports."
readme = "README.md"
requires-python = ">=3.12,<3.15"
license = { text = "Apache-2.0" }
authors = [{ name = "AWS" }]
keywords = ["mcp", "llm", "evaluation", "inspect-ai", "bedrock", "opentelemetry", "claude-code"]
# Keep the per-version classifiers in step with `requires-python` above:
# every minor release it admits (3.12, 3.13, 3.14) is listed.
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: 3.14",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
dependencies = [
    # --- Core MCP + eval engine ---
    "mcp>=1.0.0",
    "inspect-ai>=0.3.0",
    "boto3>=1.35.0",
    "click>=8.1.7",
    "rich>=13.7.0",
    "prompt-toolkit>=3.0.43",
    "pydantic>=2.0.0",
    "anyio>=4.0.0",

    # --- OpenTelemetry ---
    # Required for capturing Bedrock calls in agentic evals.
    "opentelemetry-instrumentation-botocore>=0.50b0",
    "opentelemetry-sdk>=1.30",
    # Needed by the in-harness OTLP receiver (eval_mcp/otlp_receiver.py),
    # which consumes spans from subprocess-isolated agents.
    "opentelemetry-proto>=1.30",
    # The HTTP OTLP exporter is what the agent subprocess uses to ship its
    # spans back (selected via OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf).
    "opentelemetry-exporter-otlp-proto-http>=1.30",
    # The distro is what makes `opentelemetry-instrument` actually run the
    # instrumentations; without it the CLI is a no-op even though the
    # entry points exist.
    "opentelemetry-distro>=0.50b0",

    # --- Local viewer + HTTP MCP transport ---
    "fastapi>=0.104.1",
    "uvicorn[standard]>=0.24.0",
    "httpx>=0.27.0",

    # --- PDF reports + document QA generation ---
    "pypdf>=4.0.0",
    "fpdf2>=2.8.0",

    # --- Codex installer ---
    # TOML round-tripping: preserves user comments and key order when the
    # installer merges the [mcp_servers.eval] table.
    "tomlkit>=0.13.0",
]
[project.optional-dependencies]
# SDKs for model providers other than Bedrock. Inspect AI imports each of
# them lazily, so this extra is only worth installing if you plan to
# evaluate models from those providers.
providers = [
    "openai>=2.26.0",
    "anthropic>=0.80.0",
    "google-genai>=1.69.0",
    "azure-identity",
    "azure-ai-inference",
    "groq>=0.28.0",
    # mistralai is left out on purpose: the package name is not published
    # on PyPI (returns 404), so any constraint on it here makes `uv lock`
    # unsatisfiable. Because Inspect AI imports it lazily, users who need
    # Mistral support can install whatever fork they prefer.
]

# Container-sandbox path: evaluating containerized agents on Kubernetes.
k8s-sandbox = ["inspect-k8s-sandbox>=0.4.0"]

# Postgres-backed chat history for the EKS web app. The MCP itself does
# not use it; only the FastAPI chat backend in `backend/api/` does.
backend = ["asyncpg>=0.30.0"]

# Development tooling: file watching, tests, formatting, linting, typing.
dev = [
    "watchfiles>=1.0.0",
    "pytest>=9.0.3",
    "pytest-asyncio>=0.21.0",
    "black>=23.0.0",
    "ruff>=0.1.0",
    "mypy>=1.5.0",
]
[project.scripts]
# Console commands created when the package is installed. Each value has
# the form `module.path:function` and names the callable the command runs.
eval-mcp = "eval_mcp.cli:main"
# NOTE(review): this entry point lives in the `backend` package; it may
# need the `backend` extra (asyncpg) at runtime — confirm.
eval-chat = "backend.core.main:main"
[build-system]
# `setuptools-scm` is required at build time because [project] declares
# `version` as dynamic and [tool.setuptools_scm] derives it from git tags.
# `wheel` is deliberately not listed: the setuptools backend requests it
# itself when a wheel build needs it, so naming it here is redundant.
requires = ["setuptools>=68.0.0", "setuptools-scm>=8.0.0"]
build-backend = "setuptools.build_meta"
[tool.setuptools_scm]
# Supplies the `version` that [project] declares as dynamic. The version
# is derived from the latest `v*` git tag: a build on a tagged commit gets
# a clean version (e.g. "0.3.5"), while off-tag commits get a dev version
# (e.g. "0.3.6.dev3").
#
# "no-local-version" strips the "+gHASH.dDATE" suffix so the result is
# always PyPI-acceptable (PEP 440 forbids local version identifiers on
# public indexes).
local_scheme = "no-local-version"
# Used when git history isn't available, e.g. sdist builds and shallow
# clones.
fallback_version = "0.0.0"
[tool.setuptools.packages.find]
# Package discovery: search from the project root, and only pick up
# packages whose names match `backend*` or `eval_mcp*`.
where = ["."]
include = ["backend*", "eval_mcp*"]
[tool.setuptools.package-data]
# Non-Python files to ship inside the built packages, keyed by package
# name. Patterns are relative to each package's own directory.
eval_mcp = [
    "viewer_static/**/*",
    "provider_pricing.json",
    "INSTALL.md",
]
# The key is quoted because the package name contains a dot; unquoted it
# would be read as a dotted key (`core` nested under `eval_mcp`).
# NOTE(review): provider_pricing.json is listed for both packages —
# confirm which directory actually holds the file.
"eval_mcp.core" = ["provider_pricing.json"]
[tool.black]
# Formatter settings. Line length and target version match [tool.ruff] so
# the formatter and the linter agree.
line-length = 100
target-version = ["py312"]
[tool.ruff]
# Linter settings. Line length and target version match [tool.black].
line-length = 100
target-version = "py312"
[tool.mypy]
# Type-check against the lowest Python version `requires-python` allows.
python_version = "3.12"
# Warn when a function with a declared non-Any return type returns Any.
warn_return_any = true
# Warn about mypy config sections that end up matching nothing.
warn_unused_configs = true
# Reject function definitions without (complete) type annotations.
disallow_untyped_defs = true
[tool.pytest.ini_options]
# pytest-asyncio setting (the plugin is listed in the `dev` extra): in
# "auto" mode async test functions run without an explicit asyncio marker.
asyncio_mode = "auto"
# Only collect tests from the `tests` directory.
testpaths = ["tests"]