holmesgpt/pyproject.toml at master · HolmesGPT/holmesgpt · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# Poetry package metadata for the `holmesgpt` distribution.
[tool.poetry]
name = "holmesgpt"
# NOTE(review): "0.0.0" looks like a placeholder that is presumably replaced
# at release time — confirm against the release pipeline.
version = "0.0.0"
# NOTE(review): the description is empty; a one-line summary would make the
# package metadata more useful.
description = ""
authors = ["Natan Yellin <natan@robusta.dev>"]
readme = "README.md"
# Ship the top-level `holmes` package directory in the built distribution.
packages = [{ include = "holmes" }]

# Console entry point: installs a `holmes` command that calls `run` from the
# `holmes.main` module.
[tool.poetry.scripts]
holmes = "holmes.main:run"

# Runtime dependencies. Caret (^) constraints accept compatible updates;
# bare version strings are exact pins.
[tool.poetry.dependencies]
python = "^3.10"
openai = "^2.8.0"
# Exclude 0.13.0 due to corrupted Windows wheel (RECORD file issue)
jiter = "!=0.13.0"
jinja2 = "^3.1.2"
typer = "^0.22.0"
typer-slim = "^0.22.0"
python-benedict = "^0.33.1"
humanize = "^4.9.0"
rich = "^13.7.1"
fastapi = "^0.121.1"
uvicorn = "^0.40"
pydantic = "^2.7"
supabase = "2.28.0"
colorlog = "^6.8.2"
strenum = "^0.4.15"
markdown = "^3.6"
# certifi is calendar-versioned, so a caret constraint would cap it below
# 2025.0.0 and block newer CA bundles. Only enforce the minimum release.
certifi = ">=2024.7.4"
boto3 = "^1.34.145"
cachetools = "^5.5.0"
bs4 = "^0.0.2"
markdownify = "^1.1.0"
opensearch-py = "^2.8.0"
backoff = "^2.2.1"
# Pin to stable release (check https://github.com/BerriAI/litellm/releases for updates)
litellm = "1.83.7"
sentry-sdk = {extras = ["fastapi"], version = "^2.20.0"}
confluent-kafka = "^2.6.1"
kubernetes = "^32.0.1"
mcp = "1.25.0"
prompt-toolkit = "^3.0.51"
pygments = "^2.18.0"
azure-identity = "^1.23.0"
azure-core = "^1.34.0"
requests = "^2.32.4"
PyJWT = {extras = ["crypto"], version = "^2.8.0"}
azure-mgmt-sql = "^4.0.0b21"
pyodbc = "^5.0.1"
azure-monitor-query = "^1.2.0"
azure-mgmt-monitor = "^7.0.0b1"
azure-mgmt-alertsmanagement = "^1.0.0"
azure-mgmt-resource = "^23.3.0"
tenacity = "^9.1.2"
requests-aws4auth = "^1.3.1"
prometrix = "0.2.12"
httpx = {extras = ["socks"], version = "^0.28.1"}
ag-ui-protocol = "^0.1.9"
google-cloud-aiplatform = ">=1.133.0"
slack-sdk = "^3.39.0"

jq = "^1.10.0"
bashlex = "^0.18"
kopf = "^1.37.0"
apscheduler = "^3.10.4"

# Indirect (transitive) dependencies constrained here so that releases
# addressing known security vulnerabilities are selected.
starlette = "^0.49.1"
urllib3 = "^2.6.3"
# NOTE(review): the database client libraries below look like direct
# dependencies rather than security pins — confirm and regroup if so.
sqlalchemy = "^2.0.46"
pg8000 = "^1.31.5"
pymysql = "^1.1.2"
clickhouse-sqlalchemy = "^0.3.2"
pymssql = "^2.3.0"
pymongo = "^4.7.0"
# Required for HTTP CONNECT proxy support in the websockets stdlib client
# (used by the conversation worker's Supabase Realtime subscription).
python-socks = {version = "^2.8.1", extras = ["asyncio"]}

# Optional dependency group: skipped by a plain `poetry install`; opt in
# with `poetry install --with otel`.
[tool.poetry.group.otel]
optional = true

# OpenTelemetry API/SDK, the OTLP gRPC exporter and the httpx instrumentation.
[tool.poetry.group.otel.dependencies]
opentelemetry-api = "^1.30.0"
opentelemetry-sdk = "^1.30.0"
opentelemetry-exporter-otlp-proto-grpc = "^1.30.0"
# The instrumentation package is versioned on a separate 0.x beta line, so it
# only carries a lower bound.
opentelemetry-instrumentation-httpx = ">=0.51b0"

# Optional dependency group for development, testing and docs tooling; opt in
# with `poetry install --with dev`.
[tool.poetry.group.dev]
optional = true

# Development-only dependencies: test runner and plugins, linters/type
# checkers, eval tooling and the MkDocs documentation toolchain.
[tool.poetry.group.dev.dependencies]
mkdocs-material = "^9.5.39"
mkdocs-glightbox = "^0.4.0"
pytest = "^8.3.3"
pytest-xdist = "^3.6.1"
pytest-json-report = "^1.5.0"
ruff = "^0.7.3"
braintrust = "^0.1.2"
autoevals = "^0.0.129"
pre-commit = "^4.0.1"
responses = "^0.23.1"
freezegun = "^1.5.1"
# Only installed on Python < 3.11.
tomli = {version = "^2.0.1", python = "<3.11"}
mypy = "^1.16.0"
pytest-cov = "^6.2.1"
types-python-dateutil = "^2.9.0.20250708"
pytest-dotenv = "^0.5.2"
pytest-sugar = "^1.1.1"
pytest-shared-session-scope = "^0.4.0"
mkdocs-awesome-nav = "^3.2.0"
mike = "^2.1.0"
pytest-asyncio = "^1.3.0"
respx = "^0.22.0"
# OpenTelemetry packages for Coralogix eval tests (send traces/metrics).
# The first three repeat the constraints declared in the optional `otel` group.
opentelemetry-api = "^1.30.0"
opentelemetry-sdk = "^1.30.0"
opentelemetry-exporter-otlp-proto-grpc = "^1.30.0"
# NOTE(review): this lower bound (1.20) differs from the other OpenTelemetry
# packages above (1.30) — confirm whether that is intentional.
opentelemetry-exporter-otlp-proto-http = "^1.20.0"

# PEP 517 build configuration: source builds use poetry-core as the backend.
# The requirement is bounded so that a future major release of the backend
# cannot silently change or break builds.
[build-system]
requires = ["poetry-core>=2.0.0,<3.0.0"]
build-backend = "poetry.core.masonry.api"

# mypy type-checker settings.
[tool.mypy]
# Do not report errors for imports whose type information cannot be found.
ignore_missing_imports = true
scripts_are_modules = true
# Path patterns that mypy skips: test fixtures, build output, the generated
# docs site and experimental code.
exclude = [
    "tests/llm/fixtures/.*",
    "dist/.*",
    "site/.*",
    "experimental/.*",
]

# pytest configuration.
[tool.pytest.ini_options]
# pytest-asyncio "auto" mode: async tests run without an explicit asyncio marker.
asyncio_mode = "auto"
# Registered markers, each written as "name: description". Select tests with
# `pytest -m <name>`.
markers = [
    "llm: Evaluate LLM behaviour (prompt, tools, etc.)",
    "datetime: Tests involving datetime functionality",
    "logs: Tests involving log processing",
    "metrics: Tests involving metrics processing",
    "context_window: Tests involving context window handling",
    "network: Tests requiring network connectivity",
    "skills: Tests involving skill functionality",
    "chain-of-causation: Tests involving chain-of-causation analysis",
    "slackbot: Tests involving Slack bot functionality",
    "numerical: Tests involving numerical calculations or data",
    "counting: Ask holmes to count kubernetes/cloud resources",
    "kubernetes: Tests for Kubernetes-specific troubleshooting scenarios",
    "easy: Tests that are supposed to pass - if these fail, it indicates a regression. Only add evals here if they pass at least 20/20 runs.",
    "medium: Tests that we want to focus on in the near future and get holmes to pass them",
    "hard: Tests that are hard and holmes does not pass today and might not pass in the near future",
    "transparency: Holmes communicates to the user when it encounters problems fulfilling the user's request",
    "kafka: Tests involving Kafka functionality",
    "leaked-information: cases where the eval is accidentally leaking information that should not be available to the LLM - e.g. if the names of environment variables or the docker image gives away part of the test",
    "port-forward: Tests requiring port forwarding to local services (automatically added to tests with port_forwards in test_case.yaml)",
    "toolset-limitation: Tests that cannot be solved no matter how smart the model, unless we improve the underlying toolsets themselves",
    "database: Tests involving database interactions",
    "traces: Tests where the ai is expected to find the solution using the traces",
    "datadog: DataDog toolset",
    "storage: Disk related, like I/O or disk space",
    "question-answer: Simple question-answer tests where Holmes answers straightforward questions about the system",
    "prometheus: Tests involving Prometheus metrics",
    "newrelic: New Relic toolset",
    "no-cicd: Tests to skip in the GitHub action because we're missing prerequisites in the KIND cluster like a Prometheus instance",
    "compaction: Tests for conversation history compaction functionality",
    "embeds: Tests ensuring embeds are as expected",
    "one-test: Runs only one simple eval",
    "loki: Loki toolset",
    "coralogix: Runs coralogix evals",
    "fast: Fast cloud-only tests that don't require Kubernetes infrastructure",
    "frontend: Run frontend tests",
    "grafana: Tests involving Grafana dashboard interactions",
    "images: Tests involving multimodal image content (vision, rendering, MCP ImageContent)",
    "elasticsearch: Tests for Elasticsearch/OpenSearch toolset functionality",
    "victorialogs: Tests for VictoriaLogs toolset functionality",
    "integration: Integration tests requiring external services (e.g., running server, real APIs)",
    "confluence: Tests for Confluence page fetching toolset",
    # Selection criteria for the `regression` marker:
    # - Must pass 30+ iterations reliably with the Sonnet-4.5 model
    # - No external API dependencies (Loki is OK; NewRelic/DataDog are excluded)
    # - Tests core HolmesGPT functionality that should never regress
    # - Fast execution, to avoid slowing CI/CD pipelines
    "regression: Critical regression tests that must always pass",
    "benchmark: Challenging tests for evaluating and benchmarking model capabilities",
    "db-connectors: Tests for the database/SQL connector toolset",
    "mcp: Tests for MCP (Model Context Protocol) server integration",
    "conversation_worker: conversation worker integration tests (requires running Holmes + Supabase)",
    "manual: Tests requiring manual interaction (browser login, etc.) — skipped in CI",
    "token-limit: Tests that enforce a maximum token usage budget"
]

# Command-line options applied to every pytest invocation.
addopts = [
    "--cov-config=pyproject.toml",  # Read coverage settings from this file
    "--cov=holmes",  # Measure coverage of the holmes package
    "-rs",  # Show skip reasons by default
    "--tb=short",  # Show shorter tracebacks by default
    "--durations=10",  # Show 10 slowest tests after each run
    "-n", "auto",  # Run tests in parallel using pytest-xdist (auto-detect CPU count)
    "--dist", "loadgroup",  # Distribute tests by load, respecting xdist_group marks
]

# Logging configuration for pytest: live log output at INFO level and above,
# formatted as timestamp, level, logger name and message.
log_cli = true
log_cli_level = "INFO"
log_cli_format = "%(asctime)s [%(levelname)8s] [%(name)s] %(message)s"
log_cli_date_format = "%Y-%m-%d %H:%M:%S"

# Note: log_file is intentionally not set because pytest-xdist parallel workers
# would corrupt a shared log file. Use log_cli (configured above) for debugging;
# it produces per-worker output that is safe under xdist.
# NOTE(review): confirm that live log output from workers is actually shown
# when tests run in parallel with `-n auto`.

# coverage.py measurement settings.
[tool.coverage.run]
# Measure branch coverage in addition to line coverage.
branch = true
# Leave the test suite itself out of the measurement.
omit = [
    "tests/*",
]

# coverage.py reporting settings.
[tool.coverage.report]
# Fail the coverage report when total coverage is below this percentage.
fail_under = 46