Skip to content

Commit b4ba5fa

Browse files
authored
Merge pull request #3 from getlago/feature/anthropic-openai-gemini-native
Add native Anthropic, OpenAI, and Gemini SDK support
2 parents e75fb89 + ec249db commit b4ba5fa

58 files changed

Lines changed: 7177 additions & 29 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/dependabot.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
version: 2
2+
3+
# Keep the GitHub Actions in our workflows up to date. We pin every action to a
4+
# full commit SHA for supply-chain safety (see .github/workflows/publish.yml),
5+
# which means SHAs don't auto-update — Dependabot opens PRs that bump the SHA
6+
# and the trailing version comment together, so the pins stay both fixed and
7+
# fresh (and pick up upstream security patches).
8+
updates:
9+
- package-ecosystem: "github-actions"
10+
directory: "/"
11+
schedule:
12+
interval: "weekly"
13+
commit-message:
14+
prefix: "ci"
15+
groups:
16+
github-actions:
17+
patterns:
18+
- "*"

.github/workflows/publish.yml

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
name: publish
2+
3+
# Triggered when a v*.*.* tag is pushed. The tag's version must match the
4+
# version in pyproject.toml — the workflow fails otherwise so you can't
5+
# accidentally publish a mismatched build.
6+
#
7+
# One-time setup on PyPI:
8+
# https://pypi.org/manage/account/publishing/
9+
# PyPI Project Name: lago-agent-sdk
10+
# Owner: getlago
11+
# Repository name: lago-agent-sdk-python
12+
# Workflow name: publish.yml
13+
# Environment name: pypi
14+
#
15+
# Also configure deployment protection on the `pypi` GitHub environment
16+
# (required reviewers + restrict to protected v*.*.* tags) — see RELEASING.md.
17+
#
18+
# After setup, releasing is one command from your laptop:
19+
# git tag v0.1.0 && git push --tags
20+
#
21+
# Third-party actions are pinned to full commit SHAs (not tags) so a hijacked
22+
# or re-pointed tag can't inject code into a job that mints OIDC tokens. The
23+
# trailing comment records the human-readable version each SHA corresponds to.
24+
25+
on:
26+
push:
27+
tags:
28+
- "v*.*.*"
29+
30+
# Least-privilege default for every job. Jobs that need more (OIDC, release
31+
# creation) opt in explicitly below.
32+
permissions:
33+
contents: read
34+
35+
jobs:
36+
# ----------------------------------------------------------------------
37+
# 1. Run the full CI gate first. If anything is red, abort before build.
38+
# ----------------------------------------------------------------------
39+
test:
40+
name: test (py${{ matrix.python-version }})
41+
runs-on: ubuntu-latest
42+
permissions:
43+
contents: read
44+
strategy:
45+
fail-fast: true
46+
matrix:
47+
python-version: ["3.10", "3.11", "3.12"]
48+
steps:
49+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
50+
- name: Install uv
51+
uses: astral-sh/setup-uv@38f3f104447c67c051c4a08e39b64a148898af3a # v4.2.0
52+
with:
53+
python-version: ${{ matrix.python-version }}
54+
enable-cache: true
55+
cache-dependency-glob: "uv.lock"
56+
- run: uv sync --all-extras
57+
- run: uv run ruff check src tests
58+
- run: uv run ruff format --check src tests
59+
- run: uv run mypy src
60+
- run: uv run pytest tests/unit -q --cov=lago_agent_sdk --cov-fail-under=80
61+
62+
# ----------------------------------------------------------------------
63+
# 2. Build the sdist + wheel and assert the tag matches the package version.
64+
# ----------------------------------------------------------------------
65+
build:
66+
name: build
67+
needs: test
68+
runs-on: ubuntu-latest
69+
permissions:
70+
contents: read
71+
steps:
72+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
73+
- name: Install uv
74+
uses: astral-sh/setup-uv@38f3f104447c67c051c4a08e39b64a148898af3a # v4.2.0
75+
with:
76+
python-version: "3.12"
77+
- name: Verify tag matches pyproject version
78+
run: |
79+
TAG="${GITHUB_REF_NAME#v}"
80+
PKG=$(uv run python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
81+
if [ "$TAG" != "$PKG" ]; then
82+
echo "::error::tag $GITHUB_REF_NAME (=$TAG) != pyproject.toml version $PKG"
83+
exit 1
84+
fi
85+
echo "Tag $TAG matches pyproject version $PKG"
86+
- name: Build distributions
87+
run: |
88+
uv pip install --system build
89+
python -m build
90+
- name: Upload dist artifacts
91+
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
92+
with:
93+
name: dist
94+
path: dist/
95+
if-no-files-found: error
96+
97+
# ----------------------------------------------------------------------
98+
# 3. Publish to PyPI via OIDC trusted publishing.
99+
# ----------------------------------------------------------------------
100+
publish:
101+
name: publish to PyPI
102+
needs: build
103+
runs-on: ubuntu-latest
104+
# OIDC requires `id-token: write`; nothing else is needed here.
105+
permissions:
106+
id-token: write
107+
contents: read
108+
# Defense-in-depth: only ever publish from a v*.*.* tag ref, even if the
109+
# environment's protected-tag rule is removed or misconfigured in the UI.
110+
if: startsWith(github.ref, 'refs/tags/v')
111+
# The `pypi` environment is what you bind to in PyPI's trusted-publisher
112+
# config — and where deployment protection rules (required reviewers,
113+
# protected-tag restriction) are enforced. See RELEASING.md.
114+
environment:
115+
name: pypi
116+
url: https://pypi.org/p/lago-agent-sdk
117+
steps:
118+
- name: Download dist
119+
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
120+
with:
121+
name: dist
122+
path: dist/
123+
- name: Publish
124+
uses: pypa/gh-action-pypi-publish@ecb4c3dfd4790f14e30aaeac04855c7413ee9368 # v1.12.2
125+
# No `password:` — OIDC handles auth automatically.
126+
127+
# ----------------------------------------------------------------------
128+
# 4. Create a GitHub Release tied to the tag, attaching the artifacts.
129+
# ----------------------------------------------------------------------
130+
release:
131+
name: github release
132+
needs: publish
133+
runs-on: ubuntu-latest
134+
permissions:
135+
contents: write
136+
steps:
137+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
138+
- name: Download dist
139+
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
140+
with:
141+
name: dist
142+
path: dist/
143+
- name: Create GitHub Release
144+
env:
145+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
146+
run: |
147+
gh release create "$GITHUB_REF_NAME" \
148+
--title "$GITHUB_REF_NAME" \
149+
--generate-notes \
150+
dist/*

CHANGELOG.md

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,48 @@ All notable changes to this project will be documented here. Format follows [Kee
44

55
## [Unreleased]
66

7+
### Fixed
8+
- **Anthropic `messages.create(stream=True)` under-billed input tokens.** The stream wrapper read only top-level `usage`, which on a basic stream appears only on `message_delta` as `{output_tokens: N}` — the authoritative `input_tokens` / `cache_*` counts arrive nested under `message.usage` on the `message_start` event and were ignored, so input billed 0. The wrapper now merges usage from `message_start` (input/cache) and `message_delta` (cumulative output). Sync + async paths; regression tests use the realistic wire shape (delta carries no input echo).
9+
- **Legacy `google-generativeai` SDK silently emitted no events.** The detector matched both the new `google-genai` and the deprecated `google-generativeai` SDKs, but the wrapper only instruments the unified `Client.models` / `.aio` surface — a legacy `GenerativeModel` routed through and wrapped nothing. `wrap()` now rejects legacy clients with a clear pointer to migrate to `google-genai`.
10+
11+
### Security
12+
- Hardened the publish workflow: least-privilege `permissions: contents: read` default (only `publish` gets `id-token: write`, only `release` gets `contents: write`), and every third-party action pinned to a full commit SHA so a re-pointed tag can't inject code into the OIDC-token-minting job.
13+
- Added `if: startsWith(github.ref, 'refs/tags/v')` to the `publish` job as defense-in-depth — it refuses to run on a non-tag ref even if the environment's protected-tag rule is misconfigured.
14+
- Added `.github/dependabot.yml` (github-actions ecosystem) so the SHA pins stay fresh — Dependabot bumps the SHA and version comment together rather than letting actions silently age.
15+
- RELEASING.md now documents `pypi` environment protection (required reviewers + protected-tag restriction) as a **required** setup step, not optional, since trusted publishing is only as strong as that environment's rules.
16+
17+
### Documentation
18+
- README: clarified that `cache_read`, `audio_input`, and `image_input` are **subsets** of `input` for OpenAI and Gemini (not additive) — summing them with `llm_input_tokens` double-counts.
19+
20+
### Added
21+
- Native `google-genai` SDK support covering `client.models.generate_content` + `generate_content_stream`, sync + async (`client.aio.models`).
22+
- `extract_gemini_native` adapter maps `usage_metadata`: `prompt_token_count → input`, `candidates_token_count → output`, `cached_content_token_count → cache_read`, `thoughts_token_count → reasoning`, `prompt_tokens_details[modality=AUDIO/IMAGE] → audio_input/image_input`, `candidates_tokens_details[modality=AUDIO] → audio_output`, count of `candidates[0].content.parts[].function_call → tool_calls`.
23+
- **Gemini 2.5 surfaces reasoning tokens by default** (`thoughts_token_count`) — fires `llm_reasoning_tokens` automatically. Note the semantic difference vs OpenAI: Gemini's reasoning is ADDITIVE to output (`candidates + thoughts = total billable output`); OpenAI's reasoning is a SUBSET of `completion_tokens`. Documented in adapter docstring + README.
24+
- `gemini` optional dependency group: `pip install 'lago-agent-sdk[gemini]'`.
25+
- 21 new unit tests (15 adapter + 6 wrapper) and 4 live integration tests (gated on `GEMINI_API_KEY`). Total: 304 unit tests.
26+
- 5 captured response fixtures from the real Gemini API (plain, tool use, streaming, thinking, multi-turn).
27+
- Detector now returns `gemini` (was `google`) for `google-genai` clients.
28+
29+
### Added (OpenAI — earlier in this branch)
30+
- Native `openai` SDK support covering both APIs: `chat.completions.create` and `responses.create`, each with sync + streaming. Same coverage on `AsyncOpenAI`.
31+
- `extract_openai_native` adapter handles both API shapes with auto-detection:
32+
- Chat Completions: `prompt_tokens`, `completion_tokens`, `prompt_tokens_details.{cached_tokens, audio_tokens}`, `completion_tokens_details.{reasoning_tokens, audio_tokens}`, count of `choices[0].message.tool_calls`.
33+
- Responses API: `input_tokens`, `output_tokens`, `input_tokens_details.cached_tokens`, `output_tokens_details.reasoning_tokens`, count of `output[].type == "function_call"`.
34+
- **First provider to populate `llm_reasoning_tokens`** — OpenAI o-series models (`o4-mini`, `o1`, etc.) surface reasoning token counts separately.
35+
- Auto-injection of `stream_options={"include_usage": True}` when the customer sets `stream=True` without it, so streamed Chat Completions emit usage on the final chunk.
36+
- `audio_output` field added to `CanonicalUsage` (maps to `llm_audio_output_tokens`), populated by GPT-4o-audio responses.
37+
- `openai` optional dependency group: `pip install 'lago-agent-sdk[openai]'`.
38+
- 27 new unit tests (18 adapter + 9 wrapper) and 5 live integration tests (gated on `OPENAI_API_KEY`). Total: 283 unit tests.
39+
- 10 captured response fixtures from the real OpenAI API (plain chat, tool use, auto-caching, streaming with usage, o-series reasoning, multi-turn, Responses API plain + tool use + reasoning).
40+
41+
### Previously in unreleased (Anthropic)
42+
- Native `anthropic` SDK support. Wraps `Anthropic.messages.create` (including `stream=True`) and `Anthropic.messages.stream(...)` context manager. Same coverage on `AsyncAnthropic` (sync + async variants).
43+
- `extract_anthropic_native` adapter with the full Anthropic field map: `input_tokens`, `output_tokens`, `cache_creation_input_tokens`, `cache_read_input_tokens`, `cache_creation.ephemeral_5m_input_tokens`, `cache_creation.ephemeral_1h_input_tokens`, `content[].type == "tool_use"`.
44+
- `anthropic` optional dependency group: `pip install 'lago-agent-sdk[anthropic]'`.
45+
- 19 unit tests (adapter + wrapper) and 3 live integration tests (gated on `ANTHROPIC_API_KEY`).
46+
- 9 captured response fixtures from the real Anthropic API (plain, tool use, 5m + 1h prompt caching, extended thinking, streaming, multi-turn).
47+
48+
749
## [0.1.0] — initial release
850

951
### Added

README.md

Lines changed: 91 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ pip install lago-agent-sdk
2929

3030
For Bedrock support: `pip install 'lago-agent-sdk[bedrock]'` (adds `boto3`).
3131
For Mistral support: `pip install 'lago-agent-sdk[mistral]'` (adds `mistralai`).
32+
For Anthropic native support: `pip install 'lago-agent-sdk[anthropic]'` (adds `anthropic`).
33+
For OpenAI native support: `pip install 'lago-agent-sdk[openai]'` (adds `openai`).
34+
For Gemini native support: `pip install 'lago-agent-sdk[gemini]'` (adds `google-genai`).
3235

3336
## Quickstart — Bedrock
3437

@@ -52,6 +55,25 @@ sdk.flush()
5255

5356
The wrapped client behaves identically to the original — same arguments, same return shape, same exceptions. The SDK adds an in-memory queue that batches events to Lago in the background.
5457

58+
## Quickstart — Anthropic
59+
60+
```python
61+
from anthropic import Anthropic
62+
from lago_agent_sdk import LagoSDK
63+
64+
sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
65+
client = sdk.wrap(Anthropic(api_key="..."))
66+
67+
resp = client.messages.create(
68+
model="claude-sonnet-4-6",
69+
max_tokens=200,
70+
messages=[{"role": "user", "content": "Hello"}],
71+
)
72+
sdk.flush()
73+
```
74+
75+
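Works with `Anthropic` and `AsyncAnthropic`. Both `messages.create(..., stream=True)` and the `messages.stream(...)` context manager are instrumented. For streamed responses, usage is merged from two events: `message_start` carries the authoritative input and cache token counts (nested under `message.usage`), and `message_delta` carries the cumulative output token count — reading `message_delta` alone would bill input as 0.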
76+
5577
## Quickstart — Mistral
5678

5779
```python
@@ -68,6 +90,49 @@ resp = client.chat.complete(
6890
sdk.flush()
6991
```
7092

93+
## Quickstart — OpenAI
94+
95+
```python
96+
from openai import OpenAI
97+
from lago_agent_sdk import LagoSDK
98+
99+
sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
100+
client = sdk.wrap(OpenAI(api_key="..."))
101+
102+
resp = client.chat.completions.create(
103+
model="gpt-4o-mini",
104+
messages=[{"role": "user", "content": "Hello"}],
105+
max_completion_tokens=200,
106+
)
107+
sdk.flush()
108+
```
109+
110+
Works with `OpenAI` and `AsyncOpenAI`. Covers both **Chat Completions** (`client.chat.completions.create`) and the newer **Responses API** (`client.responses.create`), sync + streaming. For streaming, the wrapper auto-injects `stream_options={"include_usage": True}` so the final chunk carries usage data — without it OpenAI emits no usage on streamed responses.
111+
112+
**Reasoning tokens** (`llm_reasoning_tokens`) populate automatically when you call an o-series model (`o4-mini`, `o1`, etc.) — OpenAI is the first provider to expose this metric separately.
113+
114+
## Quickstart — Gemini
115+
116+
```python
117+
from google import genai
118+
from lago_agent_sdk import LagoSDK
119+
120+
sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
121+
client = sdk.wrap(genai.Client(api_key="..."))
122+
123+
resp = client.models.generate_content(
124+
model="gemini-2.5-flash",
125+
contents="Hello",
126+
)
127+
sdk.flush()
128+
```
129+
130+
Wraps the modern `google-genai` SDK (`from google import genai`). Covers `client.models.generate_content` + `generate_content_stream`, sync + async (via `client.aio.models`).
131+
132+
**Reasoning tokens** populate automatically on Gemini 2.5 — the model reasons internally by default and surfaces `thoughts_token_count`. Note the semantic difference vs OpenAI:
133+
- **OpenAI:** `reasoning_tokens` is a *subset* of `completion_tokens` (already counted in output)
134+
- **Gemini:** `thoughts_token_count` is *additive* to `candidates_token_count` (total Google bill = output + reasoning)
135+
71136
## Multi-tenant — pick a subscription per call
72137

73138
Three ways to set the `external_subscription_id`, in priority order:
@@ -92,28 +157,37 @@ Backed by `contextvars` for safe propagation across `asyncio` tasks.
92157
|---|---|---|
93158
| AWS Bedrock | `Converse` (sync + stream) ||
94159
| AWS Bedrock | `InvokeModel` (sync + stream), 7 model families ||
160+
| Anthropic | native SDK (`messages.create` + `messages.stream`, sync + async) ||
95161
| Mistral | native SDK (`chat.complete` + `chat.stream`) ||
96-
| OpenAI | native SDK | Phase 2 |
97-
| Anthropic | native SDK | Phase 2 |
98-
| Google Gemini | native SDK | Phase 2 |
162+
| OpenAI | native SDK (`chat.completions.create` + `responses.create`, sync + async + stream) ||
163+
| Google Gemini | native SDK (`google-genai`: `models.generate_content` + `generate_content_stream`, sync + async) ||
99164
| LiteLLM | callback bridge | Phase 4 |
100165

101166
## Token dimensions captured
102167

103-
`CanonicalUsage` carries 10 numeric fields. Which ones populate depends on the provider:
104-
105-
| Field | Lago metric code | Bedrock | Mistral native |
106-
|---|---|---|---|
107-
| input | `llm_input_tokens` |||
108-
| output | `llm_output_tokens` |||
109-
| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ (when cache hits) |
110-
| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) ||
111-
| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) ||
112-
| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output) |
113-
| tool_calls | `llm_tool_calls` |||
114-
| image_input / audio_input | `llm_image/audio_input_tokens` |||
115-
116-
Reasoning, image, and audio fields will populate when Phase 2 native OpenAI ships.
168+
`CanonicalUsage` carries 11 numeric fields. Which ones populate depends on the provider:
169+
170+
| Field | Lago metric code | Bedrock | Anthropic | Mistral | OpenAI | Gemini |
171+
|---|---|---|---|---|---|---|
172+
| input | `llm_input_tokens` ||||||
173+
| output | `llm_output_tokens` ||||||
174+
| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) || ✓ (when cache hits) | ✓ (auto-cache) | ✓ (CachedContent API) |
175+
| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) |||||
176+
| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) |||||
177+
| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output, even with extended thinking) | ✗ (folded into output) | **✓ (o-series, subset)** | **✓ (Gemini 2.5, additive)** |
178+
| tool_calls | `llm_tool_calls` ||||||
179+
| audio_input | `llm_audio_input_tokens` |||| ✓ (GPT-4o-audio) | ✓ (multimodal AUDIO) |
180+
| audio_output | `llm_audio_output_tokens` |||| ✓ (GPT-4o-audio) | ✓ (multimodal AUDIO) |
181+
| image_input | `llm_image_input_tokens` |||| ✗ (Phase 3) | ✓ (multimodal IMAGE) |
182+
183+
**Semantic note on `reasoning`:**
184+
- **OpenAI's `reasoning_tokens` is a SUBSET of `output`** — already counted in `completion_tokens`.
185+
- **Gemini's `thoughts_token_count` is ADDITIVE to `output`** — `candidates + thoughts = total billable output`.
186+
187+
**Semantic note on input breakdowns (avoid double-counting):**
188+
For both OpenAI and Gemini, `cache_read`, `audio_input`, and `image_input` are **subsets of `input`**, not additive to it — they are a breakdown of tokens already counted in `llm_input_tokens`. For example, OpenAI reports `cached_tokens` under `prompt_tokens_details` *within* `prompt_tokens`, and Gemini's docs state `prompt_token_count` "includes the number of tokens in the cached content". A billable metric that sums `llm_input_tokens + llm_cached_input_tokens` (or `+ llm_audio_input_tokens`, `+ llm_image_input_tokens`) will **double-count**. Bill on `llm_input_tokens` as the total; use the breakdown fields only for cost attribution or discounted-rate tiers (e.g. cached input billed at a lower rate), subtracting them from `input` rather than adding.
189+
190+
OpenAI's Predicted Outputs tokens (`accepted_prediction_tokens`, `rejected_prediction_tokens`) are not surfaced — see the OpenAI adapter docstring for details on this intentional gap.
117191

118192
## Error policy
119193

0 commit comments

Comments
 (0)