getlago
diff --git a/‎.github/dependabot.yml‎
Lines changed: 18 additions & 0 deletions b/‎.github/dependabot.yml‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎.github/workflows/publish.yml‎
Lines changed: 150 additions & 0 deletions b/‎.github/workflows/publish.yml‎
Lines changed: 150 additions & 0 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 42 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 42 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 91 additions & 17 deletions b/‎README.md‎
Lines changed: 91 additions & 17 deletions
@@ -0,0 +1,18 @@
+version: 2
+
+# Our workflows pin every GitHub Action to a full commit SHA for supply-chain
+# safety (see .github/workflows/publish.yml), so the pins never move on their
+# own. Dependabot checks weekly and opens one grouped PR bumping each SHA and
+# its trailing version comment together — the pins stay fixed yet fresh and
+# still pick up upstream security patches.
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    groups:
+      github-actions:
+        patterns:
+          - "*"
+    schedule:
+      interval: "weekly"
+    commit-message:
+      prefix: "ci"
@@ -0,0 +1,150 @@
+name: publish
+
+# Triggered when a v*.*.* tag is pushed. The tag's version must match the
+# version in pyproject.toml — the workflow fails otherwise so you can't
+# accidentally publish a mismatched build.
+#
+# One-time setup on PyPI:
+#   https://pypi.org/manage/account/publishing/
+#   PyPI Project Name: lago-agent-sdk
+#   Owner:             getlago
+#   Repository name:   lago-agent-sdk-python
+#   Workflow name:     publish.yml
+#   Environment name:  pypi
+#
+# Also configure deployment protection on the `pypi` GitHub environment
+# (required reviewers + restrict to protected v*.*.* tags) — see RELEASING.md.
+#
+# After setup, releasing is one command from your laptop (push only the new
+# tag — `git push --tags` would also push any stale local tags):
+#   git tag v0.1.0 && git push origin v0.1.0
+#
+# Third-party actions are pinned to full commit SHAs (not tags) so a hijacked
+# or re-pointed tag can't inject code into a job that mints OIDC tokens. The
+# trailing comment records the human-readable version each SHA corresponds to.
+
+on:
+  push:
+    tags:
+      - "v*.*.*"
+
+# Least-privilege default for every job. Jobs that need more (OIDC, release
+# creation) opt in explicitly below.
+permissions:
+  contents: read
+
+jobs:
+  # ----------------------------------------------------------------------
+  # 1. Run the full CI gate first. If anything is red, abort before build.
+  # ----------------------------------------------------------------------
+  test:
+    name: test (py${{ matrix.python-version }})
+    # One job per supported interpreter; with fail-fast the first failure
+    # cancels the remaining matrix jobs.
+    strategy:
+      fail-fast: true
+      matrix:
+        python-version:
+          - "3.10"
+          - "3.11"
+          - "3.12"
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - name: Install uv
+        uses: astral-sh/setup-uv@38f3f104447c67c051c4a08e39b64a148898af3a # v4.2.0
+        with:
+          enable-cache: true
+          cache-dependency-glob: "uv.lock"
+          python-version: ${{ matrix.python-version }}
+      # Install the project with every optional extra.
+      - run: uv sync --all-extras
+      # Lint, format check, type check, then unit tests with an 80% coverage
+      # floor.
+      - run: uv run ruff check src tests
+      - run: uv run ruff format --check src tests
+      - run: uv run mypy src
+      - run: uv run pytest tests/unit -q --cov=lago_agent_sdk --cov-fail-under=80
+
+  # ----------------------------------------------------------------------
+  # 2. Build the sdist + wheel and assert the tag matches the package version.
+  # ----------------------------------------------------------------------
+  build:
+    name: build
+    needs: test
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - name: Install uv
+        uses: astral-sh/setup-uv@38f3f104447c67c051c4a08e39b64a148898af3a # v4.2.0
+        with:
+          python-version: "3.12"
+      # Fail before building if the pushed tag (minus its leading "v") differs
+      # from [project].version in pyproject.toml.
+      - name: Verify tag matches pyproject version
+        run: |
+          TAG="${GITHUB_REF_NAME#v}"
+          PKG=$(uv run python -c "import tomllib; print(tomllib.load(open('pyproject.toml','rb'))['project']['version'])")
+          if [ "$TAG" != "$PKG" ]; then
+            echo "::error::tag $GITHUB_REF_NAME (=$TAG) != pyproject.toml version $PKG"
+            exit 1
+          fi
+          echo "Tag $TAG matches pyproject version $PKG"
+      # `uv build` writes the sdist and wheel to dist/ using an isolated build
+      # environment, so nothing has to be installed into the runner's system
+      # interpreter (which may be externally managed and reject `--system`
+      # installs).
+      - name: Build distributions
+        run: uv build
+      # Fail the job rather than upload an empty artifact if dist/ has no files.
+      - name: Upload dist artifacts
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
+        with:
+          name: dist
+          path: dist/
+          if-no-files-found: error
+
+  # ----------------------------------------------------------------------
+  # 3. Publish to PyPI via OIDC trusted publishing.
+  # ----------------------------------------------------------------------
+  publish:
+    name: publish to PyPI
+    needs: build
+    # Defense-in-depth: refuse to publish from anything but a v* tag ref, even
+    # if the environment's protected-tag rule is removed or misconfigured in
+    # the UI.
+    if: startsWith(github.ref, 'refs/tags/v')
+    runs-on: ubuntu-latest
+    # `pypi` is the environment named in PyPI's trusted-publisher config, and
+    # the place where deployment protection rules (required reviewers,
+    # protected-tag restriction) are enforced. See RELEASING.md.
+    environment:
+      name: pypi
+      url: https://pypi.org/p/lago-agent-sdk
+    # `id-token: write` lets this job request the OIDC token used for trusted
+    # publishing; contents stays read-only.
+    permissions:
+      contents: read
+      id-token: write
+    steps:
+      - name: Download dist
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        with:
+          path: dist/
+          name: dist
+      - name: Publish
+        # No `password:` input — OIDC handles auth automatically.
+        uses: pypa/gh-action-pypi-publish@ecb4c3dfd4790f14e30aaeac04855c7413ee9368 # v1.12.2
+
+  # ----------------------------------------------------------------------
+  # 4. Create a GitHub Release tied to the tag, attaching the artifacts.
+  # ----------------------------------------------------------------------
+  release:
+    name: github release
+    needs: publish
+    runs-on: ubuntu-latest
+    # Creating a release requires write access to repository contents.
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - name: Download dist
+        uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
+        with:
+          path: dist/
+          name: dist
+      - name: Create GitHub Release
+        # Folded scalar: the lines below are joined into a single `gh` command
+        # that titles the release after the tag and attaches everything in
+        # dist/.
+        run: >-
+          gh release create "$GITHUB_REF_NAME"
+          --title "$GITHUB_REF_NAME"
+          --generate-notes
+          dist/*
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -4,6 +4,48 @@ All notable changes to this project will be documented here. Format follows [Kee
 
 ## [Unreleased]
 
+### Fixed
+- **Anthropic `messages.create(stream=True)` under-billed input tokens.** The stream wrapper read only top-level `usage`, which on a basic stream appears only on `message_delta` as `{output_tokens: N}` — the authoritative `input_tokens` / `cache_*` counts arrive nested under `message.usage` on the `message_start` event and were ignored, so input billed 0. The wrapper now merges usage from `message_start` (input/cache) and `message_delta` (cumulative output). Sync + async paths; regression tests use the realistic wire shape (delta carries no input echo).
+- **Legacy `google-generativeai` SDK silently emitted no events.** The detector matched both the new `google-genai` and the deprecated `google-generativeai` SDKs, but the wrapper only instruments the unified `Client.models` / `.aio` surface — a legacy `GenerativeModel` routed through and wrapped nothing. `wrap()` now rejects legacy clients with a clear pointer to migrate to `google-genai`.
+
+### Security
+- Hardened the publish workflow: least-privilege `permissions: contents: read` default (only `publish` gets `id-token: write`, only `release` gets `contents: write`), and every third-party action pinned to a full commit SHA so a re-pointed tag can't inject code into the OIDC-token-minting job.
+- Added `if: startsWith(github.ref, 'refs/tags/v')` to the `publish` job as defense-in-depth — it refuses to run on a non-tag ref even if the environment's protected-tag rule is misconfigured.
+- Added `.github/dependabot.yml` (github-actions ecosystem) so the SHA pins stay fresh — Dependabot bumps the SHA and version comment together rather than letting actions silently age.
+- RELEASING.md now documents `pypi` environment protection (required reviewers + protected-tag restriction) as a **required** setup step, not optional, since trusted publishing is only as strong as that environment's rules.
+
+### Documentation
+- README: clarified that `cache_read`, `audio_input`, and `image_input` are **subsets** of `input` for OpenAI and Gemini (not additive) — summing them with `llm_input_tokens` double-counts.
+
+### Added
+- Native `google-genai` SDK support covering `client.models.generate_content` + `generate_content_stream`, sync + async (`client.aio.models`).
+- `extract_gemini_native` adapter maps `usage_metadata`: `prompt_token_count → input`, `candidates_token_count → output`, `cached_content_token_count → cache_read`, `thoughts_token_count → reasoning`, `prompt_tokens_details[modality=AUDIO/IMAGE] → audio_input/image_input`, `candidates_tokens_details[modality=AUDIO] → audio_output`, count of `candidates[0].content.parts[].function_call → tool_calls`.
+- **Gemini 2.5 surfaces reasoning tokens by default** (`thoughts_token_count`) — fires `llm_reasoning_tokens` automatically. Note the semantic difference vs OpenAI: Gemini's reasoning is ADDITIVE to output (`candidates + thoughts = total billable output`); OpenAI's reasoning is a SUBSET of `completion_tokens`. Documented in adapter docstring + README.
+- `gemini` optional dependency group: `pip install 'lago-agent-sdk[gemini]'`.
+- 21 new unit tests (15 adapter + 6 wrapper) and 4 live integration tests (gated on `GEMINI_API_KEY`). Total: 304 unit tests.
+- 5 captured response fixtures from the real Gemini API (plain, tool use, streaming, thinking, multi-turn).
+- Detector now returns `gemini` (was `google`) for `google-genai` clients.
+
+### Added (OpenAI — earlier in this branch)
+- Native `openai` SDK support covering both APIs: `chat.completions.create` and `responses.create`, each with sync + streaming. Same coverage on `AsyncOpenAI`.
+- `extract_openai_native` adapter handles both API shapes with auto-detection:
+  - Chat Completions: `prompt_tokens`, `completion_tokens`, `prompt_tokens_details.{cached_tokens, audio_tokens}`, `completion_tokens_details.{reasoning_tokens, audio_tokens}`, count of `choices[0].message.tool_calls`.
+  - Responses API: `input_tokens`, `output_tokens`, `input_tokens_details.cached_tokens`, `output_tokens_details.reasoning_tokens`, count of `output[].type == "function_call"`.
+- **First provider to populate `llm_reasoning_tokens`** — OpenAI o-series models (`o4-mini`, `o1`, etc.) surface reasoning token counts separately.
+- Auto-injection of `stream_options={"include_usage": True}` when the customer sets `stream=True` without it, so streamed Chat Completions emit usage on the final chunk.
+- `audio_output` field added to `CanonicalUsage` (maps to `llm_audio_output_tokens`), populated by GPT-4o-audio responses.
+- `openai` optional dependency group: `pip install 'lago-agent-sdk[openai]'`.
+- 27 new unit tests (18 adapter + 9 wrapper) and 5 live integration tests (gated on `OPENAI_API_KEY`). Total: 283 unit tests.
+- 10 captured response fixtures from the real OpenAI API (plain chat, tool use, auto-caching, streaming with usage, o-series reasoning, multi-turn, Responses API plain + tool use + reasoning).
+
+### Previously in unreleased (Anthropic)
+- Native `anthropic` SDK support. Wraps `Anthropic.messages.create` (including `stream=True`) and `Anthropic.messages.stream(...)` context manager. Same coverage on `AsyncAnthropic` (sync + async variants).
+- `extract_anthropic_native` adapter with the full Anthropic field map: `input_tokens`, `output_tokens`, `cache_creation_input_tokens`, `cache_read_input_tokens`, `cache_creation.ephemeral_5m_input_tokens`, `cache_creation.ephemeral_1h_input_tokens`, `content[].type == "tool_use"`.
+- `anthropic` optional dependency group: `pip install 'lago-agent-sdk[anthropic]'`.
+- 19 unit tests (adapter + wrapper) and 3 live integration tests (gated on `ANTHROPIC_API_KEY`).
+- 9 captured response fixtures from the real Anthropic API (plain, tool use, 5m + 1h prompt caching, extended thinking, streaming, multi-turn).
+
+
 ## [0.1.0] — initial release
 
 ### Added
 
@@ -29,6 +29,9 @@ pip install lago-agent-sdk
 
 For Bedrock support: `pip install 'lago-agent-sdk[bedrock]'` (adds `boto3`).
 For Mistral support: `pip install 'lago-agent-sdk[mistral]'` (adds `mistralai`).
+For Anthropic native support: `pip install 'lago-agent-sdk[anthropic]'` (adds `anthropic`).
+For OpenAI native support: `pip install 'lago-agent-sdk[openai]'` (adds `openai`).
+For Gemini native support: `pip install 'lago-agent-sdk[gemini]'` (adds `google-genai`).
 
 ## Quickstart — Bedrock
 
@@ -52,6 +55,25 @@ sdk.flush()
 
 The wrapped client behaves identically to the original — same arguments, same return shape, same exceptions. The SDK adds an in-memory queue that batches events to Lago in the background.
 
+## Quickstart — Anthropic
+
+```python
+from anthropic import Anthropic
+from lago_agent_sdk import LagoSDK
+
+sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
+client = sdk.wrap(Anthropic(api_key="..."))
+
+resp = client.messages.create(
+    model="claude-sonnet-4-6",
+    max_tokens=200,
+    messages=[{"role": "user", "content": "Hello"}],
+)
+sdk.flush()
+```
+
+Works with `Anthropic` and `AsyncAnthropic`. Both `messages.create(..., stream=True)` and the `messages.stream(...)` context manager are instrumented — for streamed responses, usage is merged from the `message_start` event (input and cache tokens) and the `message_delta` event (cumulative output tokens).
+
 ## Quickstart — Mistral
 
 ```python
@@ -68,6 +90,49 @@ resp = client.chat.complete(
 sdk.flush()
 ```
 
+## Quickstart — OpenAI
+
+```python
+from openai import OpenAI
+from lago_agent_sdk import LagoSDK
+
+sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
+client = sdk.wrap(OpenAI(api_key="..."))
+
+resp = client.chat.completions.create(
+    model="gpt-4o-mini",
+    messages=[{"role": "user", "content": "Hello"}],
+    max_completion_tokens=200,
+)
+sdk.flush()
+```
+
+Works with `OpenAI` and `AsyncOpenAI`. Covers both **Chat Completions** (`client.chat.completions.create`) and the newer **Responses API** (`client.responses.create`), sync + streaming. For streaming, the wrapper auto-injects `stream_options={"include_usage": True}` so the final chunk carries usage data — without it OpenAI emits no usage on streamed responses.
+
+**Reasoning tokens** (`llm_reasoning_tokens`) populate automatically when you call an o-series model (`o4-mini`, `o1`, etc.) — OpenAI is the first provider to expose this metric separately.
+
+## Quickstart — Gemini
+
+```python
+from google import genai
+from lago_agent_sdk import LagoSDK
+
+sdk = LagoSDK(api_key="...", default_subscription_id="sub_acme")
+client = sdk.wrap(genai.Client(api_key="..."))
+
+resp = client.models.generate_content(
+    model="gemini-2.5-flash",
+    contents="Hello",
+)
+sdk.flush()
+```
+
+Wraps the modern `google-genai` SDK (`from google import genai`). Covers `client.models.generate_content` + `generate_content_stream`, sync + async (via `client.aio.models`).
+
+**Reasoning tokens** populate automatically on Gemini 2.5 — the model reasons internally by default and surfaces `thoughts_token_count`. Note the semantic difference vs OpenAI:
+- **OpenAI:** `reasoning_tokens` is a *subset* of `completion_tokens` (already counted in output)
+- **Gemini:** `thoughts_token_count` is *additive* to `candidates_token_count` (total Google bill = output + reasoning)
+
 ## Multi-tenant — pick a subscription per call
 
 Three ways to set the `external_subscription_id`, in priority order:
@@ -92,28 +157,37 @@ Backed by `contextvars` for safe propagation across `asyncio` tasks.
 |---|---|---|
 | AWS Bedrock | `Converse` (sync + stream) | ✓ |
 | AWS Bedrock | `InvokeModel` (sync + stream), 7 model families | ✓ |
+| Anthropic | native SDK (`messages.create` + `messages.stream`, sync + async) | ✓ |
 | Mistral | native SDK (`chat.complete` + `chat.stream`) | ✓ |
-| OpenAI | native SDK | Phase 2 |
-| Anthropic | native SDK | Phase 2 |
-| Google Gemini | native SDK | Phase 2 |
+| OpenAI | native SDK (`chat.completions.create` + `responses.create`, sync + async + stream) | ✓ |
+| Google Gemini | native SDK (`google-genai`: `models.generate_content` + `generate_content_stream`, sync + async) | ✓ |
 | LiteLLM | callback bridge | Phase 4 |
 
 ## Token dimensions captured
 
-`CanonicalUsage` carries 10 numeric fields. Which ones populate depends on the provider:
-
-| Field | Lago metric code | Bedrock | Mistral native |
-|---|---|---|---|
-| input | `llm_input_tokens` | ✓ | ✓ |
-| output | `llm_output_tokens` | ✓ | ✓ |
-| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ (when cache hits) |
-| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) | ✗ |
-| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) | ✗ |
-| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output) |
-| tool_calls | `llm_tool_calls` | ✓ | ✓ |
-| image_input / audio_input | `llm_image/audio_input_tokens` | ✗ | ✗ |
-
-Reasoning, image, and audio fields will populate when Phase 2 native OpenAI ships.
+`CanonicalUsage` carries 11 numeric fields. Which ones populate depends on the provider:
+
+| Field | Lago metric code | Bedrock | Anthropic | Mistral | OpenAI | Gemini |
+|---|---|---|---|---|---|---|
+| input | `llm_input_tokens` | ✓ | ✓ | ✓ | ✓ | ✓ |
+| output | `llm_output_tokens` | ✓ | ✓ | ✓ | ✓ | ✓ |
+| cache_read | `llm_cached_input_tokens` | ✓ (Anthropic) | ✓ | ✓ (when cache hits) | ✓ (auto-cache) | ✓ (CachedContent API) |
+| cache_write | `llm_cache_creation_tokens` | ✓ (Anthropic) | ✓ | ✗ | ✗ | ✗ |
+| cache_write_5m / 1h | `llm_cache_write_5m/1h_tokens` | ✓ (Anthropic InvokeModel) | ✓ | ✗ | ✗ | ✗ |
+| reasoning | `llm_reasoning_tokens` | ✗ (folded into output) | ✗ (folded into output, even with extended thinking) | ✗ (folded into output) | **✓ (o-series, subset)** | **✓ (Gemini 2.5, additive)** |
+| tool_calls | `llm_tool_calls` | ✓ | ✓ | ✓ | ✓ | ✓ |
+| audio_input | `llm_audio_input_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio) | ✓ (multimodal AUDIO) |
+| audio_output | `llm_audio_output_tokens` | ✗ | ✗ | ✗ | ✓ (GPT-4o-audio) | ✓ (multimodal AUDIO) |
+| image_input | `llm_image_input_tokens` | ✗ | ✗ | ✗ | ✗ (Phase 3) | ✓ (multimodal IMAGE) |
+
+**Semantic note on `reasoning`:**
+- **OpenAI's `reasoning_tokens` is a SUBSET of `output`** — already counted in `completion_tokens`.
+- **Gemini's `thoughts_token_count` is ADDITIVE to `output`** — `candidates + thoughts = total billable output`.
+
+**Semantic note on input breakdowns (avoid double-counting):**
+For both OpenAI and Gemini, `cache_read`, `audio_input`, and `image_input` are **subsets of `input`**, not additive to it — they are a breakdown of tokens already counted in `llm_input_tokens`. For example, OpenAI reports `cached_tokens` under `prompt_tokens_details` *within* `prompt_tokens`, and Gemini's docs state `prompt_token_count` "includes the number of tokens in the cached content". A billable metric that sums `llm_input_tokens + llm_cached_input_tokens` (or `+ llm_audio_input_tokens`, `+ llm_image_input_tokens`) will **double-count**. Bill on `llm_input_tokens` as the total; use the breakdown fields only for cost attribution or discounted-rate tiers (e.g. cached input billed at a lower rate), subtracting them from `input` rather than adding.
+
+OpenAI's Predicted Outputs tokens (`accepted_prediction_tokens`, `rejected_prediction_tokens`) are not surfaced — see the OpenAI adapter docstring for details on this intentional gap.
 
 ## Error policy