BudEcosystem
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 57 additions & 0 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 57 additions & 0 deletions
diff --git a/‎.github/workflows/publish.yml‎
Lines changed: 69 additions & 0 deletions b/‎.github/workflows/publish.yml‎
Lines changed: 69 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 25 additions & 0 deletions b/‎.gitignore‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 144 additions & 0 deletions b/‎README.md‎
Lines changed: 144 additions & 0 deletions
diff --git a/‎example/fetch_catalog.py‎
Lines changed: 75 additions & 0 deletions b/‎example/fetch_catalog.py‎
Lines changed: 75 additions & 0 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 23 additions & 0 deletions b/‎pyproject.toml‎
Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,57 @@
+# Continuous integration: lint and test on every supported Python version,
+# and verify that the package builds into valid distributions.
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+# Least privilege: these jobs only need to read the repository contents.
+permissions:
+  contents: read
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # Quoted so that 3.10 is not parsed as the float 3.1.
+        python-version: ["3.10", "3.11", "3.12", "3.13"]
+    env:
+      # Pin every uv command in this job to the matrix interpreter instead of
+      # relying on uv's interpreter discovery order.
+      UV_PYTHON: ${{ matrix.python-version }}
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+
+      - name: Set up Python ${{ matrix.python-version }}
+        run: uv python install ${{ matrix.python-version }}
+
+      # --all-extras pulls in the optional dependency groups (e.g. "dev").
+      - name: Install dependencies
+        run: uv sync --all-extras
+
+      - name: Run linting
+        run: uv run ruff check src/ tests/
+
+      # Writes coverage.xml, which the upload step below consumes.
+      - name: Run tests
+        run: uv run pytest tests/ -v --cov=src/bud_model_catalog --cov-report=xml
+
+      # Upload once (from the 3.12 leg) rather than from every matrix entry;
+      # an upload failure does not fail the build.
+      - name: Upload coverage
+        uses: codecov/codecov-action@v4
+        if: matrix.python-version == '3.12'
+        with:
+          files: ./coverage.xml
+          fail_ci_if_error: false
+
+  # Independent of the test matrix: checks that the project builds and that
+  # the resulting distributions pass twine's metadata check.
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+
+      - name: Build package
+        run: uv build
+
+      - name: Check package
+        run: uvx twine check dist/*
@@ -0,0 +1,69 @@
+# Release pipeline: build the distributions once, then publish those same
+# artifacts from separate jobs that each request an OIDC id-token.
+name: Publish to PyPI
+
+on:
+  release:
+    types:
+      - published
+
+jobs:
+  build:
+    name: Build distribution
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+        with:
+          version: "latest"
+
+      - name: Build package
+        run: uv build
+
+      # Hand the built files over to the publish jobs.
+      - name: Store distribution packages
+        uses: actions/upload-artifact@v4
+        with:
+          name: python-package-distributions
+          path: dist/
+
+  # NOTE(review): this job has no pre-release filter, so pre-releases are
+  # published to PyPI as well as TestPyPI — confirm that is intended.
+  publish-pypi:
+    name: Publish to PyPI
+    runs-on: ubuntu-latest
+    needs: build
+    permissions:
+      id-token: write
+    environment:
+      name: pypi
+      url: https://pypi.org/p/bud-model-catalog
+
+    steps:
+      - name: Download distributions
+        uses: actions/download-artifact@v4
+        with:
+          name: python-package-distributions
+          path: dist/
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+
+  # Runs only for releases flagged as pre-releases.
+  publish-testpypi:
+    name: Publish to TestPyPI
+    runs-on: ubuntu-latest
+    needs: build
+    if: github.event.release.prerelease
+    permissions:
+      id-token: write
+    environment:
+      name: testpypi
+      url: https://test.pypi.org/p/bud-model-catalog
+
+    steps:
+      - name: Download distributions
+        uses: actions/download-artifact@v4
+        with:
+          name: python-package-distributions
+          path: dist/
+
+      - name: Publish to TestPyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          repository-url: https://test.pypi.org/legacy/
@@ -0,0 +1,25 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+
+# Virtual environments
+.venv/
+venv/
+
+# Package metadata
+*.egg-info/
+
+# Build artifacts
+dist/
+build/
+
+# Generated output
+catalog.json
+
+# Test coverage reports (pytest --cov / --cov-report=xml)
+.coverage
+.coverage.*
+coverage.xml
+htmlcov/
+
+# Environment variables
+.env
+
+# Tool caches
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
@@ -0,0 +1,144 @@
+# bud-model-catalog
+
+Multi-source LLM model catalog with cost-accurate pricing. It fetches model metadata from [LiteLLM](https://github.com/BerriAI/litellm) and [truefoundry/models](https://github.com/truefoundry/models), merges the two sources to produce cost-accurate pricing, filters out deprecated models (by default), and returns a unified catalog keyed by TensorZero provider/model.
+
+## Install
+
+```bash
+pip install bud-model-catalog
+```
+
+## Quick Start
+
+```python
+from bud_model_catalog import CatalogClient
+
+# Synchronous usage
+result = CatalogClient().fetch_catalog_sync()
+print(f"Fetched {len(result.models)} models")
+print(f"Stats: {result.stats}")
+```
+
+## Async Usage
+
+```python
+import asyncio
+from bud_model_catalog import CatalogClient, CatalogConfig
+
+async def main():
+    config = CatalogConfig(include_deprecated=True, timeout=60)
+    client = CatalogClient(config)
+    result = await client.fetch_catalog()
+
+    for key, model in list(result.models.items())[:5]:
+        print(f"{key}: input={model.get('input_cost_per_token')}")
+
+asyncio.run(main())
+```
+
+Or, from inside a coroutine (the snippet below uses `await`), call the module-level convenience function:
+
+```python
+from bud_model_catalog import fetch_catalog
+
+result = await fetch_catalog()
+```
+
+## Configuration
+
+All options are passed via `CatalogConfig`:
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `litellm_url` | `str` | GitHub raw URL | URL to the LiteLLM model prices JSON |
+| `ai_models_url` | `str` | GitHub archive URL | URL to the truefoundry/models ZIP archive |
+| `timeout` | `int` | `30` | HTTP request timeout in seconds (must be > 0) |
+| `include_deprecated` | `bool` | `False` | Whether to include deprecated models in output |
+| `max_retries` | `int` | `2` | Maximum retry attempts per HTTP request (with exponential backoff) |
+| `cache` | `bool` | `True` | Enable ETag-based conditional GET caching across calls |
+
+```python
+from bud_model_catalog import CatalogConfig
+
+config = CatalogConfig(
+    timeout=60,
+    include_deprecated=True,
+    max_retries=3,
+    cache=True,
+)
+```
+
+Validation is enforced at construction time:
+
+```python
+CatalogConfig(timeout=-1)   # ValueError: timeout must be positive
+CatalogConfig(litellm_url="not-a-url")  # ValueError: must be an HTTP(S) URL
+```
+
+## Error Handling
+
+```python
+from bud_model_catalog import CatalogClient, SourceFetchError
+
+try:
+    result = CatalogClient().fetch_catalog_sync()
+except SourceFetchError as e:
+    print(f"Failed to fetch data: {e}")
+```
+
+- `SourceFetchError` — raised when the LiteLLM fetch fails (HTTP error, invalid JSON, timeout)
+- ai-models failures are handled gracefully — the SDK falls back to LiteLLM-only costs
+
+## API Reference
+
+### `CatalogClient`
+
+Main entry point for fetching the catalog.
+
+- `CatalogClient(config=None)` — create a client with optional `CatalogConfig`
+- `await client.fetch_catalog()` — async fetch, returns `CatalogResult`
+- `client.fetch_catalog_sync()` — sync wrapper, safe in both sync and async contexts
+
+### `CatalogResult`
+
+Pydantic model returned from fetch operations.
+
+- `models: dict[str, dict]` — merged model catalog keyed by `{provider}/{model}`
+- `stats: MergeStats` — merge statistics
+- `litellm_fetched_at: datetime` — timestamp of LiteLLM fetch
+- `ai_models_fetched_at: datetime | None` — timestamp of ai-models fetch (None if failed/skipped)
+
+### `MergeStats`
+
+- `total_litellm` — total models from LiteLLM source
+- `total_output` — models in final output
+- `matched` — models matched with ai-models data
+- `unmatched` — models without ai-models match
+- `deprecated_removed` — models filtered as deprecated
+- `cost_fields_updated` — individual cost field values updated from ai-models
+
+## Logging
+
+The SDK uses Python's `logging` module. Enable output to see fetch/merge details:
+
+```python
+import logging
+logging.basicConfig(level=logging.INFO)
+```
+
+Key log messages:
+- `INFO` — fetch counts, merge statistics, cache hits
+- `WARNING` — ai-models fallback, malformed YAML files skipped, retry attempts
+
+## Development
+
+```bash
+# Install dev dependencies
+pip install -e ".[dev]"
+
+# Run tests
+pytest -v
+
+# Lint
+ruff check src/ tests/
+```
@@ -0,0 +1,75 @@
+"""Example: fetch and inspect the Bud model catalog."""
+
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from collections import Counter
+
+from bud_model_catalog import CatalogClient, CatalogConfig, SourceFetchError
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Fetch the Bud model catalog and print a summary.")
+    parser.add_argument("--output", "-o", help="Write the full model catalog to a JSON file.")
+    parser.add_argument(
+        "--include-deprecated",
+        action="store_true",
+        default=False,
+        help="Include deprecated models in the catalog (default: exclude them).",
+    )
+    args = parser.parse_args()
+
+    # ── 1. Fetch catalog with CLI-driven config ──────────────────────────
+    config = CatalogConfig(include_deprecated=args.include_deprecated)
+    print(f"Fetching catalog (include_deprecated={args.include_deprecated}) ...")
+    client = CatalogClient(config)
+
+    try:
+        result = client.fetch_catalog_sync()
+    except SourceFetchError as exc:
+        print(f"Failed to fetch catalog: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+    # ── 2. Inspect results ──────────────────────────────────────────────
+    stats = result.stats
+    print(f"\nTotal models in catalog : {stats.total_output}")
+    print(f"LiteLLM source models  : {stats.total_litellm}")
+    print(f"Matched with ai-models : {stats.matched}")
+    print(f"Unmatched              : {stats.unmatched}")
+    print(f"Deprecated removed     : {stats.deprecated_removed}")
+    print(f"Cost fields updated    : {stats.cost_fields_updated}")
+    print(f"\nLiteLLM fetched at     : {result.litellm_fetched_at}")
+    print(f"AI-models fetched at   : {result.ai_models_fetched_at}")
+
+    # ── 3. Browse by provider ───────────────────────────────────────────
+    provider_counts: Counter[str] = Counter()
+    for info in result.models.values():
+        provider_counts[info.get("litellm_provider", "unknown")] += 1
+
+    print(f"\n{'Provider':<30} {'Models':>6}")
+    print("-" * 38)
+    for provider, count in provider_counts.most_common(10):
+        print(f"{provider:<30} {count:>6}")
+    if len(provider_counts) > 10:
+        print(f"... and {len(provider_counts) - 10} more providers")
+
+    # ── 4. Inspect a single model ───────────────────────────────────────
+    first_key = next(iter(result.models))
+    model = result.models[first_key]
+    print(f"\nSample model: {first_key}")
+    for field in ("litellm_provider", "max_tokens", "max_input_tokens", "max_output_tokens",
+                  "input_cost_per_token", "output_cost_per_token"):
+        if field in model:
+            print(f"  {field}: {model[field]}")
+
+    # ── 5. Optional JSON dump ────────────────────────────────────────────
+    if args.output:
+        with open(args.output, "w") as fh:
+            json.dump(result.models, fh, indent=2, default=str)
+        print(f"\nCatalog written to {args.output}")
+
+
+# Script entry point: run the example only when executed directly, not on import.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,23 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project]
+name = "bud-model-catalog"
+version = "0.1.0"
+description = "Multi-source LLM model catalog with cost-accurate pricing"
+# Long description for the published package metadata.
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "httpx>=0.27",
+    "pydantic>=2.0",
+    "pyyaml>=6.0",
+]
+
+[project.optional-dependencies]
+# pytest-cov provides the --cov / --cov-report options used by the CI test step.
+dev = [
+    "pytest>=8.0",
+    "pytest-asyncio>=0.23",
+    "pytest-cov>=5.0",
+    "respx>=0.21",
+    "ruff>=0.4",
+]
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/bud_model_catalog"]
+
+[tool.pytest.ini_options]
+# Run async test functions without requiring an explicit asyncio marker.
+asyncio_mode = "auto"