From 584f569bdde9815c2dbab03725b28929a8d27601 Mon Sep 17 00:00:00 2001 From: Niko Maroulis Date: Tue, 10 Mar 2026 17:41:18 -0400 Subject: [PATCH 1/4] fix: Include model name in Vertex AI request params and use v1beta1 for preview models Three fixes for Vertex AI provider: 1. Include "model" key in build_request_params output so chat/chat_stream use the correct model name in URL construction instead of falling back to the default "gemini-2.0-flash" 2. Use v1beta1 API version for preview/experimental models which require it (v1 returns 404 for these models) 3. Add debug logging for request URLs to aid troubleshooting --- lib/nous/providers/vertex_ai.ex | 54 ++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/lib/nous/providers/vertex_ai.ex b/lib/nous/providers/vertex_ai.ex index 5062592..b1d6faf 100644 --- a/lib/nous/providers/vertex_ai.ex +++ b/lib/nous/providers/vertex_ai.ex @@ -96,7 +96,7 @@ defmodule Nous.Providers.VertexAI do # Reuse Gemini message format — Vertex AI uses the same content structure {system_prompt, contents} = Nous.Messages.to_provider_format(messages, :gemini) - params = %{"contents" => contents} + params = %{"model" => model.model, "contents" => contents} params = if system_prompt do @@ -132,11 +132,13 @@ defmodule Nous.Providers.VertexAI do model = Map.get(params, "model") || Map.get(params, :model) || "gemini-2.0-flash" with {:ok, token} <- resolve_token(opts), - {:ok, url_base} <- resolve_base_url(opts) do + {:ok, url_base} <- resolve_base_url(opts, model) do url = build_url(url_base, model, :generate) headers = build_headers(token) timeout = Keyword.get(opts, :timeout, @default_timeout) + Logger.debug("Vertex AI chat request: url=#{url}, model=#{model}") + # Remove model from params (it's in the URL) body = params |> Map.delete("model") |> Map.delete(:model) @@ -149,12 +151,14 @@ defmodule Nous.Providers.VertexAI do model = Map.get(params, "model") || Map.get(params, :model) || "gemini-2.0-flash" 
with {:ok, token} <- resolve_token(opts), - {:ok, url_base} <- resolve_base_url(opts) do + {:ok, url_base} <- resolve_base_url(opts, model) do url = build_url(url_base, model, :stream) headers = build_headers(token) timeout = Keyword.get(opts, :timeout, @streaming_timeout) finch_name = Keyword.get(opts, :finch_name, Nous.Finch) + Logger.debug("Vertex AI stream request: url=#{url}, model=#{model}") + # Remove model from params (it's in the URL) body = params |> Map.delete("model") |> Map.delete(:model) @@ -166,23 +170,51 @@ defmodule Nous.Providers.VertexAI do @doc """ Build a Vertex AI endpoint URL from project ID and region. + Uses `v1beta1` API version for preview/experimental models and `v1` for stable models. + If no model name is provided, defaults to `v1`. + ## Examples iex> Nous.Providers.VertexAI.endpoint("my-project", "us-central1") "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1" + iex> Nous.Providers.VertexAI.endpoint("my-project", "us-central1", "gemini-2.0-flash") + "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1" + + iex> Nous.Providers.VertexAI.endpoint("my-project", "europe-west1", "gemini-3.1-pro-preview") + "https://europe-west1-aiplatform.googleapis.com/v1beta1/projects/my-project/locations/europe-west1" + """ - @spec endpoint(String.t(), String.t()) :: String.t() - def endpoint(project_id, region \\ "us-central1") do - "https://#{region}-aiplatform.googleapis.com/v1/projects/#{project_id}/locations/#{region}" + @spec endpoint(String.t(), String.t(), String.t() | nil) :: String.t() + def endpoint(project_id, region \\ "us-central1", model \\ nil) do + api_version = api_version_for_model(model) + + "https://#{region}-aiplatform.googleapis.com/#{api_version}/projects/#{project_id}/locations/#{region}" + end + + @doc """ + Returns the appropriate API version for a model name. + + Preview and experimental models use `v1beta1`, stable models use `v1`. 
+ """ + @spec api_version_for_model(String.t() | nil) :: String.t() + def api_version_for_model(nil), do: "v1" + + def api_version_for_model(model) when is_binary(model) do + if String.contains?(model, "preview") or String.contains?(model, "experimental") do + "v1beta1" + else + "v1" + end end - # Resolve the base URL from options, app config, or env vars - defp resolve_base_url(opts) do + # Resolve the base URL from options, app config, or env vars. + # When building from env vars, uses the model name to determine the API version. + defp resolve_base_url(opts, model) do url = Keyword.get(opts, :base_url) || get_in(Application.get_env(:nous, :vertex_ai, []), [:base_url]) || - build_default_base_url() + build_default_base_url(model) if url && url != "" do {:ok, url} @@ -216,12 +248,12 @@ defmodule Nous.Providers.VertexAI do end # Build default base URL from environment variables - defp build_default_base_url do + defp build_default_base_url(model) do project = System.get_env("GOOGLE_CLOUD_PROJECT") || System.get_env("GCLOUD_PROJECT") region = System.get_env("GOOGLE_CLOUD_REGION") || "us-central1" if project do - endpoint(project, region) + endpoint(project, region, model) else nil end From 4f244f2291cb4b0cc4f6c3d39ac47fedae77ac57 Mon Sep 17 00:00:00 2001 From: Niko Maroulis Date: Tue, 10 Mar 2026 17:46:13 -0400 Subject: [PATCH 2/4] chore: Bump version to 0.12.7, add changelog entry Changelog documents the Vertex AI model routing fix and v1beta1 support for preview/experimental models. --- CHANGELOG.md | 13 +++++++++++++ mix.exs | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 38ba0b7..68a97d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,19 @@ All notable changes to this project will be documented in this file. 
+## [0.12.7] - 2026-03-10 + +### Fixed + +- **Vertex AI model routing**: Fixed `build_request_params/3` not including the `"model"` key in the params map, causing `chat/2` and `chat_stream/2` to always fall back to `"gemini-2.0-flash"` regardless of the requested model. +- **Vertex AI 404 on preview models**: Use `v1beta1` API version for preview and experimental models (e.g., `gemini-3.1-pro-preview`). The `v1` endpoint returns 404 for these models. + +### Added + +- `Nous.Providers.VertexAI.api_version_for_model/1` — returns `"v1beta1"` for preview/experimental models, `"v1"` for stable models. +- `Nous.Providers.VertexAI.endpoint/3` now accepts an optional model name to select the correct API version. +- Debug logging for Vertex AI request URLs. + ## [0.13.2] - 2026-03-07 ### Added diff --git a/mix.exs b/mix.exs index 1bee83b..ce60f05 100644 --- a/mix.exs +++ b/mix.exs @@ -1,7 +1,7 @@ defmodule Nous.MixProject do use Mix.Project - @version "0.13.2" + @version "0.12.7" @source_url "https://github.com/nyo16/nous" def project do From 51bc84dd7968820c1ec5348730f5166655863fde Mon Sep 17 00:00:00 2001 From: Niko Maroulis Date: Tue, 10 Mar 2026 17:48:04 -0400 Subject: [PATCH 3/4] =?UTF-8?q?fix:=20Correct=20changelog=20version=20numb?= =?UTF-8?q?ers=20(0.13.x=20=E2=86=92=200.12.x)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68a97d3..ae255f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ All notable changes to this project will be documented in this file. - `Nous.Providers.VertexAI.endpoint/3` now accepts an optional model name to select the correct API version. - Debug logging for Vertex AI request URLs. -## [0.13.2] - 2026-03-07 +## [0.12.6] - 2026-03-07 ### Added @@ -25,7 +25,7 @@ All notable changes to this project will be documented in this file. 
- New config options: `:auto_update_memory`, `:auto_update_every`, `:reflection_model`, `:reflection_max_tokens`, `:reflection_max_messages`, `:reflection_max_memories` - New example: `examples/memory/auto_update.exs` -## [0.13.1] - 2026-03-06 +## [0.12.5] - 2026-03-06 ### Added From 0d0e35845bffb9924e877e7c16bcf2466387bd7b Mon Sep 17 00:00:00 2001 From: Niko Maroulis Date: Thu, 12 Mar 2026 16:59:41 -0400 Subject: [PATCH 4/4] fix: Vertex AI v1/v1beta1 bug, global endpoint support, input validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The v1beta1 fix from 584f569 had a critical bug: Model.parse stored a hardcoded v1 URL in model.base_url when GOOGLE_CLOUD_PROJECT was set, bypassing the provider's v1beta1 selection logic. Preview models still 404'd in the most common setup. Fix: default_base_url(:vertex_ai) now returns nil — URL is built at request time by the provider with proper v1/v1beta1 selection. Also adds: - Global endpoint support (required for Gemini 3.x preview models) - GOOGLE_CLOUD_LOCATION env var as fallback for GOOGLE_CLOUD_REGION - Input validation for project ID and region with helpful error messages - Comprehensive docs (README + moduledoc) with model table, setup guide - Integration test exercising Flash + Pro on global endpoint - Multi-region example script Tested: 782 tests, 0 failures. Integration-tested with service account against gemini-3.1-pro-preview and gemini-3-flash-preview on global endpoint. 
--- CHANGELOG.md | 12 + README.md | 116 ++++++-- examples/providers/vertex_ai_goth_test.exs | 6 +- .../providers/vertex_ai_integration_test.exs | 255 ++++++++++++++++++ examples/providers/vertex_ai_multi_region.exs | 100 +++++++ lib/nous/model.ex | 12 +- lib/nous/providers/vertex_ai.ex | 232 +++++++++++++--- mix.exs | 2 +- test/nous/providers/vertex_ai_test.exs | 131 +++++++-- 9 files changed, 765 insertions(+), 101 deletions(-) create mode 100644 examples/providers/vertex_ai_integration_test.exs create mode 100644 examples/providers/vertex_ai_multi_region.exs diff --git a/CHANGELOG.md b/CHANGELOG.md index ae255f7..776c57e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,18 @@ All notable changes to this project will be documented in this file. +## [0.12.8] - 2026-03-12 + +### Fixed + +- **Vertex AI v1/v1beta1 bug**: `Model.parse("vertex_ai:gemini-2.5-pro-preview-06-05")` with `GOOGLE_CLOUD_PROJECT` set was storing a hardcoded `v1` URL in `model.base_url`, causing the provider's `v1beta1` selection logic to be bypassed. Preview models now correctly use `v1beta1` at request time. + +### Added + +- **Vertex AI input validation**: Project ID and region from environment variables are now validated with helpful error messages instead of producing opaque DNS/HTTP errors. +- **`GOOGLE_CLOUD_LOCATION` support**: Added as a fallback for `GOOGLE_CLOUD_REGION`, consistent with other Google Cloud libraries and tooling. 
+- Multi-region example script: `examples/providers/vertex_ai_multi_region.exs` + ## [0.12.7] - 2026-03-10 ### Fixed diff --git a/README.md b/README.md index 9f6b6f6..6231a9b 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,7 @@ IO.puts("Tokens: #{result.usage.total_tokens}") | OpenAI | `openai:gpt-4` | ✅ | | Anthropic | `anthropic:claude-sonnet-4-5-20250929` | ✅ | | Google Gemini | `gemini:gemini-2.0-flash` | ✅ | -| Google Vertex AI | `vertex_ai:gemini-2.0-flash` | ✅ | +| Google Vertex AI | `vertex_ai:gemini-3.1-pro-preview` | ✅ | | Groq | `groq:llama-3.1-70b-versatile` | ✅ | | Ollama | `ollama:llama2` | ✅ | | OpenRouter | `openrouter:anthropic/claude-3.5-sonnet` | ✅ | @@ -108,15 +108,46 @@ All HTTP providers use pure Elixir HTTP clients (Req + Finch). LlamaCpp runs in- agent = Nous.new("lmstudio:qwen3") # Local (free) agent = Nous.new("openai:gpt-4") # OpenAI agent = Nous.new("anthropic:claude-sonnet-4-5-20250929") # Anthropic -agent = Nous.new("vertex_ai:gemini-2.0-flash") # Google Vertex AI +agent = Nous.new("vertex_ai:gemini-3.1-pro-preview") # Google Vertex AI agent = Nous.new("llamacpp:local", llamacpp_model: llm) # Local NIF ``` ### Google Vertex AI Setup -Vertex AI provides enterprise access to Gemini models. To use it with a service account: +Vertex AI provides enterprise access to Gemini models via Google Cloud. It supports +VPC-SC, CMEK, IAM, regional/global endpoints, and all the latest Gemini models. -**1. 
Create a service account:** +#### Supported Models + +| Model | Model ID | Endpoint | API Version | +|-------|----------|----------|-------------| +| Gemini 3.1 Pro (preview) | `gemini-3.1-pro-preview` | global only | v1beta1 | +| Gemini 3 Flash (preview) | `gemini-3-flash-preview` | global only | v1beta1 | +| Gemini 3.1 Flash-Lite (preview) | `gemini-3.1-flash-lite-preview` | global only | v1beta1 | +| Gemini 2.5 Pro | `gemini-2.5-pro` | regional + global | v1 | +| Gemini 2.5 Flash | `gemini-2.5-flash` | regional + global | v1 | +| Gemini 2.0 Flash | `gemini-2.0-flash` | regional + global | v1 | + +> **Note:** Preview and experimental models automatically use the `v1beta1` API version. +> The Gemini 3.x preview models are **global endpoint only** — set `GOOGLE_CLOUD_LOCATION=global`. + +#### Regional vs Global Endpoints + +Vertex AI offers two endpoint types: + +- **Regional** (e.g., `us-central1`, `europe-west1`): Low-latency, data residency guarantees + ``` + https://us-central1-aiplatform.googleapis.com/v1/projects/{project}/locations/us-central1 + ``` +- **Global**: Higher availability, required for Gemini 3.x preview models + ``` + https://aiplatform.googleapis.com/v1beta1/projects/{project}/locations/global + ``` + +The provider automatically selects the correct hostname and API version based on the +region and model name. Set `GOOGLE_CLOUD_LOCATION=global` for Gemini 3.x preview models. 
+ +#### Step 1: Create a Service Account ```bash export PROJECT_ID="your-project-id" @@ -129,30 +160,48 @@ gcloud iam service-accounts create nous-vertex-ai \ --display-name="Nous Vertex AI" \ --project=$PROJECT_ID -# Grant permission +# Grant the Vertex AI User role gcloud projects add-iam-policy-binding $PROJECT_ID \ --member="serviceAccount:nous-vertex-ai@${PROJECT_ID}.iam.gserviceaccount.com" \ --role="roles/aiplatform.user" -# Download key and store as env var -gcloud iam service-accounts keys create /tmp/sa.json \ +# Download the key file +gcloud iam service-accounts keys create /tmp/sa-key.json \ --iam-account="nous-vertex-ai@${PROJECT_ID}.iam.gserviceaccount.com" +``` + +#### Step 2: Set Environment Variables + +```bash +# Load the service account JSON into an env var (recommended — no file path dependency) +export GOOGLE_CREDENTIALS="$(cat /tmp/sa-key.json)" + +# Required: your GCP project ID +export GOOGLE_CLOUD_PROJECT="your-project-id" -# Set the env vars -export GOOGLE_CREDENTIALS="$(cat /tmp/sa.json)" -export GOOGLE_CLOUD_PROJECT="$PROJECT_ID" -export GOOGLE_CLOUD_REGION="us-central1" +# Required for Gemini 3.x preview models (global endpoint only) +export GOOGLE_CLOUD_LOCATION="global" + +# Or use a regional endpoint for stable models: +# export GOOGLE_CLOUD_LOCATION="us-central1" +# export GOOGLE_CLOUD_LOCATION="europe-west1" ``` -**2. Add Goth to your deps** (handles token refresh from the service account): +Both `GOOGLE_CLOUD_REGION` and `GOOGLE_CLOUD_LOCATION` are supported (consistent with +other Google Cloud libraries). `GOOGLE_CLOUD_REGION` takes precedence if both are set. +Defaults to `us-central1` if neither is set. + +#### Step 3: Add Goth to Your Application + +Goth handles OAuth2 token fetching and auto-refresh from the service account credentials. ```elixir +# mix.exs {:goth, "~> 1.4"} ``` -**3. 
Start Goth in your supervision tree:** - ```elixir +# application.ex — start Goth in your supervision tree credentials = System.get_env("GOOGLE_CREDENTIALS") |> Jason.decode!() children = [ @@ -160,33 +209,52 @@ children = [ ] ``` -**4. Configure Nous to use Goth:** +#### Step 4: Configure and Use ```elixir -# Option A: Via app config (recommended for production) +# Option A: App config (recommended for production) # config/config.exs config :nous, :vertex_ai, goth: MyApp.Goth -# Then just use it — no extra options needed: -agent = Nous.new("vertex_ai:gemini-2.0-flash") +# Then use it — Goth handles token refresh automatically: +agent = Nous.new("vertex_ai:gemini-3.1-pro-preview") {:ok, result} = Nous.run(agent, "Hello from Vertex AI!") ``` ```elixir -# Option B: Per-model (useful for multiple projects/regions) -agent = Nous.new("vertex_ai:gemini-2.0-flash", +# Option B: Per-model Goth (useful for multiple projects) +agent = Nous.new("vertex_ai:gemini-3-flash-preview", default_settings: %{goth: MyApp.Goth} ) ``` ```elixir -# Option C: Direct access token (no Goth needed, e.g. for quick testing) -export VERTEX_AI_ACCESS_TOKEN="$(gcloud auth print-access-token)" +# Option C: Explicit base_url (for custom endpoint or specific region) +alias Nous.Providers.VertexAI + +agent = Nous.new("vertex_ai:gemini-3.1-pro-preview", + base_url: VertexAI.endpoint("my-project", "global", "gemini-3.1-pro-preview"), + default_settings: %{goth: MyApp.Goth} +) +``` -agent = Nous.new("vertex_ai:gemini-2.0-flash") +```elixir +# Option D: Quick testing with gcloud CLI (no Goth needed) +# export VERTEX_AI_ACCESS_TOKEN="$(gcloud auth print-access-token)" +agent = Nous.new("vertex_ai:gemini-3.1-pro-preview") ``` -See [`examples/providers/vertex_ai_goth_test.exs`](examples/providers/vertex_ai_goth_test.exs) for a runnable example. 
+#### Input Validation + +The provider validates `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_LOCATION` at request time +and returns helpful error messages for invalid values instead of opaque DNS or HTTP errors. + +#### Examples + +- [`examples/providers/vertex_ai.exs`](examples/providers/vertex_ai.exs) — Basic usage with access token +- [`examples/providers/vertex_ai_goth_test.exs`](examples/providers/vertex_ai_goth_test.exs) — Service account with Goth +- [`examples/providers/vertex_ai_multi_region.exs`](examples/providers/vertex_ai_multi_region.exs) — Multi-region + v1/v1beta1 demo +- [`examples/providers/vertex_ai_integration_test.exs`](examples/providers/vertex_ai_integration_test.exs) — Full integration test (Flash + Pro, streaming + non-streaming) ## Features diff --git a/examples/providers/vertex_ai_goth_test.exs b/examples/providers/vertex_ai_goth_test.exs index e3e2dde..faaa0ae 100644 --- a/examples/providers/vertex_ai_goth_test.exs +++ b/examples/providers/vertex_ai_goth_test.exs @@ -5,7 +5,7 @@ # Prerequisites: # export GOOGLE_CREDENTIALS='{"type":"service_account","project_id":"...","private_key":"...",...}' # export GOOGLE_CLOUD_PROJECT="your-project-id" -# export GOOGLE_CLOUD_REGION="europe-west1" # optional, defaults to europe-west1 (Frankfurt) +# export GOOGLE_CLOUD_REGION="us-central1" # optional, defaults to us-central1 # # Run: # mix run test_vertex_ai.exs @@ -25,7 +25,7 @@ end IO.puts("=== Vertex AI Test with Service Account ===\n") IO.puts("Project: #{project}") -IO.puts("Region: #{System.get_env("GOOGLE_CLOUD_REGION", "europe-west1")}\n") +IO.puts("Region: #{System.get_env("GOOGLE_CLOUD_REGION", "us-central1")}\n") # Start Goth with service account credentials from env var credentials = Jason.decode!(credentials_json) @@ -38,7 +38,7 @@ IO.puts("Goth started successfully.\n") IO.puts("--- Test 1: Non-streaming ---") agent = - Nous.new("vertex_ai:gemini-3.1-pro", + Nous.new("vertex_ai:gemini-2.0-flash", instructions: "You are a helpful assistant. 
Be concise.", default_settings: %{goth: Nous.TestGoth} ) diff --git a/examples/providers/vertex_ai_integration_test.exs b/examples/providers/vertex_ai_integration_test.exs new file mode 100644 index 0000000..51254b1 --- /dev/null +++ b/examples/providers/vertex_ai_integration_test.exs @@ -0,0 +1,255 @@ +#!/usr/bin/env elixir + +# Integration test for Vertex AI provider hardening +# +# Tests the v1/v1beta1 fix, GOOGLE_CLOUD_LOCATION support, and validation. +# +# Credentials (pick one): +# 1. Service account JSON in env var: +# export GOOGLE_CREDENTIALS='{"type":"service_account",...}' +# +# 2. Service account JSON file path (standard GCP convention): +# export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account.json" +# +# 3. Pre-obtained access token: +# export VERTEX_AI_ACCESS_TOKEN=$(gcloud auth print-access-token) +# +# Required: +# export GOOGLE_CLOUD_PROJECT="your-project-id" +# +# Optional: +# export GOOGLE_CLOUD_REGION="us-central1" # defaults to us-central1 +# export GOOGLE_CLOUD_LOCATION="us-central1" # fallback for REGION +# +# Run: +# mix run examples/providers/vertex_ai_integration_test.exs + +alias Nous.Providers.VertexAI + +project = System.get_env("GOOGLE_CLOUD_PROJECT") + +unless project do + IO.puts(""" + Missing GOOGLE_CLOUD_PROJECT. 
Set: + export GOOGLE_CLOUD_PROJECT="your-project-id" + + Also provide credentials via one of: + export GOOGLE_CREDENTIALS='' + export GOOGLE_APPLICATION_CREDENTIALS="/path/to/sa.json" + export VERTEX_AI_ACCESS_TOKEN=$(gcloud auth print-access-token) + """) + + System.halt(1) +end + +# --- Resolve credentials --- +{auth_method, agent_opts} = + cond do + json = System.get_env("GOOGLE_CREDENTIALS") -> + credentials = Jason.decode!(json) + + {:ok, _} = + Goth.start_link(name: Nous.IntegrationGoth, source: {:service_account, credentials}) + + {"Goth (GOOGLE_CREDENTIALS)", %{goth: Nous.IntegrationGoth}} + + path = System.get_env("GOOGLE_APPLICATION_CREDENTIALS") -> + credentials = path |> File.read!() |> Jason.decode!() + + {:ok, _} = + Goth.start_link(name: Nous.IntegrationGoth, source: {:service_account, credentials}) + + {"Goth (GOOGLE_APPLICATION_CREDENTIALS: #{path})", %{goth: Nous.IntegrationGoth}} + + token = System.get_env("VERTEX_AI_ACCESS_TOKEN") -> + {"Access token (VERTEX_AI_ACCESS_TOKEN)", %{api_key: token}} + + true -> + IO.puts( + "No credentials found. Set GOOGLE_CREDENTIALS, GOOGLE_APPLICATION_CREDENTIALS, or VERTEX_AI_ACCESS_TOKEN." + ) + + System.halt(1) + end + +region = + System.get_env("GOOGLE_CLOUD_REGION") || + System.get_env("GOOGLE_CLOUD_LOCATION") || + "us-central1" + +IO.puts("=== Vertex AI Integration Test ===\n") +IO.puts("Project: #{project}") +IO.puts("Region: #{region}") +IO.puts("Auth: #{auth_method}\n") + +# Helper to build agent opts +make_agent = fn model_name, extra_opts -> + opts = + [instructions: "You are a helpful assistant. 
Be extremely concise — one sentence max."] + |> Keyword.merge(extra_opts) + + default_settings = Map.merge(agent_opts, Keyword.get(opts, :default_settings, %{})) + opts = Keyword.put(opts, :default_settings, default_settings) + + # If using access token, pass as api_key + opts = + if api_key = agent_opts[:api_key] do + Keyword.put_new(opts, :api_key, api_key) + else + opts + end + + Nous.new("vertex_ai:#{model_name}", opts) +end + +passed = 0 +failed = 0 + +run_test = fn name, fun -> + IO.write(" #{name}... ") + + try do + case fun.() do + :ok -> + IO.puts("PASS") + {:ok, :pass} + + {:error, reason} -> + IO.puts("FAIL: #{inspect(reason)}") + {:ok, :fail} + end + rescue + e -> + IO.puts("ERROR: #{Exception.message(e)}") + {:ok, :fail} + end +end + +global_url_flash = VertexAI.endpoint(project, "global", "gemini-3-flash-preview") +global_url_pro = VertexAI.endpoint(project, "global", "gemini-3.1-pro-preview") + +results = + [ + # --- URL construction tests --- + {"v1beta1 URL for preview model", + fn -> + url = VertexAI.endpoint(project, "global", "gemini-3.1-pro-preview") + + if url =~ "/v1beta1/projects/" do + IO.write("(#{url}) ") + :ok + else + {:error, "Expected v1beta1 URL, got: #{url}"} + end + end}, + {"global endpoint uses aiplatform.googleapis.com (no region prefix)", + fn -> + url = VertexAI.endpoint(project, "global", "gemini-3.1-pro-preview") + + if url =~ "https://aiplatform.googleapis.com/" and url =~ "/locations/global" do + IO.write("(#{url}) ") + :ok + else + {:error, "Expected global URL, got: #{url}"} + end + end}, + {"Model.parse base_url is nil (deferred to provider)", + fn -> + model = Nous.Model.parse("vertex_ai:gemini-3.1-pro-preview") + + if model.base_url == nil do + :ok + else + {:error, "Expected nil base_url, got: #{inspect(model.base_url)}"} + end + end}, + + # --- Flash model (global) --- + {"Flash non-streaming (gemini-3-flash-preview, global)", + fn -> + agent = make_agent.("gemini-3-flash-preview", base_url: global_url_flash) + + 
case Nous.run(agent, "Say hello in exactly 3 words.") do + {:ok, result} -> + IO.write("(#{result.output}) ") + :ok + + {:error, error} -> + {:error, error} + end + end}, + {"Flash streaming (gemini-3-flash-preview, global)", + fn -> + agent = make_agent.("gemini-3-flash-preview", base_url: global_url_flash) + + case Nous.run_stream(agent, "Say goodbye in exactly 3 words.") do + {:ok, stream} -> + IO.write("(") + + stream + |> Enum.each(fn + {:text_delta, text} -> IO.write(text) + {:finish, _} -> IO.write(") ") + _ -> :ok + end) + + :ok + + {:error, error} -> + {:error, error} + end + end}, + + # --- Pro model (global) --- + {"Pro non-streaming (gemini-3.1-pro-preview, global)", + fn -> + agent = make_agent.("gemini-3.1-pro-preview", base_url: global_url_pro) + + case Nous.run(agent, "Say 'pro works' and nothing else.") do + {:ok, result} -> + IO.write("(#{result.output}) ") + :ok + + {:error, error} -> + {:error, error} + end + end}, + {"Pro streaming (gemini-3.1-pro-preview, global)", + fn -> + agent = make_agent.("gemini-3.1-pro-preview", base_url: global_url_pro) + + case Nous.run_stream(agent, "Say 'streaming pro' and nothing else.") do + {:ok, stream} -> + IO.write("(") + + stream + |> Enum.each(fn + {:text_delta, text} -> IO.write(text) + {:finish, _} -> IO.write(") ") + _ -> :ok + end) + + :ok + + {:error, error} -> + {:error, error} + end + end} + ] + +IO.puts("--- Running #{length(results)} tests ---\n") + +outcomes = + Enum.map(results, fn {name, fun} -> + {_, result} = run_test.(name, fun) + result + end) + +pass_count = Enum.count(outcomes, &(&1 == :pass)) +fail_count = Enum.count(outcomes, &(&1 == :fail)) + +IO.puts("\n--- Results: #{pass_count} passed, #{fail_count} failed out of #{length(results)} ---") + +if fail_count > 0 do + System.halt(1) +end diff --git a/examples/providers/vertex_ai_multi_region.exs b/examples/providers/vertex_ai_multi_region.exs new file mode 100644 index 0000000..8121e1c --- /dev/null +++ 
b/examples/providers/vertex_ai_multi_region.exs @@ -0,0 +1,100 @@ +#!/usr/bin/env elixir + +# Multi-region Vertex AI example +# +# Demonstrates: +# - Testing the same prompt against multiple GCP regions +# - v1 vs v1beta1 API version selection (stable vs preview models) +# - Both streaming and non-streaming requests +# +# Prerequisites: +# export GOOGLE_CLOUD_PROJECT=your-project-id +# export VERTEX_AI_ACCESS_TOKEN=$(gcloud auth print-access-token) +# +# Run: +# mix run examples/providers/vertex_ai_multi_region.exs + +alias Nous.Providers.VertexAI + +project = System.get_env("GOOGLE_CLOUD_PROJECT") +token = System.get_env("VERTEX_AI_ACCESS_TOKEN") + +unless project && token do + IO.puts(""" + Missing environment variables. Set: + export GOOGLE_CLOUD_PROJECT="your-project-id" + export VERTEX_AI_ACCESS_TOKEN=$(gcloud auth print-access-token) + """) + + System.halt(1) +end + +regions = ["us-central1", "europe-west1", "asia-northeast1"] +stable_model = "gemini-2.0-flash" +preview_model = "gemini-2.5-pro-preview-06-05" + +IO.puts("=== Vertex AI Multi-Region Test ===\n") +IO.puts("Project: #{project}") +IO.puts("Regions: #{Enum.join(regions, ", ")}\n") + +# Show v1 vs v1beta1 URL selection +IO.puts("--- API Version Selection ---") +IO.puts("Stable (#{stable_model}):") +IO.puts(" #{VertexAI.endpoint(project, "us-central1", stable_model)}") +IO.puts("Preview (#{preview_model}):") +IO.puts(" #{VertexAI.endpoint(project, "us-central1", preview_model)}") +IO.puts("") + +# Test each region with a non-streaming request +IO.puts("--- Non-Streaming: #{stable_model} ---") + +for region <- regions do + base_url = VertexAI.endpoint(project, region, stable_model) + IO.puts("\n[#{region}] #{base_url}") + + agent = + Nous.new("vertex_ai:#{stable_model}", + instructions: "You are a helpful assistant. Be extremely concise.", + base_url: base_url, + api_key: token + ) + + case Nous.run(agent, "What region are you running in? 
One word answer.") do + {:ok, result} -> + IO.puts("[#{region}] Response: #{result.output}") + + {:error, error} -> + IO.puts("[#{region}] Error: #{inspect(error)}") + end +end + +IO.puts("\n--- Streaming: #{stable_model} ---") + +region = hd(regions) +base_url = VertexAI.endpoint(project, region, stable_model) +IO.puts("\n[#{region}] Streaming...") + +agent = + Nous.new("vertex_ai:#{stable_model}", + instructions: "You are a helpful assistant. Be concise.", + base_url: base_url, + api_key: token + ) + +case Nous.run_stream(agent, "Write a haiku about cloud computing.") do + {:ok, stream} -> + IO.write("[#{region}] ") + + stream + |> Enum.each(fn + {:text_delta, text} -> IO.write(text) + {:thinking_delta, _} -> :ok + {:finish, _} -> IO.puts("") + _other -> :ok + end) + + {:error, error} -> + IO.puts("[#{region}] Streaming error: #{inspect(error)}") +end + +IO.puts("\nDone!") diff --git a/lib/nous/model.ex b/lib/nous/model.ex index d391f0a..7b48792 100644 --- a/lib/nous/model.ex +++ b/lib/nous/model.ex @@ -206,16 +206,8 @@ defmodule Nous.Model do defp default_base_url(:anthropic), do: "https://api.anthropic.com" defp default_base_url(:gemini), do: "https://generativelanguage.googleapis.com/v1beta" - defp default_base_url(:vertex_ai) do - project = System.get_env("GOOGLE_CLOUD_PROJECT") || System.get_env("GCLOUD_PROJECT") - region = System.get_env("GOOGLE_CLOUD_REGION") || "us-central1" - - if project do - "https://#{region}-aiplatform.googleapis.com/v1/projects/#{project}/locations/#{region}" - else - nil - end - end + # Vertex AI URL is built at request time by the provider (with proper v1/v1beta1 selection) + defp default_base_url(:vertex_ai), do: nil defp default_base_url(:groq), do: "https://api.groq.com/openai/v1" defp default_base_url(:ollama), do: "http://localhost:11434/v1" diff --git a/lib/nous/providers/vertex_ai.ex b/lib/nous/providers/vertex_ai.ex index b1d6faf..ef99563 100644 --- a/lib/nous/providers/vertex_ai.ex +++ b/lib/nous/providers/vertex_ai.ex @@ 
-3,7 +3,21 @@ defmodule Nous.Providers.VertexAI do Google Vertex AI provider implementation. Supports Gemini models via the Vertex AI API, which provides enterprise features - like VPC-SC, CMEK, and regional endpoints. + like VPC-SC, CMEK, IAM, and regional/global endpoints. + + ## Supported Models + + | Model | Model ID | Endpoint | API Version | + |--------------------------|--------------------------------|----------------|-------------| + | Gemini 3.1 Pro (preview) | `gemini-3.1-pro-preview` | global only | v1beta1 | + | Gemini 3 Flash (preview) | `gemini-3-flash-preview` | global only | v1beta1 | + | Gemini 3.1 Flash-Lite | `gemini-3.1-flash-lite-preview`| global only | v1beta1 | + | Gemini 2.5 Pro | `gemini-2.5-pro` | regional/global| v1 | + | Gemini 2.5 Flash | `gemini-2.5-flash` | regional/global| v1 | + | Gemini 2.0 Flash | `gemini-2.0-flash` | regional/global| v1 | + + Preview and experimental models automatically use the `v1beta1` API version. + Stable models use `v1`. This is determined by `api_version_for_model/1`. ## Authentication @@ -15,64 +29,120 @@ defmodule Nous.Providers.VertexAI do 3. `VERTEX_AI_ACCESS_TOKEN` environment variable 4. Application config: `config :nous, :vertex_ai, api_key: "..."` - ### Using Goth (Recommended) + ### Using Goth with a Service Account (Recommended) - If you already use Goth for Google Cloud services (PubSub, etc.), you can reuse it. - Goth handles service account credentials, token caching, and auto-refresh via the - `GOOGLE_APPLICATION_CREDENTIALS` environment variable. + Goth handles OAuth2 token fetching, caching, and auto-refresh from a GCP service account. 
+ Load the service account JSON from an environment variable (no file path dependency): - Add Goth to your deps and supervision tree: + # Set env vars: + # export GOOGLE_CREDENTIALS='{"type":"service_account","project_id":"...",...}' + # export GOOGLE_CLOUD_PROJECT="your-project-id" + # export GOOGLE_CLOUD_LOCATION="global" # required for Gemini 3.x preview # mix.exs {:goth, "~> 1.4"} - # application.ex + # application.ex — start Goth in your supervision tree + credentials = System.get_env("GOOGLE_CREDENTIALS") |> Jason.decode!() + children = [ - {Goth, name: MyApp.Goth} + {Goth, name: MyApp.Goth, source: {:service_account, credentials}} ] Then configure Nous to use it: - # config.exs + # config.exs (recommended for production) config :nous, :vertex_ai, goth: MyApp.Goth - Or pass it per-model: + # Then just use it: + agent = Nous.new("vertex_ai:gemini-3.1-pro-preview") + {:ok, result} = Nous.run(agent, "Hello!") + + Or pass Goth per-model (useful for multiple projects): - model = Model.parse("vertex_ai:gemini-2.0-flash", + agent = Nous.new("vertex_ai:gemini-3-flash-preview", default_settings: %{goth: MyApp.Goth} ) ### Using an Access Token - You can pass a pre-obtained token (e.g., from `gcloud auth print-access-token`): + For quick testing without Goth (tokens expire after ~1 hour): + + # export VERTEX_AI_ACCESS_TOKEN="$(gcloud auth print-access-token)" + agent = Nous.new("vertex_ai:gemini-3.1-pro-preview") + + Or pass it explicitly: - model = Model.parse("vertex_ai:gemini-2.0-flash", + agent = Nous.new("vertex_ai:gemini-3.1-pro-preview", api_key: System.get_env("VERTEX_AI_ACCESS_TOKEN") ) ## URL Construction - The base URL is constructed from project and region: + The base URL is built at request time from environment variables and the model name. + The provider selects the correct hostname and API version automatically. 
+ + ### Regional Endpoints (for stable models) https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region} - Set via environment variables: + ### Global Endpoint (required for Gemini 3.x preview models) - - `GOOGLE_CLOUD_PROJECT` — GCP project ID - - `GOOGLE_CLOUD_REGION` — GCP region (defaults to `us-central1`) + https://aiplatform.googleapis.com/v1beta1/projects/{project}/locations/global - Or pass `:base_url` explicitly: + ### Environment Variables - model = Model.parse("vertex_ai:gemini-2.0-flash", - base_url: "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1" - ) + - `GOOGLE_CLOUD_PROJECT` (or `GCLOUD_PROJECT`) — GCP project ID (required) + - `GOOGLE_CLOUD_REGION` (or `GOOGLE_CLOUD_LOCATION`) — GCP region or `global` (defaults to `us-central1`) + + Both `GOOGLE_CLOUD_REGION` and `GOOGLE_CLOUD_LOCATION` are supported, consistent with + other Google Cloud libraries and tooling. `GOOGLE_CLOUD_REGION` takes precedence if both + are set. + + ### Explicit Base URL + + You can override the auto-constructed URL entirely: + + alias Nous.Providers.VertexAI + + # Use the endpoint helper to build the URL with correct API version: + url = VertexAI.endpoint("my-project", "global", "gemini-3.1-pro-preview") + # => "https://aiplatform.googleapis.com/v1beta1/projects/my-project/locations/global" + + agent = Nous.new("vertex_ai:gemini-3.1-pro-preview", base_url: url) + + ## Input Validation + + The provider validates `GOOGLE_CLOUD_PROJECT` and the region at request time and returns + helpful error messages for invalid values (e.g., typos, wrong format) instead of opaque + DNS or HTTP errors. 
## Configuration - # In config.exs + # config.exs + config :nous, :vertex_ai, + goth: MyApp.Goth + + # Or with an explicit base_url (overrides env var URL construction): config :nous, :vertex_ai, goth: MyApp.Goth, - base_url: "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1" + base_url: "https://aiplatform.googleapis.com/v1beta1/projects/my-project/locations/global" + + ## Examples + + # Gemini 3.1 Pro on global endpoint (preview, v1beta1) + agent = Nous.new("vertex_ai:gemini-3.1-pro-preview") + + # Gemini 3 Flash on global endpoint (preview, v1beta1) + agent = Nous.new("vertex_ai:gemini-3-flash-preview") + + # Gemini 2.0 Flash on regional endpoint (stable, v1) + agent = Nous.new("vertex_ai:gemini-2.0-flash") + + # With explicit region override + agent = Nous.new("vertex_ai:gemini-3.1-pro-preview", + base_url: VertexAI.endpoint("my-project", "global", "gemini-3.1-pro-preview") + ) """ @@ -168,28 +238,46 @@ defmodule Nous.Providers.VertexAI do end @doc """ - Build a Vertex AI endpoint URL from project ID and region. + Build a Vertex AI endpoint URL from project ID, region, and optional model name. Uses `v1beta1` API version for preview/experimental models and `v1` for stable models. If no model name is provided, defaults to `v1`. + When region is `"global"`, uses `aiplatform.googleapis.com` (no region prefix). + Regional endpoints use `{region}-aiplatform.googleapis.com`. + + Gemini 3.x preview models (`gemini-3.1-pro-preview`, `gemini-3-flash-preview`, etc.) + are only available on the global endpoint. 
+ ## Examples + # Regional endpoint, stable model (v1) iex> Nous.Providers.VertexAI.endpoint("my-project", "us-central1") "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1" iex> Nous.Providers.VertexAI.endpoint("my-project", "us-central1", "gemini-2.0-flash") "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1" - iex> Nous.Providers.VertexAI.endpoint("my-project", "europe-west1", "gemini-3.1-pro-preview") - "https://europe-west1-aiplatform.googleapis.com/v1beta1/projects/my-project/locations/europe-west1" + # Global endpoint, preview model (v1beta1) — required for Gemini 3.x + iex> Nous.Providers.VertexAI.endpoint("my-project", "global", "gemini-3.1-pro-preview") + "https://aiplatform.googleapis.com/v1beta1/projects/my-project/locations/global" + + iex> Nous.Providers.VertexAI.endpoint("my-project", "global", "gemini-3-flash-preview") + "https://aiplatform.googleapis.com/v1beta1/projects/my-project/locations/global" """ @spec endpoint(String.t(), String.t(), String.t() | nil) :: String.t() def endpoint(project_id, region \\ "us-central1", model \\ nil) do api_version = api_version_for_model(model) - "https://#{region}-aiplatform.googleapis.com/#{api_version}/projects/#{project_id}/locations/#{region}" + host = + if region == "global" do + "aiplatform.googleapis.com" + else + "#{region}-aiplatform.googleapis.com" + end + + "https://#{host}/#{api_version}/projects/#{project_id}/locations/#{region}" end @doc """ @@ -211,22 +299,32 @@ defmodule Nous.Providers.VertexAI do # Resolve the base URL from options, app config, or env vars. # When building from env vars, uses the model name to determine the API version. 
defp resolve_base_url(opts, model) do
-    url =
+    explicit_url =
       Keyword.get(opts, :base_url) ||
-        get_in(Application.get_env(:nous, :vertex_ai, []), [:base_url]) ||
-        build_default_base_url(model)
+        get_in(Application.get_env(:nous, :vertex_ai, []), [:base_url])
 
-    if url && url != "" do
-      {:ok, url}
-    else
-      {:error,
-       %{
-         reason: :no_base_url,
-         message:
-           "No Vertex AI base URL configured. Provide :base_url option, " <>
-             "set GOOGLE_CLOUD_PROJECT environment variable, or configure " <>
-             "config :nous, :vertex_ai, base_url: \"...\""
-       }}
+    cond do
+      explicit_url && explicit_url != "" ->
+        {:ok, explicit_url}
+
+      true ->
+        case build_default_base_url(model) do
+          {:ok, url} ->
+            {:ok, url}
+
+          {:error, _} = error ->
+            error
+
+          :not_configured ->
+            {:error,
+             %{
+               reason: :no_base_url,
+               message:
+                 "No Vertex AI base URL configured. Provide :base_url option, " <>
+                   "set the GOOGLE_CLOUD_PROJECT environment variable, or configure " <>
+                   "config :nous, :vertex_ai, base_url: \"...\""
+             }}
+        end
     end
   end
 
@@ -250,12 +348,58 @@
   # Build default base URL from environment variables
   defp build_default_base_url(model) do
     project = System.get_env("GOOGLE_CLOUD_PROJECT") || System.get_env("GCLOUD_PROJECT")
-    region = System.get_env("GOOGLE_CLOUD_REGION") || "us-central1"
+
+    region =
+      System.get_env("GOOGLE_CLOUD_REGION") ||
+        System.get_env("GOOGLE_CLOUD_LOCATION") ||
+        "us-central1"
 
     if project do
-      endpoint(project, region, model)
+      with {:ok, project} <- validate_project_id(project),
+           {:ok, region} <- validate_region(region) do
+        {:ok, endpoint(project, region, model)}
+      end
     else
-      nil
+      :not_configured
+    end
+  end
+
+  # Validates a GCP project ID matches the expected format.
+  defp validate_project_id(nil) do
+    {:error,
+     %{
+       reason: :invalid_project_id,
+       message: "GOOGLE_CLOUD_PROJECT is not set."
+ }} + end + + defp validate_project_id(project) do + if Regex.match?(~r/^[a-z][a-z0-9-]{4,28}[a-z0-9]$/, project) do + {:ok, project} + else + {:error, + %{ + reason: :invalid_project_id, + message: + "Invalid GCP project ID: #{inspect(project)}. " <> + "Project IDs must be 6-30 characters, start with a letter, " <> + "and contain only lowercase letters, digits, and hyphens." + }} + end + end + + # Validates a GCP region matches the expected format (e.g., us-central1, global). + defp validate_region(region) do + if Regex.match?(~r/^[a-z]+-[a-z]+\d+$/, region) or region == "global" do + {:ok, region} + else + {:error, + %{ + reason: :invalid_region, + message: + "Invalid GCP region: #{inspect(region)}. " <> + "Expected format like 'us-central1', 'europe-west1', or 'global'." + }} end end diff --git a/mix.exs b/mix.exs index ce60f05..b948acb 100644 --- a/mix.exs +++ b/mix.exs @@ -1,7 +1,7 @@ defmodule Nous.MixProject do use Mix.Project - @version "0.12.7" + @version "0.12.8" @source_url "https://github.com/nyo16/nous" def project do diff --git a/test/nous/providers/vertex_ai_test.exs b/test/nous/providers/vertex_ai_test.exs index 97cfdd3..a69f467 100644 --- a/test/nous/providers/vertex_ai_test.exs +++ b/test/nous/providers/vertex_ai_test.exs @@ -1,5 +1,5 @@ defmodule Nous.Providers.VertexAITest do - use ExUnit.Case, async: true + use ExUnit.Case, async: false alias Nous.Providers.VertexAI @@ -39,6 +39,26 @@ defmodule Nous.Providers.VertexAITest do assert VertexAI.endpoint("my-project", "europe-west1") == "https://europe-west1-aiplatform.googleapis.com/v1/projects/my-project/locations/europe-west1" end + + test "uses v1beta1 for preview models" do + assert VertexAI.endpoint("my-project", "us-central1", "gemini-2.5-pro-preview-06-05") == + "https://us-central1-aiplatform.googleapis.com/v1beta1/projects/my-project/locations/us-central1" + end + + test "uses v1beta1 for experimental models" do + assert VertexAI.endpoint("my-project", "us-central1", 
"gemini-2.0-flash-experimental") == + "https://us-central1-aiplatform.googleapis.com/v1beta1/projects/my-project/locations/us-central1" + end + + test "uses v1 for stable models" do + assert VertexAI.endpoint("my-project", "us-central1", "gemini-2.0-flash") == + "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1" + end + + test "global endpoint uses aiplatform.googleapis.com without region prefix" do + assert VertexAI.endpoint("my-project", "global", "gemini-3.1-pro-preview") == + "https://aiplatform.googleapis.com/v1beta1/projects/my-project/locations/global" + end end describe "Model.parse/2 integration" do @@ -50,35 +70,19 @@ defmodule Nous.Providers.VertexAITest do assert model.api_key == "test-token" end - test "constructs base_url from env vars" do + test "base_url is nil at parse time (built at request time by the provider)" do System.put_env("GOOGLE_CLOUD_PROJECT", "test-project") System.put_env("GOOGLE_CLOUD_REGION", "europe-west4") try do model = Nous.Model.parse("vertex_ai:gemini-2.0-flash") - - assert model.base_url == - "https://europe-west4-aiplatform.googleapis.com/v1/projects/test-project/locations/europe-west4" + assert model.base_url == nil after System.delete_env("GOOGLE_CLOUD_PROJECT") System.delete_env("GOOGLE_CLOUD_REGION") end end - test "defaults region to us-central1" do - System.put_env("GOOGLE_CLOUD_PROJECT", "test-project") - System.delete_env("GOOGLE_CLOUD_REGION") - - try do - model = Nous.Model.parse("vertex_ai:gemini-2.0-flash") - - assert model.base_url == - "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1" - after - System.delete_env("GOOGLE_CLOUD_PROJECT") - end - end - test "base_url is nil when no project env var set" do System.delete_env("GOOGLE_CLOUD_PROJECT") System.delete_env("GCLOUD_PROJECT") @@ -113,6 +117,95 @@ defmodule Nous.Providers.VertexAITest do end end + describe "URL resolution from env vars" do + test "GOOGLE_CLOUD_LOCATION is 
used as fallback for region" do + System.put_env("GOOGLE_CLOUD_PROJECT", "test-project") + System.put_env("GOOGLE_CLOUD_LOCATION", "asia-northeast1") + System.delete_env("GOOGLE_CLOUD_REGION") + + try do + # The URL should resolve successfully (will fail at HTTP level, not config level) + {:error, reason} = + VertexAI.chat(%{"model" => "gemini-2.0-flash"}, api_key: "test", base_url: nil) + + # If we get an HTTP error (map with :status), the URL was resolved correctly + refute is_map(reason) and reason[:reason] == :no_base_url + after + System.delete_env("GOOGLE_CLOUD_PROJECT") + System.delete_env("GOOGLE_CLOUD_LOCATION") + end + end + + test "GOOGLE_CLOUD_REGION takes precedence over GOOGLE_CLOUD_LOCATION" do + System.put_env("GOOGLE_CLOUD_PROJECT", "test-project") + System.put_env("GOOGLE_CLOUD_REGION", "europe-west1") + System.put_env("GOOGLE_CLOUD_LOCATION", "asia-northeast1") + + try do + {:error, reason} = + VertexAI.chat(%{"model" => "gemini-2.0-flash"}, api_key: "test", base_url: nil) + + refute is_map(reason) and reason[:reason] == :no_base_url + after + System.delete_env("GOOGLE_CLOUD_PROJECT") + System.delete_env("GOOGLE_CLOUD_REGION") + System.delete_env("GOOGLE_CLOUD_LOCATION") + end + end + + test "invalid project ID returns helpful error" do + System.put_env("GOOGLE_CLOUD_PROJECT", "BAD PROJECT!") + System.delete_env("GOOGLE_CLOUD_REGION") + + try do + {:error, reason} = + VertexAI.chat(%{"model" => "gemini-2.0-flash"}, api_key: "test", base_url: nil) + + assert reason.reason == :invalid_project_id + assert reason.message =~ "Invalid GCP project ID" + assert reason.message =~ "BAD PROJECT!" 
+ after + System.delete_env("GOOGLE_CLOUD_PROJECT") + end + end + + test "invalid region returns helpful error" do + System.put_env("GOOGLE_CLOUD_PROJECT", "test-project") + System.put_env("GOOGLE_CLOUD_REGION", "not a region!") + + try do + {:error, reason} = + VertexAI.chat(%{"model" => "gemini-2.0-flash"}, api_key: "test", base_url: nil) + + assert reason.reason == :invalid_region + assert reason.message =~ "Invalid GCP region" + assert reason.message =~ "not a region!" + after + System.delete_env("GOOGLE_CLOUD_PROJECT") + System.delete_env("GOOGLE_CLOUD_REGION") + end + end + + test "preview models use v1beta1 URL at request time" do + System.put_env("GOOGLE_CLOUD_PROJECT", "test-project") + System.delete_env("GOOGLE_CLOUD_REGION") + + try do + # The model.base_url should be nil (deferred to provider) + model = Nous.Model.parse("vertex_ai:gemini-2.5-pro-preview-06-05", api_key: "test") + assert model.base_url == nil + + # When the provider resolves the URL, it should use v1beta1 + # We can't easily test the full flow without HTTP, but we can verify + # the endpoint function directly + assert VertexAI.endpoint("test-project", "us-central1", "gemini-2.5-pro-preview-06-05") =~ + "v1beta1" + after + System.delete_env("GOOGLE_CLOUD_PROJECT") + end + end + end + describe "token resolution" do test "chat returns error when no credentials available" do System.delete_env("VERTEX_AI_ACCESS_TOKEN")