diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 11797a1..d2a4ac7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ permissions: env: MIX_ENV: test - ELIXIR_VERSION: "1.17.3" + ELIXIR_VERSION: "1.18.3" OTP_VERSION: "27.2" jobs: @@ -96,9 +96,6 @@ jobs: fail-fast: false matrix: include: - - os: ubuntu-latest - elixir: "1.17.3" - otp: "27.2" - os: ubuntu-latest elixir: "1.18.3" otp: "27.2" diff --git a/CHANGELOG.md b/CHANGELOG.md index 6cb09ad..38ba0b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,29 @@ All notable changes to this project will be documented in this file. +## [0.13.2] - 2026-03-07 + +### Added + +- **Auto-update memory**: `Nous.Plugins.Memory` can now automatically reflect on conversations and update memories after each run — no explicit tool calls needed. Enable with `auto_update_memory: true` in `memory_config`. Configurable reflection model, frequency, and context limits. + - New `after_run/3` callback in `Nous.Plugin` behaviour — runs once after the entire agent run completes. Wired into both `AgentRunner.run/3` and `run_with_context/3`. + - `Nous.Plugin.run_after_run/4` helper for executing the hook across all plugins + - New config options: `:auto_update_memory`, `:auto_update_every`, `:reflection_model`, `:reflection_max_tokens`, `:reflection_max_messages`, `:reflection_max_memories` + - New example: `examples/memory/auto_update.exs` + +## [0.13.1] - 2026-03-06 + +### Added + +- **Vertex AI provider**: `Nous.Providers.VertexAI` for accessing Gemini models through Google Cloud Vertex AI. Supports enterprise features (VPC-SC, CMEK, regional endpoints, IAM). 
+ - Three auth modes: app config Goth (`config :nous, :vertex_ai, goth: MyApp.Goth`), per-model Goth (`default_settings: %{goth: MyApp.Goth}`), or direct access token (`api_key` / `VERTEX_AI_ACCESS_TOKEN`) + - Bearer token auth via `api_key` option, `VERTEX_AI_ACCESS_TOKEN` env var, or Goth integration + - Goth integration (`{:goth, "~> 1.4", optional: true}`) for automatic service account token management — reuse existing Goth processes from PubSub, etc. + - URL auto-construction from `GOOGLE_CLOUD_PROJECT` and `GOOGLE_CLOUD_REGION` env vars + - `Nous.Providers.VertexAI.endpoint/2` helper to build endpoint URLs + - Reuses existing Gemini message format, response parsing, and stream normalization + - Model string: `"vertex_ai:gemini-2.0-flash"` + ## [0.12.2] - 2026-03-04 ### Fixed diff --git a/README.md b/README.md index b6d5706..9f6b6f6 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,8 @@ IO.puts("Tokens: #{result.usage.total_tokens}") | LM Studio | `lmstudio:qwen3` | ✅ | | OpenAI | `openai:gpt-4` | ✅ | | Anthropic | `anthropic:claude-sonnet-4-5-20250929` | ✅ | +| Google Gemini | `gemini:gemini-2.0-flash` | ✅ | +| Google Vertex AI | `vertex_ai:gemini-2.0-flash` | ✅ | | Groq | `groq:llama-3.1-70b-versatile` | ✅ | | Ollama | `ollama:llama2` | ✅ | | OpenRouter | `openrouter:anthropic/claude-3.5-sonnet` | ✅ | @@ -106,9 +108,86 @@ All HTTP providers use pure Elixir HTTP clients (Req + Finch). LlamaCpp runs in- agent = Nous.new("lmstudio:qwen3") # Local (free) agent = Nous.new("openai:gpt-4") # OpenAI agent = Nous.new("anthropic:claude-sonnet-4-5-20250929") # Anthropic +agent = Nous.new("vertex_ai:gemini-2.0-flash") # Google Vertex AI agent = Nous.new("llamacpp:local", llamacpp_model: llm) # Local NIF ``` +### Google Vertex AI Setup + +Vertex AI provides enterprise access to Gemini models. To use it with a service account: + +**1. 
Create a service account:** + +```bash +export PROJECT_ID="your-project-id" + +# Enable Vertex AI API +gcloud services enable aiplatform.googleapis.com --project=$PROJECT_ID + +# Create service account +gcloud iam service-accounts create nous-vertex-ai \ + --display-name="Nous Vertex AI" \ + --project=$PROJECT_ID + +# Grant permission +gcloud projects add-iam-policy-binding $PROJECT_ID \ + --member="serviceAccount:nous-vertex-ai@${PROJECT_ID}.iam.gserviceaccount.com" \ + --role="roles/aiplatform.user" + +# Download key and store as env var +gcloud iam service-accounts keys create /tmp/sa.json \ + --iam-account="nous-vertex-ai@${PROJECT_ID}.iam.gserviceaccount.com" + +# Set the env vars +export GOOGLE_CREDENTIALS="$(cat /tmp/sa.json)" +export GOOGLE_CLOUD_PROJECT="$PROJECT_ID" +export GOOGLE_CLOUD_REGION="us-central1" +``` + +**2. Add Goth to your deps** (handles token refresh from the service account): + +```elixir +{:goth, "~> 1.4"} +``` + +**3. Start Goth in your supervision tree:** + +```elixir +credentials = System.get_env("GOOGLE_CREDENTIALS") |> Jason.decode!() + +children = [ + {Goth, name: MyApp.Goth, source: {:service_account, credentials}} +] +``` + +**4. Configure Nous to use Goth:** + +```elixir +# Option A: Via app config (recommended for production) +# config/config.exs +config :nous, :vertex_ai, goth: MyApp.Goth + +# Then just use it — no extra options needed: +agent = Nous.new("vertex_ai:gemini-2.0-flash") +{:ok, result} = Nous.run(agent, "Hello from Vertex AI!") +``` + +```elixir +# Option B: Per-model (useful for multiple projects/regions) +agent = Nous.new("vertex_ai:gemini-2.0-flash", + default_settings: %{goth: MyApp.Goth} +) +``` + +```elixir +# Option C: Direct access token (no Goth needed, e.g. 
for quick testing) +# First, in your shell: export VERTEX_AI_ACCESS_TOKEN="$(gcloud auth print-access-token)" + +agent = Nous.new("vertex_ai:gemini-2.0-flash") +``` + +See [`examples/providers/vertex_ai_goth_test.exs`](examples/providers/vertex_ai_goth_test.exs) for a runnable example. + ## Features ### Tool Calling diff --git a/docs/getting-started.md b/docs/getting-started.md index 844d766..421c022 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -49,6 +49,16 @@ export ANTHROPIC_API_KEY="sk-ant-your-key" export OPENAI_API_KEY="sk-your-key" ``` +**Google Vertex AI:** +```bash +export GOOGLE_CLOUD_PROJECT="your-project-id" +export GOOGLE_CLOUD_REGION="us-central1" # optional, defaults to us-central1 +# Option A: Use gcloud access token +export VERTEX_AI_ACCESS_TOKEN="$(gcloud auth print-access-token)" +# Option B: Use Goth with service account (recommended for production) +export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account.json" +``` + **Test cloud setup:** ```bash mix run -e " diff --git a/examples/README.md b/examples/README.md index 3411566..24cd76e 100644 --- a/examples/README.md +++ b/examples/README.md @@ -44,6 +44,7 @@ Provider-specific configuration and features: | [providers/openai.exs](https://github.com/nyo16/nous/blob/master/examples/providers/openai.exs) | GPT models, function calling, settings | | [providers/lmstudio.exs](https://github.com/nyo16/nous/blob/master/examples/providers/lmstudio.exs) | Local AI with LM Studio | | [providers/vllm_sglang.exs](https://github.com/nyo16/nous/blob/master/examples/providers/vllm_sglang.exs) | vLLM & SGLang high-performance local inference | +| [providers/vertex_ai.exs](https://github.com/nyo16/nous/blob/master/examples/providers/vertex_ai.exs) | Google Vertex AI with Goth auth | | [providers/llamacpp.exs](https://github.com/nyo16/nous/blob/master/examples/providers/llamacpp.exs) | Local NIF-based inference via llama.cpp | | 
[providers/switching_providers.exs](https://github.com/nyo16/nous/blob/master/examples/providers/switching_providers.exs) | Provider comparison and selection | @@ -59,6 +60,7 @@ Persistent agent memory with hybrid search: | [memory/duckdb_full.exs](https://github.com/nyo16/nous/blob/master/examples/memory/duckdb_full.exs) | DuckDB with FTS + vector search | | [memory/hybrid_full.exs](https://github.com/nyo16/nous/blob/master/examples/memory/hybrid_full.exs) | Muninn + Zvec for maximum search quality | | [memory/cross_agent.exs](https://github.com/nyo16/nous/blob/master/examples/memory/cross_agent.exs) | Two agents sharing memory with scoping | +| [memory/auto_update.exs](https://github.com/nyo16/nous/blob/master/examples/memory/auto_update.exs) | Auto-update memory after each run (no explicit tool calls) | ## Advanced Examples diff --git a/examples/memory/auto_update.exs b/examples/memory/auto_update.exs new file mode 100644 index 0000000..1f5797b --- /dev/null +++ b/examples/memory/auto_update.exs @@ -0,0 +1,82 @@ +# Auto-Update Memory +# +# Demonstrates automatic memory updates after each agent run. +# Instead of the agent explicitly calling remember/recall/forget tools, +# a reflection step runs after each conversation turn and updates +# memories automatically — similar to Claude Code's "recalled/wrote memory". +# +# Run: OPENAI_API_KEY="sk-..." mix run examples/memory/auto_update.exs +# +# You can also use a local model: +# mix run examples/memory/auto_update.exs + +# Choose a model (local or cloud) +model = (System.get_env("OPENAI_API_KEY") && "openai:gpt-4o-mini") || "lmstudio:qwen3-4b" + +alias Nous.Memory.Store + +# Create an agent with auto_update_memory enabled +agent = + Nous.new(model, + plugins: [Nous.Plugins.Memory], + instructions: "You are a helpful personal assistant. 
Remember what the user tells you.", + deps: %{ + memory_config: %{ + store: Store.ETS, + auto_update_memory: true, + auto_update_every: 1, + # Use a cheaper/faster model for the reflection step (optional) + # reflection_model: "openai:gpt-4o-mini", + reflection_max_tokens: 500 + } + } + ) + +IO.puts("=== Auto-Update Memory Demo ===\n") + +# Turn 1: Tell the agent something personal +IO.puts("--- Turn 1 ---") +{:ok, result1} = Nous.run(agent, "My name is Alice and I work as a data scientist at Acme Corp.") +IO.puts("Agent: #{result1.output}\n") + +# Check what memories were auto-created +store_state = result1.context.deps[:memory_config][:store_state] +{:ok, memories} = Store.ETS.list(store_state, []) +IO.puts("Memories after turn 1 (#{length(memories)}):") +for m <- memories, do: IO.puts(" - [#{m.type}] #{m.content}") +IO.puts("") + +# Turn 2: Continue the conversation (pass context for continuity) +IO.puts("--- Turn 2 ---") + +{:ok, result2} = + Nous.run(agent, "Actually, I just switched jobs. 
I'm now at TechCorp as an ML engineer.", + context: result1.context + ) + +IO.puts("Agent: #{result2.output}\n") + +# Check memories again — should have updated, not duplicated +store_state = result2.context.deps[:memory_config][:store_state] +{:ok, memories} = Store.ETS.list(store_state, []) +IO.puts("Memories after turn 2 (#{length(memories)}):") +for m <- memories, do: IO.puts(" - [#{m.type}] (#{m.id |> String.slice(0..7)}) #{m.content}") +IO.puts("") + +# Turn 3: Ask something that requires memory +IO.puts("--- Turn 3 ---") +{:ok, result3} = Nous.run(agent, "What do you know about me?", context: result2.context) +IO.puts("Agent: #{result3.output}\n") + +# Final memory state +store_state = result3.context.deps[:memory_config][:store_state] +{:ok, memories} = Store.ETS.list(store_state, []) +IO.puts("=== Final Memory State (#{length(memories)} memories) ===") + +for m <- memories do + IO.puts(" [#{m.type}, importance: #{m.importance}] #{m.content}") +end + +run_count = result3.context.deps[:memory_config][:_run_count] +IO.puts("\nReflection runs completed: #{run_count}") +IO.puts("Done!") diff --git a/examples/providers/vertex_ai.exs b/examples/providers/vertex_ai.exs new file mode 100644 index 0000000..a6b0126 --- /dev/null +++ b/examples/providers/vertex_ai.exs @@ -0,0 +1,136 @@ +#!/usr/bin/env elixir + +# Nous AI - Google Vertex AI Provider +# +# Vertex AI provides enterprise access to Gemini models with features like +# VPC-SC, CMEK, regional endpoints, and IAM-based access control. 
+# +# Prerequisites: +# - A Google Cloud project with Vertex AI API enabled +# - Authentication (one of): +# a) Access token: `export VERTEX_AI_ACCESS_TOKEN=$(gcloud auth print-access-token)` +# b) Goth with service account: `export GOOGLE_APPLICATION_CREDENTIALS=/path/to/sa.json` +# - Project configuration: +# `export GOOGLE_CLOUD_PROJECT=your-project-id` +# `export GOOGLE_CLOUD_REGION=us-central1` (optional, defaults to us-central1) + +IO.puts("=== Nous AI - Vertex AI Provider ===\n") + +# ============================================================================ +# Option 1: Using environment variables +# ============================================================================ + +IO.puts("--- Setup with Environment Variables ---") + +project = System.get_env("GOOGLE_CLOUD_PROJECT") +token = System.get_env("VERTEX_AI_ACCESS_TOKEN") + +if project && token do + IO.puts("Project: #{project}") + IO.puts("Region: #{System.get_env("GOOGLE_CLOUD_REGION", "us-central1")}\n") + + # With env vars set, just use the model string + agent = + Nous.new("vertex_ai:gemini-2.0-flash", + instructions: "You are a helpful assistant. Be concise." + ) + + case Nous.run(agent, "What is Elixir? 
Answer in one sentence.") do + {:ok, result} -> + IO.puts("Response: #{result.output}") + IO.puts("Tokens: #{result.usage.total_tokens}") + + {:error, error} -> + IO.puts("Error: #{inspect(error)}") + end +else + IO.puts(""" + Skipping: Set these environment variables to test: + export GOOGLE_CLOUD_PROJECT=your-project-id + export VERTEX_AI_ACCESS_TOKEN=$(gcloud auth print-access-token) + """) +end + +IO.puts("") + +# ============================================================================ +# Option 2: Explicit configuration +# ============================================================================ + +IO.puts("--- Explicit Configuration ---") + +IO.puts(""" +# Pass base_url and api_key directly: +model = Nous.Model.parse("vertex_ai:gemini-2.0-flash", + base_url: Nous.Providers.VertexAI.endpoint("my-project", "us-central1"), + api_key: access_token +) +""") + +# ============================================================================ +# Option 3: Using Goth (recommended for production) +# ============================================================================ + +IO.puts("--- Goth Integration (Production) ---") + +IO.puts(""" +# 1. Add {:goth, "~> 1.4"} to your deps +# 2. Start Goth in your supervision tree: +# +# children = [ +# {Goth, name: MyApp.Goth} +# ] +# +# 3. Set GOOGLE_APPLICATION_CREDENTIALS to your service account JSON +# 4. Configure Nous: +# +# config :nous, :vertex_ai, +# goth: MyApp.Goth, +# base_url: "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1" +# +# 5. Use it: +# agent = Nous.new("vertex_ai:gemini-2.0-flash") +""") + +# ============================================================================ +# Streaming +# ============================================================================ + +IO.puts("--- Streaming ---") + +if project && token do + agent = + Nous.new("vertex_ai:gemini-2.0-flash", + instructions: "You are a helpful assistant." 
+ ) + + case Nous.run_stream(agent, "Write a haiku about Elixir.") do + {:ok, stream} -> + stream + |> Enum.each(fn + {:text_delta, text} -> IO.write(text) + {:finish, _} -> IO.puts("\n") + _ -> :ok + end) + + {:error, error} -> + IO.puts("Streaming error: #{inspect(error)}") + end +else + IO.puts("Skipping streaming demo (no credentials)\n") +end + +# ============================================================================ +# Available Gemini Models on Vertex AI +# ============================================================================ + +IO.puts("--- Available Models ---") + +IO.puts(""" +Model | Description +-------------------------------|------------------------------------------- +gemini-2.0-flash | Fast, efficient for most tasks +gemini-2.0-flash-lite | Lightweight, lowest latency +gemini-2.5-pro-preview-06-05 | Most capable, best for complex reasoning +gemini-2.5-flash-preview-05-20 | Balanced speed and capability +""") diff --git a/examples/providers/vertex_ai_goth_test.exs b/examples/providers/vertex_ai_goth_test.exs new file mode 100644 index 0000000..e3e2dde --- /dev/null +++ b/examples/providers/vertex_ai_goth_test.exs @@ -0,0 +1,74 @@ +#!/usr/bin/env elixir + +# Quick test script for Vertex AI with service account (Goth) +# +# Prerequisites: +# export GOOGLE_CREDENTIALS='{"type":"service_account","project_id":"...","private_key":"...",...}' +# export GOOGLE_CLOUD_PROJECT="your-project-id" +# export GOOGLE_CLOUD_REGION="europe-west1" # optional, defaults to us-central1 +# +# Run: +# mix run examples/providers/vertex_ai_goth_test.exs + +credentials_json = System.get_env("GOOGLE_CREDENTIALS") +project = System.get_env("GOOGLE_CLOUD_PROJECT") + +unless credentials_json && project do + IO.puts(""" + Missing environment variables. 
Set: + export GOOGLE_CREDENTIALS='' + export GOOGLE_CLOUD_PROJECT="your-project-id" + """) + + System.halt(1) +end + +IO.puts("=== Vertex AI Test with Service Account ===\n") +IO.puts("Project: #{project}") +IO.puts("Region: #{System.get_env("GOOGLE_CLOUD_REGION", "us-central1")}\n") + +# Start Goth with service account credentials from env var +credentials = Jason.decode!(credentials_json) + +{:ok, _} = Goth.start_link(name: Nous.TestGoth, source: {:service_account, credentials}) + +IO.puts("Goth started successfully.\n") + +# --- Test 1: Non-streaming --- +IO.puts("--- Test 1: Non-streaming ---") + +agent = + Nous.new("vertex_ai:gemini-2.0-flash", + instructions: "You are a helpful assistant. Be concise.", + default_settings: %{goth: Nous.TestGoth} + ) + +case Nous.run(agent, "What is Elixir? Answer in one sentence.") do + {:ok, result} -> + IO.puts("Response: #{result.output}") + IO.puts("Tokens: #{result.usage.total_tokens}") + + {:error, error} -> + IO.puts("Error: #{inspect(error)}") +end + +IO.puts("") + +# --- Test 2: Streaming --- +IO.puts("--- Test 2: Streaming ---") + +case Nous.run_stream(agent, "Write a haiku about functional programming.") do + {:ok, stream} -> + stream + |> Enum.each(fn + {:text_delta, text} -> IO.write(text) + {:thinking_delta, _} -> :ok + {:finish, _} -> IO.puts("") + other -> IO.puts("\n[Event: #{inspect(other)}]") + end) + + {:error, error} -> + IO.puts("Streaming error: #{inspect(error)}") +end + +IO.puts("\nDone!") diff --git a/lib/nous/agent_runner.ex b/lib/nous/agent_runner.ex index cdf31ac..134036a 100644 --- a/lib/nous/agent_runner.ex +++ b/lib/nous/agent_runner.ex @@ -111,6 +111,15 @@ defmodule Nous.AgentRunner do case behaviour.extract_output(agent, final_ctx) do {:ok, output} -> agent_result = build_result(agent, final_ctx, output) + + # Run after_run plugin hooks + updated_ctx = Plugin.run_after_run(agent.plugins, agent, agent_result, final_ctx) + + agent_result = + if updated_ctx != final_ctx, + do: build_result(agent, 
updated_ctx, output), + else: agent_result + duration_ms = System.convert_time_unit(duration, :native, :millisecond) Logger.info(""" @@ -195,7 +204,15 @@ defmodule Nous.AgentRunner do {:ok, final_ctx} -> case behaviour.extract_output(agent, final_ctx) do {:ok, output} -> - {:ok, build_result(agent, final_ctx, output)} + agent_result = build_result(agent, final_ctx, output) + updated_ctx = Plugin.run_after_run(agent.plugins, agent, agent_result, final_ctx) + + agent_result = + if updated_ctx != final_ctx, + do: build_result(agent, updated_ctx, output), + else: agent_result + + {:ok, agent_result} {:error, %Errors.ValidationError{} = err} -> max_retries = Keyword.get(agent.structured_output, :max_retries, 0) diff --git a/lib/nous/memory.ex b/lib/nous/memory.ex index 96bbf2e..1167987 100644 --- a/lib/nous/memory.ex +++ b/lib/nous/memory.ex @@ -66,7 +66,9 @@ defmodule Nous.Memory do |> Map.put_new(:inject_min_score, 0.3) |> Map.put_new(:decay_lambda, 0.001) |> Map.put_new(:default_search_scope, :agent) - |> Map.put_new(:scoring_weights, relevance: 0.5, importance: 0.3, recency: 0.2)} + |> Map.put_new(:scoring_weights, relevance: 0.5, importance: 0.3, recency: 0.2) + |> Map.put_new(:auto_update_memory, false) + |> Map.put_new(:auto_update_every, 1)} end end diff --git a/lib/nous/messages.ex b/lib/nous/messages.ex index 48cac6d..c31aa5f 100644 --- a/lib/nous/messages.ex +++ b/lib/nous/messages.ex @@ -237,6 +237,9 @@ defmodule Nous.Messages do :gemini -> to_gemini_format(messages) + :vertex_ai -> + to_gemini_format(messages) + :mistral -> to_openai_format(messages) @@ -250,7 +253,7 @@ defmodule Nous.Messages do raise ArgumentError, """ Unsupported provider: #{inspect(provider)} - Supported providers: :openai, :openai_compatible, :groq, :lmstudio, :llamacpp, :vllm, :sglang, :anthropic, :gemini, :mistral + Supported providers: :openai, :openai_compatible, :groq, :lmstudio, :llamacpp, :vllm, :sglang, :anthropic, :gemini, :vertex_ai, :mistral """ end end @@ -351,6 +354,9 @@ 
defmodule Nous.Messages do :gemini -> from_gemini_response(response) + :vertex_ai -> + from_gemini_response(response) + :mistral -> from_openai_response(response) @@ -364,7 +370,7 @@ defmodule Nous.Messages do raise ArgumentError, """ Unsupported provider: #{inspect(provider)} - Supported providers: :openai, :openai_compatible, :groq, :lmstudio, :llamacpp, :vllm, :sglang, :anthropic, :gemini, :mistral + Supported providers: :openai, :openai_compatible, :groq, :lmstudio, :llamacpp, :vllm, :sglang, :anthropic, :gemini, :vertex_ai, :mistral """ end end diff --git a/lib/nous/model.ex b/lib/nous/model.ex index 9fab170..d391f0a 100644 --- a/lib/nous/model.ex +++ b/lib/nous/model.ex @@ -18,6 +18,7 @@ defmodule Nous.Model do :openai | :anthropic | :gemini + | :vertex_ai | :groq | :ollama | :lmstudio @@ -63,6 +64,7 @@ defmodule Nous.Model do * `"openai:gpt-4"` - OpenAI models * `"anthropic:claude-3-5-sonnet-20241022"` - Anthropic Claude * `"gemini:gemini-1.5-pro"` - Google Gemini + * `"vertex_ai:gemini-2.0-flash"` - Google Vertex AI * `"groq:llama-3.1-70b-versatile"` - Groq models * `"mistral:mistral-large-latest"` - Mistral models * `"ollama:llama2"` - Local Ollama @@ -95,6 +97,7 @@ defmodule Nous.Model do def parse("openai:" <> model_name, opts), do: new(:openai, model_name, opts) def parse("anthropic:" <> model_name, opts), do: new(:anthropic, model_name, opts) def parse("gemini:" <> model_name, opts), do: new(:gemini, model_name, opts) + def parse("vertex_ai:" <> model_name, opts), do: new(:vertex_ai, model_name, opts) def parse("groq:" <> model_name, opts), do: new(:groq, model_name, opts) def parse("mistral:" <> model_name, opts), do: new(:mistral, model_name, opts) def parse("ollama:" <> model_name, opts), do: new(:ollama, model_name, opts) @@ -147,7 +150,7 @@ defmodule Nous.Model do raise ArgumentError, "Invalid model string format: #{inspect(invalid_string)}. " <> "Expected format: \"provider:model-name\". 
" <> - "Supported providers: openai, anthropic, gemini, groq, mistral, ollama, lmstudio, llamacpp, openrouter, together, vllm, sglang, custom" + "Supported providers: openai, anthropic, gemini, vertex_ai, groq, mistral, ollama, lmstudio, llamacpp, openrouter, together, vllm, sglang, custom" end @doc """ @@ -202,6 +205,18 @@ defmodule Nous.Model do defp default_base_url(:openai), do: "https://api.openai.com/v1" defp default_base_url(:anthropic), do: "https://api.anthropic.com" defp default_base_url(:gemini), do: "https://generativelanguage.googleapis.com/v1beta" + + defp default_base_url(:vertex_ai) do + project = System.get_env("GOOGLE_CLOUD_PROJECT") || System.get_env("GCLOUD_PROJECT") + region = System.get_env("GOOGLE_CLOUD_REGION") || "us-central1" + + if project do + "https://#{region}-aiplatform.googleapis.com/v1/projects/#{project}/locations/#{region}" + else + nil + end + end + defp default_base_url(:groq), do: "https://api.groq.com/openai/v1" defp default_base_url(:ollama), do: "http://localhost:11434/v1" defp default_base_url(:lmstudio), do: "http://localhost:1234/v1" @@ -218,6 +233,7 @@ defmodule Nous.Model do defp default_api_key(:openai), do: Application.get_env(:nous, :openai_api_key) defp default_api_key(:anthropic), do: Application.get_env(:nous, :anthropic_api_key) defp default_api_key(:gemini), do: Application.get_env(:nous, :google_ai_api_key) + defp default_api_key(:vertex_ai), do: Application.get_env(:nous, :vertex_ai_api_key) defp default_api_key(:groq), do: Application.get_env(:nous, :groq_api_key) defp default_api_key(:openrouter), do: Application.get_env(:nous, :openrouter_api_key) defp default_api_key(:together), do: Application.get_env(:nous, :together_api_key) diff --git a/lib/nous/model_dispatcher.ex b/lib/nous/model_dispatcher.ex index a2331e6..18f095b 100644 --- a/lib/nous/model_dispatcher.ex +++ b/lib/nous/model_dispatcher.ex @@ -32,6 +32,11 @@ defmodule Nous.ModelDispatcher do Providers.Gemini.request(model, messages, settings) end + 
def request(%Model{provider: :vertex_ai} = model, messages, settings) do + Logger.debug("Routing to Vertex AI provider for model: #{model.model}") + Providers.VertexAI.request(model, messages, settings) + end + def request(%Model{provider: :mistral} = model, messages, settings) do Logger.debug("Routing to Mistral provider for model: #{model.model}") Providers.Mistral.request(model, messages, settings) @@ -82,6 +87,11 @@ defmodule Nous.ModelDispatcher do Providers.Gemini.request_stream(model, messages, settings) end + def request_stream(%Model{provider: :vertex_ai} = model, messages, settings) do + Logger.debug("Routing streaming request to Vertex AI provider for model: #{model.model}") + Providers.VertexAI.request_stream(model, messages, settings) + end + def request_stream(%Model{provider: :mistral} = model, messages, settings) do Logger.debug("Routing streaming request to Mistral provider for model: #{model.model}") Providers.Mistral.request_stream(model, messages, settings) @@ -132,6 +142,10 @@ defmodule Nous.ModelDispatcher do Providers.Gemini.count_tokens(messages) end + def count_tokens(%Model{provider: :vertex_ai}, messages) do + Providers.VertexAI.count_tokens(messages) + end + def count_tokens(%Model{provider: :mistral}, messages) do Providers.Mistral.count_tokens(messages) end diff --git a/lib/nous/plugin.ex b/lib/nous/plugin.ex index 99f353d..787e5fb 100644 --- a/lib/nous/plugin.ex +++ b/lib/nous/plugin.ex @@ -36,9 +36,15 @@ defmodule Nous.Plugin do ## Callback Execution Order - Plugins are executed in list order. For `before_request`, the context - flows through each plugin. For `after_response`, the context is passed - through each plugin in order. + Plugins are executed in list order. The context flows through each plugin + sequentially for all hooks: + + 1. `init/2` — once at run start + 2. `system_prompt/2` — once, fragments joined into system message + 3. `tools/2` — once per iteration, tools collected + 4. 
`before_request/3` — before each LLM call + 5. `after_response/3` — after each LLM response + 6. `after_run/3` — once after the entire run completes (post-loop) ## Built-in Plugins @@ -96,7 +102,23 @@ defmodule Nous.Plugin do ) :: Context.t() - @optional_callbacks [init: 2, tools: 2, system_prompt: 2, before_request: 3, after_response: 3] + @doc """ + Post-process after the entire agent run completes. + + Receives the agent, the final result map, and the final context. + Return the updated context. Use this for end-of-run housekeeping + like auto-updating memory. + """ + @callback after_run(agent :: Nous.Agent.t(), result :: map(), ctx :: Context.t()) :: Context.t() + + @optional_callbacks [ + init: 2, + tools: 2, + system_prompt: 2, + before_request: 3, + after_response: 3, + after_run: 3 + ] # Plugin execution helpers @@ -182,6 +204,20 @@ defmodule Nous.Plugin do end) end + @doc """ + Run `after_run/3` across all plugins, threading context. + """ + @spec run_after_run([module()], Nous.Agent.t(), map(), Context.t()) :: Context.t() + def run_after_run(plugins, agent, result, ctx) do + Enum.reduce(plugins, ctx, fn plugin, acc_ctx -> + if exports?(plugin, :after_run, 3) do + plugin.after_run(agent, result, acc_ctx) + else + acc_ctx + end + end) + end + # Ensure module is loaded before checking exports defp exports?(module, function, arity) do Code.ensure_loaded(module) diff --git a/lib/nous/plugins/memory.ex b/lib/nous/plugins/memory.ex index d544b27..560cd79 100644 --- a/lib/nous/plugins/memory.ex +++ b/lib/nous/plugins/memory.ex @@ -54,13 +54,47 @@ defmodule Nous.Plugins.Memory do **Optional — scoring:** * `:scoring_weights` - `[relevance: 0.5, importance: 0.3, recency: 0.2]` * `:decay_lambda` - Temporal decay rate (default: 0.001) + + **Optional — auto-update (after_run reflection):** + * `:auto_update_memory` - Automatically reflect on conversation and update memories after each run (default: `false`) + * `:auto_update_every` - Run reflection every N runs 
(default: `1`) + * `:reflection_model` - Model string for reflection LLM call (default: agent's own model). Use a cheaper model like `"openai:gpt-4o-mini"` to save costs. + * `:reflection_max_tokens` - Max tokens for reflection response (default: `1000`) + * `:reflection_max_messages` - Max recent messages to include in reflection context (default: `20`) + * `:reflection_max_memories` - Max existing memories to include in reflection context (default: `50`) + + ## Auto-Update Memory + + When `:auto_update_memory` is `true`, the plugin runs an `after_run/3` hook + after each agent run completes. It reflects on the conversation and outputs + a JSON array of memory operations (`remember`, `update`, `forget`) — similar + to Claude Code's "recalled/wrote memory" behavior. + + agent = Nous.new("openai:gpt-4o", + plugins: [Nous.Plugins.Memory], + deps: %{ + memory_config: %{ + store: Nous.Memory.Store.ETS, + auto_update_memory: true, + reflection_model: "openai:gpt-4o-mini" + } + } + ) + + {:ok, result} = Nous.run(agent, "My favorite color is blue") + # Memory automatically stored after run completes + + {:ok, result2} = Nous.run(agent, "I changed my mind, it's green", + context: result.context + ) + # Memory automatically updated (not duplicated) """ @behaviour Nous.Plugin require Logger - alias Nous.Memory.{Search, Tools} + alias Nous.Memory.{Embedding, Entry, Search, Tools} @impl true def init(_agent, ctx) do @@ -90,6 +124,12 @@ defmodule Nous.Plugins.Memory do |> Map.put_new(:decay_lambda, 0.001) |> Map.put_new(:default_search_scope, :agent) |> Map.put(:_inject_done, false) + |> Map.put_new(:auto_update_memory, false) + |> Map.put_new(:auto_update_every, 1) + |> Map.put_new(:reflection_max_tokens, 1000) + |> Map.put_new(:reflection_max_messages, 20) + |> Map.put_new(:reflection_max_memories, 50) + |> Map.put_new(:_run_count, 0) %{ctx | deps: Map.put(ctx.deps, :memory_config, updated_config)} @@ -145,10 +185,289 @@ defmodule Nous.Plugins.Memory do end end + @impl true + 
def after_run(agent, _result, ctx) do + config = ctx.deps[:memory_config] || %{} + + if config[:auto_update_memory] == true && config[:store_state] != nil do + run_count = (config[:_run_count] || 0) + 1 + auto_update_every = config[:auto_update_every] || 1 + + ctx = + if rem(run_count, auto_update_every) == 0 do + do_memory_reflection(agent, ctx, config) + else + ctx + end + + # Always persist the updated run count + updated_config = Map.put(ctx.deps[:memory_config] || config, :_run_count, run_count) + %{ctx | deps: Map.put(ctx.deps, :memory_config, updated_config)} + else + ctx + end + end + # --------------------------------------------------------------------------- # Private # --------------------------------------------------------------------------- + defp do_memory_reflection(agent, ctx, config) do + store_mod = config[:store] + store_state = config[:store_state] + max_messages = config[:reflection_max_messages] || 20 + max_memories = config[:reflection_max_memories] || 50 + max_tokens = config[:reflection_max_tokens] || 1000 + + # Determine model for reflection: explicit config, or fall back to agent's model + reflection_model = + config[:reflection_model] || + "#{agent.model.provider}:#{agent.model.model}" + + if reflection_model do + # 1. Format recent conversation messages (skip system messages) + conversation_text = format_conversation(ctx.messages, max_messages) + + # 2. Fetch existing memories + scope = build_memory_scope(config) + + existing_memories = + case store_mod.list(store_state, scope: scope, limit: max_memories) do + {:ok, entries} -> entries + _ -> [] + end + + memories_text = format_existing_memories(existing_memories) + + # 3. 
Build reflection prompt and call LLM + prompt = build_reflection_prompt(conversation_text, memories_text) + + case Nous.LLM.generate_text(reflection_model, prompt, + system: reflection_system_prompt(), + max_tokens: max_tokens, + temperature: 0.0 + ) do + {:ok, response_text} -> + apply_reflection_operations(ctx, config, response_text) + + {:error, reason} -> + Logger.warning("Memory auto-update reflection failed: #{inspect(reason)}") + ctx + end + else + ctx + end + end + + defp format_conversation(messages, max_messages) do + messages + |> Enum.reject(fn msg -> msg.role == :system end) + |> Enum.take(-max_messages) + |> Enum.map(fn msg -> + role = msg.role |> to_string() |> String.capitalize() + content = if is_binary(msg.content), do: msg.content, else: inspect(msg.content) + "#{role}: #{content}" + end) + |> Enum.join("\n") + end + + defp format_existing_memories([]), do: "No existing memories." + + defp format_existing_memories(entries) do + entries + |> Enum.map(fn entry -> + "[#{entry.id}] (#{entry.type}, importance: #{entry.importance}) #{entry.content}" + end) + |> Enum.join("\n") + end + + defp build_memory_scope(config) do + case config[:default_search_scope] do + :global -> :global + :session -> scope_from_config(config, [:agent_id, :session_id, :user_id]) + :user -> scope_from_config(config, [:user_id]) + _ -> scope_from_config(config, [:agent_id, :user_id]) + end + end + + defp reflection_system_prompt do + """ + You are a memory management assistant. Analyze the conversation and existing memories, \ + then output a JSON array of memory operations. 
Each operation is an object with these fields: + + - "action": one of "remember", "update", or "forget" + - "content": the memory content (required for "remember" and "update") + - "type": one of "semantic", "episodic", "procedural" (default: "semantic") + - "importance": a float 0.0–1.0 (default: 0.5) + - "id": the memory ID (required for "update" and "forget") + + Rules: + - Only output the JSON array, nothing else. No markdown fences. + - "remember" creates a new memory. + - "update" modifies an existing memory's content (provide the id). + - "forget" deletes an outdated or incorrect memory (provide the id). + - Be selective. Only store information worth remembering long-term. + - Prefer updating existing memories over creating duplicates. + - If there is nothing worth remembering, output an empty array: [] + """ + end + + defp build_reflection_prompt(conversation_text, memories_text) do + """ + ## Recent Conversation + #{conversation_text} + + ## Existing Memories + #{memories_text} + + Based on the conversation above, what memory operations should be performed? \ + Output a JSON array of operations. 
+ """ + end + + @doc false + def apply_reflection_operations(ctx, config, response_text) do + case parse_reflection_json(response_text) do + {:ok, operations} when is_list(operations) -> + Enum.reduce(operations, ctx, fn op, acc_ctx -> + apply_single_operation(acc_ctx, config, op) + end) + + _ -> + Logger.warning("Memory auto-update: failed to parse reflection response") + ctx + end + end + + defp parse_reflection_json(text) do + # Strip potential markdown fences + cleaned = + text + |> String.trim() + |> String.replace(~r/^```(?:json)?\s*/m, "") + |> String.replace(~r/\s*```$/m, "") + |> String.trim() + + case JSON.decode(cleaned) do + {:ok, parsed} when is_list(parsed) -> {:ok, parsed} + {:ok, _} -> {:error, :not_an_array} + {:error, _} = err -> err + end + end + + defp apply_single_operation(ctx, config, %{"action" => "remember"} = op) do + store_mod = config[:store] + store_state = (ctx.deps[:memory_config] || config)[:store_state] + content = op["content"] + + unless content do + ctx + else + embedding = maybe_embed(config, content) + + entry = + Entry.new(%{ + content: content, + type: parse_memory_type(op["type"]), + importance: op["importance"] || 0.5, + embedding: embedding, + agent_id: config[:agent_id], + session_id: config[:session_id], + user_id: config[:user_id], + namespace: config[:namespace] + }) + + case store_mod.store(store_state, entry) do + {:ok, new_state} -> + Logger.debug("Memory auto-update: remembered #{entry.id} — #{content}") + updated_config = Map.put(ctx.deps[:memory_config] || config, :store_state, new_state) + %{ctx | deps: Map.put(ctx.deps, :memory_config, updated_config)} + + {:error, reason} -> + Logger.warning("Memory auto-update: store failed: #{inspect(reason)}") + ctx + end + end + end + + defp apply_single_operation(ctx, config, %{"action" => "update", "id" => id} = op) + when is_binary(id) do + store_mod = config[:store] + store_state = (ctx.deps[:memory_config] || config)[:store_state] + content = op["content"] + + updates 
= + %{} + |> then(fn m -> if content, do: Map.put(m, :content, content), else: m end) + |> then(fn m -> + if op["importance"], do: Map.put(m, :importance, op["importance"]), else: m + end) + |> then(fn m -> + if op["type"], do: Map.put(m, :type, parse_memory_type(op["type"])), else: m + end) + + # Re-embed if content changed + updates = + if content do + case maybe_embed(config, content) do + nil -> updates + emb -> Map.put(updates, :embedding, emb) + end + else + updates + end + + case store_mod.update(store_state, id, updates) do + {:ok, new_state} -> + Logger.debug("Memory auto-update: updated #{id}") + updated_config = Map.put(ctx.deps[:memory_config] || config, :store_state, new_state) + %{ctx | deps: Map.put(ctx.deps, :memory_config, updated_config)} + + {:error, reason} -> + Logger.warning("Memory auto-update: update failed for #{id}: #{inspect(reason)}") + ctx + end + end + + defp apply_single_operation(ctx, config, %{"action" => "forget", "id" => id}) + when is_binary(id) do + store_mod = config[:store] + store_state = (ctx.deps[:memory_config] || config)[:store_state] + + case store_mod.delete(store_state, id) do + {:ok, new_state} -> + Logger.debug("Memory auto-update: forgot #{id}") + updated_config = Map.put(ctx.deps[:memory_config] || config, :store_state, new_state) + %{ctx | deps: Map.put(ctx.deps, :memory_config, updated_config)} + + {:error, reason} -> + Logger.warning("Memory auto-update: forget failed for #{id}: #{inspect(reason)}") + ctx + end + end + + defp apply_single_operation(ctx, _config, op) do + Logger.warning("Memory auto-update: unrecognized operation: #{inspect(op)}") + ctx + end + + defp maybe_embed(config, content) do + embedding_provider = config[:embedding] + embedding_opts = config[:embedding_opts] || [] + + if embedding_provider do + case Embedding.embed(embedding_provider, content, embedding_opts) do + {:ok, emb} -> emb + {:error, _} -> nil + end + end + end + + defp parse_memory_type("semantic"), do: :semantic + defp 
parse_memory_type("episodic"), do: :episodic + defp parse_memory_type("procedural"), do: :procedural + defp parse_memory_type(_), do: :semantic + defp should_inject_this_iteration?(config) do case config[:inject_strategy] do :every_iteration -> true diff --git a/lib/nous/provider.ex b/lib/nous/provider.ex index 0ad97bf..6c13673 100644 --- a/lib/nous/provider.ex +++ b/lib/nous/provider.ex @@ -436,6 +436,7 @@ defmodule Nous.Provider do case @provider_id do :anthropic -> Nous.StreamNormalizer.Anthropic :gemini -> Nous.StreamNormalizer.Gemini + :vertex_ai -> Nous.StreamNormalizer.Gemini _ -> Nous.StreamNormalizer.OpenAI end end diff --git a/lib/nous/providers/vertex_ai.ex b/lib/nous/providers/vertex_ai.ex new file mode 100644 index 0000000..5062592 --- /dev/null +++ b/lib/nous/providers/vertex_ai.ex @@ -0,0 +1,297 @@ +defmodule Nous.Providers.VertexAI do + @moduledoc """ + Google Vertex AI provider implementation. + + Supports Gemini models via the Vertex AI API, which provides enterprise features + like VPC-SC, CMEK, and regional endpoints. + + ## Authentication + + Vertex AI uses OAuth2 Bearer tokens (not API keys like Google AI). + Token resolution order: + + 1. `:api_key` option passed directly (treated as a Bearer access token) + 2. Goth integration — if a Goth process name is configured, fetches tokens automatically + 3. `VERTEX_AI_ACCESS_TOKEN` environment variable + 4. Application config: `config :nous, :vertex_ai, api_key: "..."` + + ### Using Goth (Recommended) + + If you already use Goth for Google Cloud services (PubSub, etc.), you can reuse it. + Goth handles service account credentials, token caching, and auto-refresh via the + `GOOGLE_APPLICATION_CREDENTIALS` environment variable. 
+ + Add Goth to your deps and supervision tree: + + # mix.exs + {:goth, "~> 1.4"} + + # application.ex + children = [ + {Goth, name: MyApp.Goth} + ] + + Then configure Nous to use it: + + # config.exs + config :nous, :vertex_ai, goth: MyApp.Goth + + Or pass it per-model: + + model = Model.parse("vertex_ai:gemini-2.0-flash", + default_settings: %{goth: MyApp.Goth} + ) + + ### Using an Access Token + + You can pass a pre-obtained token (e.g., from `gcloud auth print-access-token`): + + model = Model.parse("vertex_ai:gemini-2.0-flash", + api_key: System.get_env("VERTEX_AI_ACCESS_TOKEN") + ) + + ## URL Construction + + The base URL is constructed from project and region: + + https://{region}-aiplatform.googleapis.com/v1/projects/{project}/locations/{region} + + Set via environment variables: + + - `GOOGLE_CLOUD_PROJECT` — GCP project ID + - `GOOGLE_CLOUD_REGION` — GCP region (defaults to `us-central1`) + + Or pass `:base_url` explicitly: + + model = Model.parse("vertex_ai:gemini-2.0-flash", + base_url: "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1" + ) + + ## Configuration + + # In config.exs + config :nous, :vertex_ai, + goth: MyApp.Goth, + base_url: "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1" + + """ + + use Nous.Provider, + id: :vertex_ai, + # Default is constructed dynamically from env vars in resolve_base_url/1 + default_base_url: "", + default_env_key: "VERTEX_AI_ACCESS_TOKEN" + + alias Nous.Providers.HTTP + + require Logger + + @default_timeout 60_000 + @streaming_timeout 120_000 + + # Override to convert from generic format to Gemini's format (same API format) + defp build_request_params(model, messages, settings) do + merged_settings = Map.merge(model.default_settings, settings) + + # Reuse Gemini message format — Vertex AI uses the same content structure + {system_prompt, contents} = Nous.Messages.to_provider_format(messages, :gemini) + + params = %{"contents" => 
contents} + + params = + if system_prompt do + Map.put(params, "systemInstruction", %{"parts" => [%{"text" => system_prompt}]}) + else + params + end + + # Map generic settings to Gemini's generationConfig + generation_config = + %{} + |> maybe_put("temperature", merged_settings[:temperature]) + |> maybe_put("maxOutputTokens", merged_settings[:max_tokens]) + |> maybe_put("topP", merged_settings[:top_p]) + |> maybe_put("stopSequences", merged_settings[:stop_sequences] || merged_settings[:stop]) + + # Merge any explicit generationConfig from settings + generation_config = + Map.merge(generation_config, merged_settings[:generationConfig] || %{}) + + if map_size(generation_config) > 0 do + Map.put(params, "generationConfig", generation_config) + else + params + end + end + + # Use Gemini stream normalizer — same response format + defp default_stream_normalizer, do: Nous.StreamNormalizer.Gemini + + @impl Nous.Provider + def chat(params, opts \\ []) do + model = Map.get(params, "model") || Map.get(params, :model) || "gemini-2.0-flash" + + with {:ok, token} <- resolve_token(opts), + {:ok, url_base} <- resolve_base_url(opts) do + url = build_url(url_base, model, :generate) + headers = build_headers(token) + timeout = Keyword.get(opts, :timeout, @default_timeout) + + # Remove model from params (it's in the URL) + body = params |> Map.delete("model") |> Map.delete(:model) + + HTTP.post(url, body, headers, timeout: timeout) + end + end + + @impl Nous.Provider + def chat_stream(params, opts \\ []) do + model = Map.get(params, "model") || Map.get(params, :model) || "gemini-2.0-flash" + + with {:ok, token} <- resolve_token(opts), + {:ok, url_base} <- resolve_base_url(opts) do + url = build_url(url_base, model, :stream) + headers = build_headers(token) + timeout = Keyword.get(opts, :timeout, @streaming_timeout) + finch_name = Keyword.get(opts, :finch_name, Nous.Finch) + + # Remove model from params (it's in the URL) + body = params |> Map.delete("model") |> Map.delete(:model) + + 
# Vertex AI with ?alt=sse returns SSE format (default parser) + HTTP.stream(url, body, headers, timeout: timeout, finch_name: finch_name) + end + end + + @doc """ + Build a Vertex AI endpoint URL from project ID and region. + + ## Examples + + iex> Nous.Providers.VertexAI.endpoint("my-project", "us-central1") + "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1" + + """ + @spec endpoint(String.t(), String.t()) :: String.t() + def endpoint(project_id, region \\ "us-central1") do + "https://#{region}-aiplatform.googleapis.com/v1/projects/#{project_id}/locations/#{region}" + end + + # Resolve the base URL from options, app config, or env vars + defp resolve_base_url(opts) do + url = + Keyword.get(opts, :base_url) || + get_in(Application.get_env(:nous, :vertex_ai, []), [:base_url]) || + build_default_base_url() + + if url && url != "" do + {:ok, url} + else + {:error, + %{ + reason: :no_base_url, + message: + "No Vertex AI base URL configured. Provide :base_url option, " <> + "set GOOGLE_CLOUD_PROJECT environment variable, or configure " <> + "config :nous, :vertex_ai, base_url: \"...\"" + }} + end + end + + # Build URL for Vertex AI endpoints + defp build_url(base_url, model, :generate) do + "#{base_url}/publishers/google/models/#{model}:generateContent" + end + + defp build_url(base_url, model, :stream) do + "#{base_url}/publishers/google/models/#{model}:streamGenerateContent?alt=sse" + end + + # Build headers with Bearer token auth + defp build_headers(token) do + [ + {"content-type", "application/json"}, + {"authorization", "Bearer #{token}"} + ] + end + + # Build default base URL from environment variables + defp build_default_base_url do + project = System.get_env("GOOGLE_CLOUD_PROJECT") || System.get_env("GCLOUD_PROJECT") + region = System.get_env("GOOGLE_CLOUD_REGION") || "us-central1" + + if project do + endpoint(project, region) + else + nil + end + end + + # Resolve access token from multiple sources + defp 
resolve_token(opts) do + cond do + # 1. Direct api_key option + token = api_key(opts) -> + {:ok, token} + + # 2. Goth integration + goth_name = goth_name(opts) -> + fetch_goth_token(goth_name) + + true -> + {:error, + %{ + reason: :no_credentials, + message: + "No Vertex AI credentials found. Provide :api_key, configure Goth, " <> + "or set VERTEX_AI_ACCESS_TOKEN environment variable." + }} + end + end + + # Get the configured Goth process name + defp goth_name(opts) do + Keyword.get(opts, :goth) || + get_in(Application.get_env(:nous, :vertex_ai, []), [:goth]) + end + + # Override build_provider_opts to pass goth name from model settings + defp build_provider_opts(model) do + opts = [ + base_url: model.base_url, + api_key: model.api_key, + timeout: model.receive_timeout, + finch_name: Application.get_env(:nous, :finch, Nous.Finch) + ] + + # Pass goth name from model's default_settings if present + if goth = model.default_settings[:goth] do + Keyword.put(opts, :goth, goth) + else + opts + end + end + + if Code.ensure_loaded?(Goth) do + defp fetch_goth_token(goth_name) do + case Goth.fetch(goth_name) do + {:ok, %{token: token}} -> + {:ok, token} + + {:error, reason} -> + Logger.error("Failed to fetch Goth token: #{inspect(reason)}") + {:error, %{reason: :goth_error, message: "Goth token fetch failed", details: reason}} + end + end + else + defp fetch_goth_token(_goth_name) do + {:error, + %{ + reason: :goth_not_available, + message: + "Goth is not available. Add {:goth, \"~> 1.4\"} to your deps " <> + "or provide an access token via :api_key option." 
+ }} + end + end +end diff --git a/mix.exs b/mix.exs index d5ba082..1bee83b 100644 --- a/mix.exs +++ b/mix.exs @@ -1,14 +1,14 @@ defmodule Nous.MixProject do use Mix.Project - @version "0.13.0" + @version "0.13.2" @source_url "https://github.com/nyo16/nous" def project do [ app: :nous, version: @version, - elixir: "~> 1.15", + elixir: "~> 1.18", start_permanent: Mix.env() == :prod, deps: deps(), docs: docs(), @@ -49,6 +49,9 @@ defmodule Nous.MixProject do {:finch, "~> 0.19"}, {:req, "~> 0.5"}, + # Google Cloud auth for Vertex AI (optional — add to your app's deps to unlock) + {:goth, "~> 1.4", optional: true}, + # HTML parsing (for web content extraction in research tools) {:floki, "~> 0.36", optional: true}, @@ -161,6 +164,7 @@ defmodule Nous.MixProject do Nous.Providers.OpenAICompatible, Nous.Providers.Anthropic, Nous.Providers.LMStudio, + Nous.Providers.VertexAI, Nous.Providers.LlamaCpp, Nous.StreamNormalizer.LlamaCpp ], diff --git a/mix.lock b/mix.lock index 952b2a1..61e6c3f 100644 --- a/mix.lock +++ b/mix.lock @@ -25,6 +25,7 @@ "fine": {:hex, :fine, "0.1.4", "b19a89c1476c7c57afb5f9314aed5960b5bc95d5277de4cb5ee8e1d1616ce379", [:mix], [], "hexpm", "be3324cc454a42d80951cf6023b9954e9ff27c6daa255483b3e8d608670303f5"}, "floki": {:hex, :floki, "0.38.0", "62b642386fa3f2f90713f6e231da0fa3256e41ef1089f83b6ceac7a3fd3abf33", [:mix], [], "hexpm", "a5943ee91e93fb2d635b612caf5508e36d37548e84928463ef9dd986f0d1abd9"}, "gemini_ex": {:hex, :gemini_ex, "0.8.6", "616af1e6c71d9540c088331d703cbc0b46ddb03f160b060ab4d8c8b3f22f9014", [:mix], [{:altar, "~> 0.1.2", [hex: :altar, repo: "hexpm", optional: false]}, {:gun, "~> 2.1", [hex: :gun, repo: "hexpm", optional: false]}, {:jason, "~> 1.4.4", [hex: :jason, repo: "hexpm", optional: false]}, {:joken, "~> 2.6.2", [hex: :joken, repo: "hexpm", optional: false]}, {:req, "~> 0.5.10", [hex: :req, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.3.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:typed_struct, "~> 0.3.0", [hex: 
:typed_struct, repo: "hexpm", optional: false]}], "hexpm", "ee1ac602c59c0284e0f588e4a8dc2e3620d299a0cc4defb352c06c1a6f4623d9"}, + "goth": {:hex, :goth, "1.4.5", "ee37f96e3519bdecd603f20e7f10c758287088b6d77c0147cd5ee68cf224aade", [:mix], [{:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:jose, "~> 1.11", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm", "0fc2dce5bd710651ed179053d0300ce3a5d36afbdde11e500d57f05f398d5ed5"}, "gun": {:hex, :gun, "2.2.0", "b8f6b7d417e277d4c2b0dc3c07dfdf892447b087f1cc1caff9c0f556b884e33d", [:make, :rebar3], [{:cowlib, ">= 2.15.0 and < 3.0.0", [hex: :cowlib, repo: "hexpm", optional: false]}], "hexpm", "76022700c64287feb4df93a1795cff6741b83fb37415c40c34c38d2a4645261a"}, "hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"}, "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, diff --git a/test/nous/plugin_test.exs b/test/nous/plugin_test.exs index 5b7369f..e62cdbb 100644 --- a/test/nous/plugin_test.exs +++ b/test/nous/plugin_test.exs @@ -49,6 +49,11 @@ defmodule Nous.PluginTest do def after_response(_agent, _response, ctx) do Context.merge_deps(ctx, %{full_plugin_after: true}) end + + @impl true + def after_run(_agent, _result, ctx) do + Context.merge_deps(ctx, %{full_plugin_after_run: true}) + end end # A second plugin to test threading/composition @@ -294,6 +299,32 @@ defmodule Nous.PluginTest do end end + describe "run_after_run/4" do + test "threads context through all plugins", %{agent: agent, ctx: ctx} do + result = %{output: "test"} + + result_ctx = Plugin.run_after_run([FullPlugin], agent, result, 
ctx) + + assert result_ctx.deps[:full_plugin_after_run] == true + end + + test "skips plugins that do not implement after_run", %{agent: agent, ctx: ctx} do + result = %{output: "test"} + + result_ctx = Plugin.run_after_run([MinimalPlugin, FullPlugin], agent, result, ctx) + + assert result_ctx.deps[:full_plugin_after_run] == true + end + + test "returns original context with empty plugins list", %{agent: agent, ctx: ctx} do + result = %{output: "test"} + + result_ctx = Plugin.run_after_run([], agent, result, ctx) + + assert result_ctx == ctx + end + end + describe "all optional callbacks safely skipped" do test "MinimalPlugin works as a no-op for every hook", %{agent: agent, ctx: ctx} do plugins = [MinimalPlugin] @@ -319,6 +350,11 @@ defmodule Nous.PluginTest do response = Message.assistant("test") ar_ctx = Plugin.run_after_response(plugins, agent, response, ctx) assert ar_ctx == ctx + + # run_after_run + result = %{output: "test"} + ar_run_ctx = Plugin.run_after_run(plugins, agent, result, ctx) + assert ar_run_ctx == ctx end end end diff --git a/test/nous/plugins/memory_auto_update_test.exs b/test/nous/plugins/memory_auto_update_test.exs new file mode 100644 index 0000000..1445c15 --- /dev/null +++ b/test/nous/plugins/memory_auto_update_test.exs @@ -0,0 +1,246 @@ +defmodule Nous.Plugins.MemoryAutoUpdateTest do + use ExUnit.Case, async: true + + alias Nous.Agent + alias Nous.Agent.Context + alias Nous.Memory.{Entry, Store} + alias Nous.Message + alias Nous.Plugins.Memory, as: MemoryPlugin + + setup do + agent = + Agent.new("openai:gpt-4", + plugins: [MemoryPlugin], + instructions: "Be helpful" + ) + + config = %{ + store: Store.ETS, + auto_update_memory: true, + auto_update_every: 1 + } + + ctx = Context.new(deps: %{memory_config: config}) + ctx = MemoryPlugin.init(agent, ctx) + + %{agent: agent, ctx: ctx} + end + + describe "after_run/3 — disabled" do + test "is a no-op when auto_update_memory is false", %{agent: agent} do + config = %{store: Store.ETS, 
auto_update_memory: false} + ctx = Context.new(deps: %{memory_config: config}) + ctx = MemoryPlugin.init(agent, ctx) + + result_ctx = MemoryPlugin.after_run(agent, %{output: "hi"}, ctx) + + # Context unchanged — no _run_count increment + assert result_ctx == ctx + end + + test "is a no-op when store is not initialized", %{agent: agent} do + ctx = Context.new(deps: %{memory_config: %{auto_update_memory: true}}) + + result_ctx = MemoryPlugin.after_run(agent, %{output: "hi"}, ctx) + + assert result_ctx == ctx + end + end + + describe "after_run/3 — run counter" do + test "increments _run_count on each call", %{agent: agent, ctx: ctx} do + # Stub the reflection to be a no-op by using a non-existent model + # We just want to verify counter behavior, so set auto_update_every very high + config = Map.put(ctx.deps[:memory_config], :auto_update_every, 1000) + ctx = %{ctx | deps: Map.put(ctx.deps, :memory_config, config)} + + ctx1 = MemoryPlugin.after_run(agent, %{output: "hi"}, ctx) + assert ctx1.deps[:memory_config][:_run_count] == 1 + + ctx2 = MemoryPlugin.after_run(agent, %{output: "hi"}, ctx1) + assert ctx2.deps[:memory_config][:_run_count] == 2 + + ctx3 = MemoryPlugin.after_run(agent, %{output: "hi"}, ctx2) + assert ctx3.deps[:memory_config][:_run_count] == 3 + end + end + + describe "apply_reflection_operations/3" do + test "handles remember operations", %{ctx: ctx} do + config = ctx.deps[:memory_config] + + json = + ~s([{"action": "remember", "content": "User likes blue", "type": "semantic", "importance": 0.8}]) + + result_ctx = MemoryPlugin.apply_reflection_operations(ctx, config, json) + + # Verify the memory was stored + store_state = result_ctx.deps[:memory_config][:store_state] + {:ok, entries} = Store.ETS.list(store_state, []) + assert length(entries) == 1 + assert hd(entries).content == "User likes blue" + assert hd(entries).importance == 0.8 + assert hd(entries).type == :semantic + end + + test "handles update operations", %{ctx: ctx} do + config = 
ctx.deps[:memory_config] + store_state = config[:store_state] + + # Store an initial memory + entry = Entry.new(%{content: "User likes blue", importance: 0.5}) + {:ok, store_state} = Store.ETS.store(store_state, entry) + config = Map.put(config, :store_state, store_state) + ctx = %{ctx | deps: Map.put(ctx.deps, :memory_config, config)} + + json = + ~s([{"action": "update", "id": "#{entry.id}", "content": "User likes green", "importance": 0.9}]) + + result_ctx = MemoryPlugin.apply_reflection_operations(ctx, config, json) + + # Verify the memory was updated + store_state = result_ctx.deps[:memory_config][:store_state] + {:ok, updated} = Store.ETS.fetch(store_state, entry.id) + assert updated.content == "User likes green" + assert updated.importance == 0.9 + end + + test "handles forget operations", %{ctx: ctx} do + config = ctx.deps[:memory_config] + store_state = config[:store_state] + + # Store an initial memory + entry = Entry.new(%{content: "Outdated info"}) + {:ok, store_state} = Store.ETS.store(store_state, entry) + config = Map.put(config, :store_state, store_state) + ctx = %{ctx | deps: Map.put(ctx.deps, :memory_config, config)} + + json = ~s([{"action": "forget", "id": "#{entry.id}"}]) + + result_ctx = MemoryPlugin.apply_reflection_operations(ctx, config, json) + + store_state = result_ctx.deps[:memory_config][:store_state] + assert {:error, :not_found} = Store.ETS.fetch(store_state, entry.id) + end + + test "handles multiple operations in sequence", %{ctx: ctx} do + config = ctx.deps[:memory_config] + store_state = config[:store_state] + + # Store a memory to update and one to forget + entry1 = Entry.new(%{content: "Old fact"}) + entry2 = Entry.new(%{content: "Wrong info"}) + {:ok, store_state} = Store.ETS.store(store_state, entry1) + {:ok, store_state} = Store.ETS.store(store_state, entry2) + config = Map.put(config, :store_state, store_state) + ctx = %{ctx | deps: Map.put(ctx.deps, :memory_config, config)} + + json = """ + [ + {"action": "remember", 
"content": "New fact"}, + {"action": "update", "id": "#{entry1.id}", "content": "Updated fact"}, + {"action": "forget", "id": "#{entry2.id}"} + ] + """ + + result_ctx = MemoryPlugin.apply_reflection_operations(ctx, config, json) + + store_state = result_ctx.deps[:memory_config][:store_state] + {:ok, entries} = Store.ETS.list(store_state, []) + + contents = Enum.map(entries, & &1.content) |> Enum.sort() + assert "New fact" in contents + assert "Updated fact" in contents + refute "Wrong info" in contents + assert length(entries) == 2 + end + + test "handles malformed JSON gracefully", %{ctx: ctx} do + config = ctx.deps[:memory_config] + + result_ctx = MemoryPlugin.apply_reflection_operations(ctx, config, "not json at all") + + # Context unchanged + assert result_ctx.deps[:memory_config][:store_state] == + ctx.deps[:memory_config][:store_state] + end + + test "handles empty array", %{ctx: ctx} do + config = ctx.deps[:memory_config] + + result_ctx = MemoryPlugin.apply_reflection_operations(ctx, config, "[]") + + assert result_ctx.deps[:memory_config][:store_state] == + ctx.deps[:memory_config][:store_state] + end + + test "handles JSON wrapped in markdown fences", %{ctx: ctx} do + config = ctx.deps[:memory_config] + + json = """ + ```json + [{"action": "remember", "content": "Fenced memory"}] + ``` + """ + + result_ctx = MemoryPlugin.apply_reflection_operations(ctx, config, json) + + store_state = result_ctx.deps[:memory_config][:store_state] + {:ok, entries} = Store.ETS.list(store_state, []) + assert length(entries) == 1 + assert hd(entries).content == "Fenced memory" + end + + test "skips unrecognized operations gracefully", %{ctx: ctx} do + config = ctx.deps[:memory_config] + + json = ~s([{"action": "unknown", "content": "something"}]) + + result_ctx = MemoryPlugin.apply_reflection_operations(ctx, config, json) + + store_state = result_ctx.deps[:memory_config][:store_state] + {:ok, entries} = Store.ETS.list(store_state, []) + assert entries == [] + end + + test 
"skips remember with missing content", %{ctx: ctx} do + config = ctx.deps[:memory_config] + + json = ~s([{"action": "remember"}]) + + result_ctx = MemoryPlugin.apply_reflection_operations(ctx, config, json) + + store_state = result_ctx.deps[:memory_config][:store_state] + {:ok, entries} = Store.ETS.list(store_state, []) + assert entries == [] + end + end + + describe "after_run/3 — auto_update_every" do + test "only triggers reflection at the right interval", %{agent: agent, ctx: ctx} do + # Set auto_update_every to 3, and don't set a reflection model + # so reflection will "fail" gracefully — we verify via _run_count and store state + config = + ctx.deps[:memory_config] + |> Map.put(:auto_update_every, 3) + + ctx = %{ctx | deps: Map.put(ctx.deps, :memory_config, config)} + + # Add a user message so format_conversation has something to work with + ctx = Context.add_message(ctx, Message.user("Hello")) + + # Run 1 — no reflection (1 % 3 != 0) + ctx1 = MemoryPlugin.after_run(agent, %{output: "hi"}, ctx) + assert ctx1.deps[:memory_config][:_run_count] == 1 + + # Run 2 — no reflection (2 % 3 != 0) + ctx2 = MemoryPlugin.after_run(agent, %{output: "hi"}, ctx1) + assert ctx2.deps[:memory_config][:_run_count] == 2 + + # Run 3 — reflection fires (3 % 3 == 0), but may fail due to no real LLM + # That's OK — we just verify the counter incremented + ctx3 = MemoryPlugin.after_run(agent, %{output: "hi"}, ctx2) + assert ctx3.deps[:memory_config][:_run_count] == 3 + end + end +end diff --git a/test/nous/providers/provider_test.exs b/test/nous/providers/provider_test.exs index 8c6551b..80eb6b3 100644 --- a/test/nous/providers/provider_test.exs +++ b/test/nous/providers/provider_test.exs @@ -400,6 +400,7 @@ defmodule Nous.ProviderTest do Nous.Providers.OpenAICompatible, Nous.Providers.Anthropic, Nous.Providers.Gemini, + Nous.Providers.VertexAI, Nous.Providers.Mistral, Nous.Providers.LMStudio, Nous.Providers.VLLM, diff --git a/test/nous/providers/vertex_ai_test.exs 
b/test/nous/providers/vertex_ai_test.exs new file mode 100644 index 0000000..97cfdd3 --- /dev/null +++ b/test/nous/providers/vertex_ai_test.exs @@ -0,0 +1,185 @@ +defmodule Nous.Providers.VertexAITest do + use ExUnit.Case, async: true + + alias Nous.Providers.VertexAI + + describe "provider configuration" do + test "has correct provider ID" do + assert VertexAI.provider_id() == :vertex_ai + end + + test "has correct env key" do + assert VertexAI.default_env_key() == "VERTEX_AI_ACCESS_TOKEN" + end + + test "implements required callbacks" do + Code.ensure_loaded!(VertexAI) + functions = VertexAI.__info__(:functions) + + assert {:chat, 1} in functions or {:chat, 2} in functions + assert {:chat_stream, 1} in functions or {:chat_stream, 2} in functions + assert {:count_tokens, 1} in functions + assert {:request, 3} in functions + assert {:request_stream, 3} in functions + end + end + + describe "endpoint/2" do + test "builds correct endpoint URL" do + assert VertexAI.endpoint("my-project", "us-central1") == + "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1" + end + + test "defaults to us-central1 region" do + assert VertexAI.endpoint("my-project") == + "https://us-central1-aiplatform.googleapis.com/v1/projects/my-project/locations/us-central1" + end + + test "supports other regions" do + assert VertexAI.endpoint("my-project", "europe-west1") == + "https://europe-west1-aiplatform.googleapis.com/v1/projects/my-project/locations/europe-west1" + end + end + + describe "Model.parse/2 integration" do + test "parses vertex_ai model string" do + model = Nous.Model.parse("vertex_ai:gemini-2.0-flash", api_key: "test-token") + + assert model.provider == :vertex_ai + assert model.model == "gemini-2.0-flash" + assert model.api_key == "test-token" + end + + test "constructs base_url from env vars" do + System.put_env("GOOGLE_CLOUD_PROJECT", "test-project") + System.put_env("GOOGLE_CLOUD_REGION", "europe-west4") + + try do + model = 
Nous.Model.parse("vertex_ai:gemini-2.0-flash") + + assert model.base_url == + "https://europe-west4-aiplatform.googleapis.com/v1/projects/test-project/locations/europe-west4" + after + System.delete_env("GOOGLE_CLOUD_PROJECT") + System.delete_env("GOOGLE_CLOUD_REGION") + end + end + + test "defaults region to us-central1" do + System.put_env("GOOGLE_CLOUD_PROJECT", "test-project") + System.delete_env("GOOGLE_CLOUD_REGION") + + try do + model = Nous.Model.parse("vertex_ai:gemini-2.0-flash") + + assert model.base_url == + "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1" + after + System.delete_env("GOOGLE_CLOUD_PROJECT") + end + end + + test "base_url is nil when no project env var set" do + System.delete_env("GOOGLE_CLOUD_PROJECT") + System.delete_env("GCLOUD_PROJECT") + + model = Nous.Model.parse("vertex_ai:gemini-2.0-flash", api_key: "test") + assert model.base_url == nil + end + + test "explicit base_url overrides env vars" do + System.put_env("GOOGLE_CLOUD_PROJECT", "env-project") + + try do + model = + Nous.Model.parse("vertex_ai:gemini-2.0-flash", + base_url: "https://custom-endpoint.example.com/v1", + api_key: "test" + ) + + assert model.base_url == "https://custom-endpoint.example.com/v1" + after + System.delete_env("GOOGLE_CLOUD_PROJECT") + end + end + + test "passes goth name through default_settings" do + model = + Nous.Model.parse("vertex_ai:gemini-2.0-flash", + default_settings: %{goth: MyApp.Goth} + ) + + assert model.default_settings[:goth] == MyApp.Goth + end + end + + describe "token resolution" do + test "chat returns error when no credentials available" do + System.delete_env("VERTEX_AI_ACCESS_TOKEN") + + {:error, reason} = VertexAI.chat(%{"model" => "gemini-2.0-flash"}, api_key: nil) + + assert reason.reason == :no_credentials + end + + test "chat returns error when no base_url available" do + System.delete_env("GOOGLE_CLOUD_PROJECT") + System.delete_env("GCLOUD_PROJECT") + + {:error, reason} = + 
VertexAI.chat(%{"model" => "gemini-2.0-flash"}, api_key: "test", base_url: nil) + + assert reason.reason == :no_base_url + end + end + + describe "message format" do + test "uses Gemini format for to_provider_format" do + messages = [Nous.Message.system("Be helpful"), Nous.Message.user("Hello")] + + {system_prompt, contents} = Nous.Messages.to_provider_format(messages, :vertex_ai) + + assert system_prompt == "Be helpful" + assert [%{"role" => "user", "parts" => [%{"text" => "Hello"}]}] = contents + end + + test "uses Gemini format for from_provider_response" do + response = %{ + "candidates" => [ + %{ + "content" => %{ + "parts" => [%{"text" => "Hello from Vertex AI"}], + "role" => "model" + } + } + ], + "usageMetadata" => %{ + "promptTokenCount" => 10, + "candidatesTokenCount" => 5, + "totalTokenCount" => 15 + } + } + + message = Nous.Messages.from_provider_response(response, :vertex_ai) + + assert message.role == :assistant + assert message.content == "Hello from Vertex AI" + end + end + + describe "ModelDispatcher routing" do + test "routes vertex_ai to VertexAI provider" do + # We can't make a real request but we can verify the module is callable + model = %Nous.Model{ + provider: :vertex_ai, + model: "gemini-2.0-flash", + api_key: nil, + base_url: nil, + default_settings: %{} + } + + # This will fail at the HTTP level, but we verify routing works + assert {:error, _} = Nous.ModelDispatcher.request(model, [Nous.Message.user("test")], %{}) + end + end +end
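The two features introduced in this changeset compose naturally: the Vertex AI provider supplies the model, and the Memory plugin's `auto_update_memory` handles reflection after each run. A minimal usage sketch, not taken from the diff itself — it assumes a Goth process named `MyApp.Goth` is already in your supervision tree and that `GOOGLE_CLOUD_PROJECT`/`GOOGLE_CLOUD_REGION` are set, per the VertexAI moduledoc:

```elixir
# config/config.exs — reuse an existing Goth process for Vertex AI auth
config :nous, :vertex_ai, goth: MyApp.Goth

# Application code — a Vertex AI agent with auto-updating memory.
# Option names follow the Memory plugin docs; the reflection model here
# is a hypothetical choice, any "provider:model" string should work.
agent =
  Nous.new("vertex_ai:gemini-2.0-flash",
    plugins: [Nous.Plugins.Memory],
    deps: %{
      memory_config: %{
        store: Nous.Memory.Store.ETS,
        auto_update_memory: true,
        # Reflect every other run instead of after every run
        auto_update_every: 2,
        # Use a cheaper model for the reflection pass
        reflection_model: "openai:gpt-4o-mini"
      }
    }
  )

{:ok, result} = Nous.run(agent, "My favorite color is blue")
# On runs where rem(run_count, auto_update_every) == 0, the after_run/3
# hook calls the reflection LLM and applies remember/update/forget ops.
```

Because `after_run/3` threads `store_state` and `_run_count` back through `ctx.deps[:memory_config]`, later calls should pass `context: result.context` so memory operations accumulate rather than restart.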