diff --git a/.env.example b/.env.example index 57dad4963f..fb0a62a3d0 100644 --- a/.env.example +++ b/.env.example @@ -211,6 +211,50 @@ OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 # MCP_SESSION_POOL_ENABLED=false # ANYIO_CANCEL_DELIVERY_PATCH_ENABLED=false +# Rust MCP (simple) +# RUST_MCP_BUILD=false # build the Rust MCP runtime into Containerfile.lite images +# RUST_MCP_MODE=off # off | shadow | edge | full +# RUST_MCP_LOG=warn # default Rust sidecar log filter for the simple mode flow +# +# RUST_MCP_MODE=shadow -> Rust sidecar enabled, but public /mcp stays on Python for safe fallback +# RUST_MCP_MODE=edge -> direct public /mcp on Rust with managed UDS sidecar defaults +# RUST_MCP_MODE=full -> edge + Rust session/event-store/resume/live-stream/affinity cores +# +# Advanced Rust MCP overrides +# RUST_MCP_SESSION_AUTH_REUSE=false # advanced override for the fast direct public Rust session-auth path; prefer RUST_MCP_MODE presets above +# EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED= +# EXPERIMENTAL_RUST_MCP_RUNTIME_URL=http://127.0.0.1:8787 +# EXPERIMENTAL_RUST_MCP_RUNTIME_UDS=/tmp/contextforge-mcp-rust.sock +# EXPERIMENTAL_RUST_MCP_RUNTIME_TIMEOUT_SECONDS=30 +# EXPERIMENTAL_RUST_MCP_SESSION_CORE_ENABLED= # enable Rust-owned MCP session metadata/lifecycle increment +# EXPERIMENTAL_RUST_MCP_EVENT_STORE_ENABLED= # enable Rust-owned resumable event-store backend +# EXPERIMENTAL_RUST_MCP_RESUME_CORE_ENABLED= # enable Rust-owned public GET /mcp replay/resume path +# EXPERIMENTAL_RUST_MCP_LIVE_STREAM_CORE_ENABLED= # enable Rust-owned public GET /mcp live SSE path +# EXPERIMENTAL_RUST_MCP_AFFINITY_CORE_ENABLED= # enable Rust-owned session-affinity forwarding path +# EXPERIMENTAL_RUST_MCP_SESSION_AUTH_REUSE_ENABLED= # enable Rust-owned session-bound auth-context reuse +# EXPERIMENTAL_RUST_MCP_RUNTIME_MANAGED= # launcher env, not a Pydantic setting +# ENABLE_RUST_MCP_RMCP_BUILD= # container build arg override for rmcp-enabled Rust MCP binary +# 
MCP_RUST_USE_RMCP_UPSTREAM_CLIENT= # runtime override for official rust-sdk upstream tools/call client +# MCP_RUST_LISTEN_HTTP=127.0.0.1:8787 # runtime env for bundled Rust sidecar +# MCP_RUST_LISTEN_UDS=/tmp/contextforge-mcp-rust.sock +# MCP_RUST_SESSION_CORE_ENABLED= # explicit sidecar env; defaults from EXPERIMENTAL_RUST_MCP_SESSION_CORE_ENABLED +# MCP_RUST_SESSION_TTL_SECONDS=3600 +# MCP_RUST_EVENT_STORE_ENABLED= # explicit sidecar env; defaults from EXPERIMENTAL_RUST_MCP_EVENT_STORE_ENABLED +# MCP_RUST_RESUME_CORE_ENABLED= # explicit sidecar env; defaults from EXPERIMENTAL_RUST_MCP_RESUME_CORE_ENABLED +# MCP_RUST_LIVE_STREAM_CORE_ENABLED= # explicit sidecar env; defaults from EXPERIMENTAL_RUST_MCP_LIVE_STREAM_CORE_ENABLED +# MCP_RUST_AFFINITY_CORE_ENABLED= # explicit sidecar env; defaults from EXPERIMENTAL_RUST_MCP_AFFINITY_CORE_ENABLED +# MCP_RUST_SESSION_AUTH_REUSE_ENABLED= # explicit sidecar env; defaults from EXPERIMENTAL_RUST_MCP_SESSION_AUTH_REUSE_ENABLED +# MCP_RUST_SESSION_AUTH_REUSE_TTL_SECONDS=30 +# MCP_RUST_EVENT_STORE_MAX_EVENTS_PER_STREAM=100 +# MCP_RUST_EVENT_STORE_TTL_SECONDS=3600 +# MCP_RUST_EVENT_STORE_POLL_INTERVAL_MS=250 +# MCP_RUST_LOG= # advanced runtime log override for the bundled Rust sidecar +# MCP_RUST_BACKEND_RPC_URL=http://127.0.0.1:4444/_internal/mcp/rpc +# MCP_RUST_REDIS_URL=redis://redis:6379/0 +# MCP_RUST_CACHE_PREFIX=mcpgw: +# MCP_RUST_DATABASE_URL=postgresql://postgres:mysecretpassword@pgbouncer:6432/mcp +# MCP_RUST_DB_POOL_MAX_SIZE=20 + # ============================================================================= # Performance Tuning (quick reference) # ============================================================================= @@ -1773,16 +1817,20 @@ OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 # ============================================================================= # Caches authentication data (user, team, revocation) to reduce database queries # Uses Redis when available, falls back to in-memory cache +# 
Applies to both Python MCP and Rust MCP because public MCP auth still runs in Python first # Enable Redis/in-memory caching for authentication data (default: true) # Significantly reduces database queries during authentication +# Disabling this also disables the shared auth cache benefit for RUST_MCP_MODE=edge/full # AUTH_CACHE_ENABLED=true # TTL in seconds for cached user data (default: 60, range: 10-300) +# Also affects MCP Streamable HTTP auth, including Rust-fronted MCP requests # AUTH_CACHE_USER_TTL=60 # TTL in seconds for token revocation cache (default: 30, range: 5-120) # Security-critical: keep short to limit exposure window for revoked tokens +# Also affects MCP auth on both Python and Rust runtime modes # AUTH_CACHE_REVOCATION_TTL=30 # TTL in seconds for team membership cache (default: 60, range: 10-300) @@ -1794,6 +1842,7 @@ OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 # Enable caching for get_user_teams() (default: true) # Set to false to disable teams list caching (useful for debugging) +# Also affects session-token MCP auth on Python and Rust modes # AUTH_CACHE_TEAMS_ENABLED=true # TTL in seconds for user teams list cache (default: 60, range: 10-300) @@ -1802,6 +1851,7 @@ OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 # Batch auth DB queries into single call (default: true) # Reduces 3 separate queries to 1, improving performance under load +# Streamable HTTP MCP auth uses this too before falling back to per-query checks # AUTH_CACHE_BATCH_QUERIES=true # Registry Cache Configuration diff --git a/.gitignore b/.gitignore index 434720af3d..8c2e75f5ff 100644 --- a/.gitignore +++ b/.gitignore @@ -298,6 +298,7 @@ TODO.md FIXMEs # Upgrade validation outputs +artifacts/ artifacts/upgrade-validation*/ # Debug & profiling artifacts @@ -354,6 +355,12 @@ docs/docs/test/license-check-report.json nginx.conf docker-compose.perf.yml +# Rust MCP runtime profiling artifacts +tools_rust/mcp_runtime/profiles/ +tools_rust/mcp_runtime/flamegraph*.svg 
+tools_rust/mcp_runtime/flamegraph*.html +tools_rust/mcp_runtime/perf.data* + # JMeter test results and local installation tests/jmeter/results/*.jtl tests/jmeter/results/*/ diff --git a/Containerfile.lite b/Containerfile.lite index f7270a963e..9b5dfc39d8 100644 --- a/Containerfile.lite +++ b/Containerfile.lite @@ -20,6 +20,7 @@ # Python major.minor series to track ARG PYTHON_VERSION=3.12 ARG ENABLE_RUST=false +ARG ENABLE_RUST_MCP_RMCP=false # Enable profiling tools (memray, py-spy) - off by default for smaller images # To enable: docker build --build-arg ENABLE_PROFILING=true -f Containerfile.lite . # Usage after enabling: @@ -36,6 +37,7 @@ ARG ENABLE_PROFILING=false ############################################################################### FROM quay.io/pypa/manylinux2014:2026.03.06-3 AS rust-builder-base ARG ENABLE_RUST +ARG ENABLE_RUST_MCP_RMCP # Set shell with pipefail for safety SHELL ["/bin/bash", "-o", "pipefail", "-c"] @@ -43,7 +45,11 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] # Only build if ENABLE_RUST=true RUN if [ "$ENABLE_RUST" != "true" ]; then \ echo "⏭️ Rust builds disabled (set --build-arg ENABLE_RUST=true to enable)"; \ - mkdir -p /build/rust-wheels; \ + mkdir -p /build/rust-wheels /build/tools_rust/mcp_runtime/target/release; \ + printf '#!/usr/bin/env sh\n' > /build/tools_rust/mcp_runtime/target/release/contextforge-mcp-runtime; \ + printf 'echo "Rust MCP runtime not built into this image. Rebuild with --build-arg ENABLE_RUST=true." 
>&2\n' >> /build/tools_rust/mcp_runtime/target/release/contextforge-mcp-runtime; \ + printf 'exit 1\n' >> /build/tools_rust/mcp_runtime/target/release/contextforge-mcp-runtime; \ + chmod +x /build/tools_rust/mcp_runtime/target/release/contextforge-mcp-runtime; \ exit 0; \ fi @@ -55,8 +61,9 @@ ENV PATH="/root/.cargo/bin:$PATH" WORKDIR /build -# Copy only Rust plugin files (only if ENABLE_RUST=true) +# Copy only Rust plugin/runtime files (only if ENABLE_RUST=true) COPY plugins_rust/ /build/plugins_rust/ +COPY tools_rust/mcp_runtime/ /build/tools_rust/mcp_runtime/ # Build each Rust plugin independently using Python 3.12 from manylinux image RUN if [ "$ENABLE_RUST" = "true" ]; then \ @@ -74,6 +81,21 @@ RUN if [ "$ENABLE_RUST" = "true" ]; then \ echo "⏭️ Skipping Rust plugin build"; \ fi +WORKDIR /build/tools_rust/mcp_runtime + +# Build the experimental Rust MCP runtime binary (only if ENABLE_RUST=true) +RUN if [ "$ENABLE_RUST" = "true" ]; then \ + if [ "$ENABLE_RUST_MCP_RMCP" = "true" ]; then \ + cargo build --release --features rmcp-upstream-client; \ + else \ + cargo build --release; \ + fi && \ + cp target/release/contextforge_mcp_runtime target/release/contextforge-mcp-runtime && \ + echo "✅ Rust MCP runtime built successfully"; \ + else \ + echo "⏭️ Skipping Rust MCP runtime build"; \ + fi + FROM rust-builder-base AS rust-builder ########################### @@ -127,6 +149,7 @@ COPY pyproject.toml /app/ # Copy Rust plugin wheels from rust-builder stage (if any exist) # ---------------------------------------------------------------------------- COPY --from=rust-builder /build/rust-wheels/ /tmp/rust-wheels/ +COPY --from=rust-builder /build/tools_rust/mcp_runtime/target/release/contextforge-mcp-runtime /app/bin/contextforge-mcp-runtime # ---------------------------------------------------------------------------- # Create and populate virtual environment @@ -139,6 +162,7 @@ COPY --from=rust-builder /build/rust-wheels/ /tmp/rust-wheels/ # - Remove build caches and 
build artifacts # ---------------------------------------------------------------------------- ARG ENABLE_RUST=false +ARG ENABLE_RUST_MCP_RMCP=false ARG ENABLE_PROFILING=false RUN set -euo pipefail \ && . /etc/profile.d/use-openssl.sh \ @@ -219,6 +243,8 @@ RUN chown -R 1001:0 /app \ FROM registry.access.redhat.com/ubi10/ubi-minimal:10.1-1772441549 AS runtime ARG PYTHON_VERSION=3.12 +ARG ENABLE_RUST=false +ARG ENABLE_RUST_MCP_RMCP=false ARG ENABLE_PROFILING=false # ---------------------------------------------------------------------------- @@ -285,6 +311,8 @@ COPY --from=builder --chown=1001:0 /app /app # - Disable pip version check to reduce startup time # ---------------------------------------------------------------------------- ENV PATH="/app/.venv/bin:${PATH}" \ + CONTEXTFORGE_ENABLE_RUST_BUILD=${ENABLE_RUST} \ + CONTEXTFORGE_ENABLE_RUST_MCP_RMCP_BUILD=${ENABLE_RUST_MCP_RMCP} \ PYTHONDONTWRITEBYTECODE=1 \ PYTHONUNBUFFERED=1 \ PYTHONHASHSEED=random \ diff --git a/Makefile b/Makefile index d46436d5a3..9f80881ee7 100644 --- a/Makefile +++ b/Makefile @@ -19,6 +19,10 @@ SHELL := /bin/bash # Rust build configuration (set to 1 to enable Rust builds, 0 to disable) # Default is disabled to avoid requiring Rust toolchain for standard builds ENABLE_RUST_BUILD ?= 0 +ENABLE_RUST_MCP_RMCP_BUILD ?= +RUST_MCP_BUILD ?= 0 +RUST_MCP_MODE ?= off +RUST_MCP_LOG ?= warn # Project variables PROJECT_NAME = mcpgateway @@ -646,13 +650,14 @@ clean: # help: query-log-analyze - Analyze query log for N+1 patterns and slow queries # help: query-log-clear - Clear database query log files -.PHONY: smoketest test-mcp-cli test-mcp-rbac test test-verbose test-profile coverage test-docs pytest-examples test-curl htmlcov doctest doctest-verbose doctest-coverage doctest-check test-db-perf test-db-perf-verbose 2025-11-25 2025-11-25-core 2025-11-25-tasks 2025-11-25-auth 2025-11-25-report dev-query-log query-log-tail query-log-analyze query-log-clear load-test load-test-ui load-test-light 
load-test-heavy load-test-sustained load-test-stress load-test-report load-test-compose load-test-timeserver load-test-fasttime load-test-1000 load-test-summary load-test-baseline load-test-baseline-ui load-test-baseline-stress load-test-agentgateway-mcp-server-time +.PHONY: smoketest test-mcp-cli test-mcp-rbac test-mcp-plugin-parity test-mcp-access-matrix test-mcp-session-isolation test-mcp-session-isolation-load test test-verbose test-profile coverage test-docs pytest-examples test-curl htmlcov doctest doctest-verbose doctest-coverage doctest-check test-db-perf test-db-perf-verbose 2025-11-25 2025-11-25-core 2025-11-25-tasks 2025-11-25-auth 2025-11-25-report dev-query-log query-log-tail query-log-analyze query-log-clear load-test load-test-ui load-test-light load-test-heavy load-test-sustained load-test-stress load-test-report load-test-compose load-test-timeserver load-test-fasttime load-test-1000 load-test-summary load-test-baseline load-test-baseline-ui load-test-baseline-stress load-test-agentgateway-mcp-server-time # Dirs/files always excluded from standard pytest runs PYTEST_IGNORE := tests/fuzz tests/manual test.py \ tests/e2e/test_entra_id_integration.py \ tests/e2e/test_mcp_cli_protocol.py \ - tests/e2e/test_mcp_rbac_transport.py + tests/e2e/test_mcp_rbac_transport.py \ + tests/e2e_rust # Expand to --ignore= flags for pytest CLI PYTEST_IGNORE_FLAGS := $(foreach p,$(PYTEST_IGNORE),--ignore=$(p)) @@ -683,6 +688,51 @@ test-mcp-rbac: ## RBAC + multi-transport MCP protocol tests (needs live gateway || { echo "❌ MCP RBAC transport tests failed!"; exit 1; }; \ echo "✅ MCP RBAC transport tests passed!"' +test-mcp-access-matrix: ## Detailed Rust MCP role/access matrix test with strong tool/resource/prompt sentinels + @echo "🧪 Running MCP role/access matrix tests against $${MCP_CLI_BASE_URL:-http://localhost:8080}..." 
+ @echo " Requires: docker-compose stack rebuilt in Rust edge/full mode" + @/bin/bash -c 'source $(VENV_DIR)/bin/activate && \ + uv run --active pytest tests/e2e_rust/test_mcp_access_matrix.py -v -s --tb=short \ + || { echo "❌ MCP role/access matrix tests failed!"; exit 1; }; \ + echo "✅ MCP role/access matrix tests passed!"' + +test-mcp-plugin-parity: ## MCP plugin parity E2E for current Python or Rust stack using a test-specific plugin config + @echo "🧪 Running MCP plugin parity tests against $${MCP_CLI_BASE_URL:-http://localhost:8080}..." + @echo " Requires: stack started with PLUGINS_CONFIG_FILE=plugins/plugin_parity_config.yaml" + @/bin/bash -c 'source $(VENV_DIR)/bin/activate && \ + uv run --active pytest tests/e2e/test_mcp_plugin_parity.py -v -s --tb=short \ + || { echo "❌ MCP plugin parity tests failed!"; exit 1; }; \ + echo "✅ MCP plugin parity tests passed!"' + +test-mcp-session-isolation: ## MCP session/auth isolation tests for the Rust public transport path + @echo "🧪 Running MCP session/auth isolation tests against $${MCP_CLI_BASE_URL:-http://localhost:8080}..." + @echo " Requires: docker-compose stack rebuilt in Rust edge/full mode" + @/bin/bash -c 'source $(VENV_DIR)/bin/activate && \ + uv run --active pytest tests/e2e_rust/test_mcp_session_isolation.py -v -s --tb=short \ + || { echo "❌ MCP session/auth isolation tests failed!"; exit 1; }; \ + echo "✅ MCP session/auth isolation tests passed!"' + +MCP_ISOLATION_LOCUSTFILE ?= tests/loadtest/locustfile_mcp_isolation.py +MCP_ISOLATION_LOAD_HOST ?= http://localhost:8080 +MCP_ISOLATION_LOAD_USERS ?= 12 +MCP_ISOLATION_LOAD_SPAWN_RATE ?= 3 +MCP_ISOLATION_LOAD_RUN_TIME ?= 60s + +test-mcp-session-isolation-load: ## Multi-user MCP session/auth isolation correctness load test + @echo "🧪 Running MCP session/auth isolation load test against $(MCP_ISOLATION_LOAD_HOST)..." 
+ @echo " Requires: docker-compose stack rebuilt in Rust full mode" + @test -d "$(VENV_DIR)" || $(MAKE) venv + @/bin/bash -eu -o pipefail -c 'source $(VENV_DIR)/bin/activate && \ + locust -f $(MCP_ISOLATION_LOCUSTFILE) \ + --host=$(MCP_ISOLATION_LOAD_HOST) \ + --users=$(MCP_ISOLATION_LOAD_USERS) \ + --spawn-rate=$(MCP_ISOLATION_LOAD_SPAWN_RATE) \ + --run-time=$(MCP_ISOLATION_LOAD_RUN_TIME) \ + --headless \ + --stop-timeout=30 \ + --exit-code-on-error=1 \ + --only-summary' + test: @echo "🧪 Running tests..." @test -d "$(VENV_DIR)" || $(MAKE) venv @@ -1289,6 +1339,39 @@ testing-up: ## Start testing stack (Locust + A2A @echo " Next:" @echo " • Open Locust: http://localhost:8089 (default host is http://nginx:80)" +.PHONY: testing-up-rust +testing-up-rust: ## Start testing stack with RUST_MCP_MODE=edge + @RUST_MCP_MODE=edge RUST_MCP_LOG=$(RUST_MCP_LOG) $(MAKE) testing-up + +.PHONY: testing-up-rust-shadow +testing-up-rust-shadow: ## Start testing stack with RUST_MCP_MODE=shadow + @RUST_MCP_MODE=shadow RUST_MCP_LOG=$(RUST_MCP_LOG) $(MAKE) testing-up + +.PHONY: testing-up-rust-full +testing-up-rust-full: ## Start testing stack with RUST_MCP_MODE=full + @RUST_MCP_MODE=full RUST_MCP_LOG=$(RUST_MCP_LOG) $(MAKE) testing-up + +.PHONY: testing-rebuild-rust +testing-rebuild-rust: ## Rebuild Rust image with no cache, then start testing stack in edge mode + @$(MAKE) testing-down + @$(MAKE) compose-clean + @$(MAKE) docker-prod-rust-no-cache + @RUST_MCP_MODE=edge RUST_MCP_LOG=$(RUST_MCP_LOG) $(MAKE) testing-up + +.PHONY: testing-rebuild-rust-shadow +testing-rebuild-rust-shadow: ## Rebuild Rust image with no cache, then start testing stack in shadow mode + @$(MAKE) testing-down + @$(MAKE) compose-clean + @$(MAKE) docker-prod-rust-no-cache + @RUST_MCP_MODE=shadow RUST_MCP_LOG=$(RUST_MCP_LOG) $(MAKE) testing-up + +.PHONY: testing-rebuild-rust-full +testing-rebuild-rust-full: ## Rebuild Rust image with no cache, then start testing stack in full mode + @$(MAKE) testing-down + @$(MAKE) 
compose-clean + @$(MAKE) docker-prod-rust-no-cache + @RUST_MCP_MODE=full RUST_MCP_LOG=$(RUST_MCP_LOG) $(MAKE) testing-up + .PHONY: testing-down testing-down: ## Stop testing stack @echo "🧪 Stopping testing stack..." @@ -2238,6 +2321,19 @@ load-test-agentgateway-mcp-server-time: ## Load test external MCP server (loc MCP_PROTOCOL_LOCUSTFILE ?= tests/loadtest/locustfile_mcp_protocol.py MCP_PROTOCOL_HOST ?= http://localhost:4444 +MCP_BENCHMARK_HOST ?= http://localhost:8080 +MCP_BENCHMARK_SERVER_ID ?= 9779b6698cbd4b4995ee04a4fab38737 +MCP_BENCHMARK_USERS ?= 125 +MCP_BENCHMARK_SPAWN_RATE ?= 30 +MCP_BENCHMARK_RUN_TIME ?= 60s +MCP_BENCHMARK_HIGH_USERS ?= 300 +MCP_BENCHMARK_HIGH_SPAWN_RATE ?= 50 +MCP_BENCHMARK_HIGH_RUN_TIME ?= 60s +MCP_BENCHMARK_WORKERS ?= 4 +MCP_BENCHMARK_MIXED_MASTER_PORT ?= 5567 +MCP_BENCHMARK_TOOLS_MASTER_PORT ?= 5569 +MCP_BENCHMARK_LOCUST_LOG_LEVEL ?= ERROR +MCP_BENCHMARK_WORKER_LOG_DIR ?= reports/mcp_benchmark_workers load-test-mcp-protocol: ## MCP Streamable HTTP protocol test (150 users, 2min) @echo "🔬 Running MCP STREAMABLE HTTP protocol load test..." @@ -2274,6 +2370,119 @@ load-test-mcp-protocol-ui: ## MCP Streamable HTTP protocol test --run-time=120s \ --class-picker' +# help: benchmark-mcp-mixed - Quick mixed MCP benchmark against the testing stack +# help: benchmark-mcp-tools - Quick tools-only MCP benchmark against the testing stack +# help: benchmark-mcp-mixed-300 - Distributed 300-user mixed MCP benchmark +# help: benchmark-mcp-tools-300 - Distributed 300-user tools-only MCP benchmark + +.PHONY: benchmark-mcp-mixed +benchmark-mcp-mixed: ## Quick mixed MCP benchmark against the testing stack + @echo "📊 Running mixed MCP benchmark..." 
+ @echo " Host: $(MCP_BENCHMARK_HOST)" + @echo " Server: $(MCP_BENCHMARK_SERVER_ID)" + @echo " Users: $(MCP_BENCHMARK_USERS), Spawn: $(MCP_BENCHMARK_SPAWN_RATE)/s, Duration: $(MCP_BENCHMARK_RUN_TIME)" + @test -d "$(VENV_DIR)" || $(MAKE) venv + @/bin/bash -eu -o pipefail -c 'source $(VENV_DIR)/bin/activate && \ + LOCUST_LOG_LEVEL=$(MCP_BENCHMARK_LOCUST_LOG_LEVEL) MCP_SERVER_ID=$(MCP_BENCHMARK_SERVER_ID) \ + locust -f $(MCP_PROTOCOL_LOCUSTFILE) \ + --host=$(MCP_BENCHMARK_HOST) \ + --users=$(MCP_BENCHMARK_USERS) \ + --spawn-rate=$(MCP_BENCHMARK_SPAWN_RATE) \ + --run-time=$(MCP_BENCHMARK_RUN_TIME) \ + --headless \ + --only-summary' + +.PHONY: benchmark-mcp-tools +benchmark-mcp-tools: ## Quick tools-only MCP benchmark against the testing stack + @echo "📊 Running tools-only MCP benchmark..." + @echo " Host: $(MCP_BENCHMARK_HOST)" + @echo " Server: $(MCP_BENCHMARK_SERVER_ID)" + @echo " Users: $(MCP_BENCHMARK_USERS), Spawn: $(MCP_BENCHMARK_SPAWN_RATE)/s, Duration: $(MCP_BENCHMARK_RUN_TIME)" + @test -d "$(VENV_DIR)" || $(MAKE) venv + @/bin/bash -eu -o pipefail -c 'source $(VENV_DIR)/bin/activate && \ + LOCUST_LOG_LEVEL=$(MCP_BENCHMARK_LOCUST_LOG_LEVEL) MCP_SERVER_ID=$(MCP_BENCHMARK_SERVER_ID) \ + locust -f $(MCP_PROTOCOL_LOCUSTFILE) \ + --host=$(MCP_BENCHMARK_HOST) \ + --users=$(MCP_BENCHMARK_USERS) \ + --spawn-rate=$(MCP_BENCHMARK_SPAWN_RATE) \ + --run-time=$(MCP_BENCHMARK_RUN_TIME) \ + --headless \ + --only-summary \ + MCPToolCallerUser' + +.PHONY: benchmark-mcp-mixed-300 +benchmark-mcp-mixed-300: ## Distributed 300-user mixed MCP benchmark + @echo "📊 Running distributed mixed MCP benchmark..." 
+ @echo " Host: $(MCP_BENCHMARK_HOST)" + @echo " Server: $(MCP_BENCHMARK_SERVER_ID)" + @echo " Users: $(MCP_BENCHMARK_HIGH_USERS), Spawn: $(MCP_BENCHMARK_HIGH_SPAWN_RATE)/s, Duration: $(MCP_BENCHMARK_HIGH_RUN_TIME), Workers: $(MCP_BENCHMARK_WORKERS)" + @test -d "$(VENV_DIR)" || $(MAKE) venv + @mkdir -p $(MCP_BENCHMARK_WORKER_LOG_DIR) + @/bin/bash -eu -o pipefail -c 'source $(VENV_DIR)/bin/activate; \ + pids=""; \ + cleanup() { \ + for pid in $$pids; do kill $$pid 2>/dev/null || true; done; \ + wait $$pids 2>/dev/null || true; \ + }; \ + trap cleanup EXIT INT TERM; \ + for i in $$(seq 1 $(MCP_BENCHMARK_WORKERS)); do \ + LOCUST_LOG_LEVEL=$(MCP_BENCHMARK_LOCUST_LOG_LEVEL) MCP_SERVER_ID=$(MCP_BENCHMARK_SERVER_ID) \ + locust -f $(MCP_PROTOCOL_LOCUSTFILE) \ + --worker \ + --master-host=127.0.0.1 \ + --master-port=$(MCP_BENCHMARK_MIXED_MASTER_PORT) \ + > $(MCP_BENCHMARK_WORKER_LOG_DIR)/mixed_worker_$$i.log 2>&1 & \ + pids="$$pids $$!"; \ + done; \ + LOCUST_LOG_LEVEL=$(MCP_BENCHMARK_LOCUST_LOG_LEVEL) MCP_SERVER_ID=$(MCP_BENCHMARK_SERVER_ID) \ + locust -f $(MCP_PROTOCOL_LOCUSTFILE) \ + --host=$(MCP_BENCHMARK_HOST) \ + --master \ + --headless \ + --expect-workers=$(MCP_BENCHMARK_WORKERS) \ + --master-bind-port=$(MCP_BENCHMARK_MIXED_MASTER_PORT) \ + --users=$(MCP_BENCHMARK_HIGH_USERS) \ + --spawn-rate=$(MCP_BENCHMARK_HIGH_SPAWN_RATE) \ + --run-time=$(MCP_BENCHMARK_HIGH_RUN_TIME) \ + --only-summary' + +.PHONY: benchmark-mcp-tools-300 +benchmark-mcp-tools-300: ## Distributed 300-user tools-only MCP benchmark + @echo "📊 Running distributed tools-only MCP benchmark..." 
+ @echo " Host: $(MCP_BENCHMARK_HOST)" + @echo " Server: $(MCP_BENCHMARK_SERVER_ID)" + @echo " Users: $(MCP_BENCHMARK_HIGH_USERS), Spawn: $(MCP_BENCHMARK_HIGH_SPAWN_RATE)/s, Duration: $(MCP_BENCHMARK_HIGH_RUN_TIME), Workers: $(MCP_BENCHMARK_WORKERS)" + @test -d "$(VENV_DIR)" || $(MAKE) venv + @mkdir -p $(MCP_BENCHMARK_WORKER_LOG_DIR) + @/bin/bash -eu -o pipefail -c 'source $(VENV_DIR)/bin/activate; \ + pids=""; \ + cleanup() { \ + for pid in $$pids; do kill $$pid 2>/dev/null || true; done; \ + wait $$pids 2>/dev/null || true; \ + }; \ + trap cleanup EXIT INT TERM; \ + for i in $$(seq 1 $(MCP_BENCHMARK_WORKERS)); do \ + LOCUST_LOG_LEVEL=$(MCP_BENCHMARK_LOCUST_LOG_LEVEL) MCP_SERVER_ID=$(MCP_BENCHMARK_SERVER_ID) \ + locust -f $(MCP_PROTOCOL_LOCUSTFILE) \ + --worker \ + --master-host=127.0.0.1 \ + --master-port=$(MCP_BENCHMARK_TOOLS_MASTER_PORT) \ + > $(MCP_BENCHMARK_WORKER_LOG_DIR)/tools_worker_$$i.log 2>&1 & \ + pids="$$pids $$!"; \ + done; \ + LOCUST_LOG_LEVEL=$(MCP_BENCHMARK_LOCUST_LOG_LEVEL) MCP_SERVER_ID=$(MCP_BENCHMARK_SERVER_ID) \ + locust -f $(MCP_PROTOCOL_LOCUSTFILE) \ + --host=$(MCP_BENCHMARK_HOST) \ + --master \ + --headless \ + --expect-workers=$(MCP_BENCHMARK_WORKERS) \ + --master-bind-port=$(MCP_BENCHMARK_TOOLS_MASTER_PORT) \ + --users=$(MCP_BENCHMARK_HIGH_USERS) \ + --spawn-rate=$(MCP_BENCHMARK_HIGH_SPAWN_RATE) \ + --run-time=$(MCP_BENCHMARK_HIGH_RUN_TIME) \ + --only-summary \ + MCPToolCallerUser' + load-test-mcp-protocol-heavy: ## MCP Streamable HTTP protocol heavy test (500 users, 5min) @echo "🔬 Running MCP STREAMABLE HTTP protocol HEAVY load test..." @echo " Host: $(MCP_PROTOCOL_HOST)" @@ -4605,13 +4814,26 @@ PLATFORM ?= linux/$(shell uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/') container-build: @echo "🔨 Building with $(CONTAINER_RUNTIME) for platform $(PLATFORM)..." 
- @RUST_ARG=""; PROFILING_ARG=""; \ - if [ "$(ENABLE_RUST_BUILD)" = "1" ]; then \ + @RUST_BUILD_VALUE="$(ENABLE_RUST_BUILD)"; RMCP_BUILD_VALUE="$(ENABLE_RUST_MCP_RMCP_BUILD)"; RUST_ARG=""; RMCP_ARG=""; PROFILING_ARG=""; \ + if [ "$(RUST_MCP_BUILD)" = "1" ] || [ "$(RUST_MCP_BUILD)" = "true" ]; then \ + RUST_BUILD_VALUE="1"; \ + if [ -z "$$RMCP_BUILD_VALUE" ] || [ "$$RMCP_BUILD_VALUE" = "0" ] || [ "$$RMCP_BUILD_VALUE" = "false" ]; then \ + RMCP_BUILD_VALUE="1"; \ + fi; \ + fi; \ + if [ "$$RUST_BUILD_VALUE" = "1" ] || [ "$$RUST_BUILD_VALUE" = "true" ]; then \ echo "🦀 Building container WITH Rust plugins..."; \ RUST_ARG="--build-arg ENABLE_RUST=true"; \ + if [ "$$RMCP_BUILD_VALUE" = "1" ] || [ "$$RMCP_BUILD_VALUE" = "true" ]; then \ + echo "🦀 Enabling rmcp support in the Rust MCP runtime..."; \ + RMCP_ARG="--build-arg ENABLE_RUST_MCP_RMCP=true"; \ + else \ + RMCP_ARG="--build-arg ENABLE_RUST_MCP_RMCP=false"; \ + fi; \ else \ - echo "⏭️ Building container WITHOUT Rust plugins (set ENABLE_RUST_BUILD=1 to enable)"; \ + echo "⏭️ Building container WITHOUT Rust plugins (set RUST_MCP_BUILD=1 or ENABLE_RUST_BUILD=1 to enable)"; \ RUST_ARG="--build-arg ENABLE_RUST=false"; \ + RMCP_ARG="--build-arg ENABLE_RUST_MCP_RMCP=false"; \ fi; \ if [ "$(ENABLE_PROFILING_BUILD)" = "1" ]; then \ echo "📊 Building container WITH profiling tools (memray)..."; \ @@ -4623,7 +4845,9 @@ container-build: --platform=$(PLATFORM) \ -f $(CONTAINER_FILE) \ $$RUST_ARG \ + $$RMCP_ARG \ $$PROFILING_ARG \ + $(DOCKER_BUILD_ARGS) \ --tag $(IMAGE_BASE):$(IMAGE_TAG) \ . 
@echo "✅ Built image: $(call get_image_name)" @@ -5123,6 +5347,12 @@ docker: docker-prod: @DOCKER_CONTENT_TRUST=1 $(MAKE) container-build CONTAINER_RUNTIME=docker CONTAINER_FILE=Containerfile.lite +docker-prod-rust: + @DOCKER_CONTENT_TRUST=1 $(MAKE) container-build CONTAINER_RUNTIME=docker CONTAINER_FILE=Containerfile.lite RUST_MCP_BUILD=1 + +docker-prod-rust-no-cache: + @DOCKER_CONTENT_TRUST=1 $(MAKE) container-build CONTAINER_RUNTIME=docker CONTAINER_FILE=Containerfile.lite RUST_MCP_BUILD=1 DOCKER_BUILD_ARGS="--no-cache" + # Build production image with profiling tools (memray) for performance debugging # Usage: make docker-prod-profiling # Then run with SYS_PTRACE capability: @@ -5763,7 +5993,7 @@ MINIKUBE_ADDONS ?= ingress ingress-dns metrics-server dashboard registry regist # mcpgateway/mcpgateway:latest. Override with IMAGE= to use a # remote registry (e.g. ghcr.io/ibm/mcp-context-forge:v0.9.0). TAG ?= latest # override with TAG= -IMAGE ?= $(IMG):$(TAG) # or IMAGE=ghcr.io/ibm/mcp-context-forge:$(TAG) +IMAGE ?= $(IMAGE_LOCAL) # or IMAGE=ghcr.io/ibm/mcp-context-forge:$(TAG) # ----------------------------------------------------------------------------- # 🆘 HELP TARGETS (parsed by `make help`) @@ -5854,7 +6084,7 @@ minikube-dashboard: .PHONY: minikube-context minikube-context: @echo "🎯 Switching kubectl context to Minikube ..." - kubectl config use-context minikube + kubectl config use-context $(MINIKUBE_PROFILE) .PHONY: minikube-ssh minikube-ssh: @@ -7730,7 +7960,7 @@ migration-test-all: migration-setup ## Run comprehensive migration test s @echo "📋 Testing PostgreSQL migrations..." @/bin/bash -c "source $(VENV_DIR)/bin/activate && \ pytest $(MIGRATION_TEST_DIR)/test_compose_postgres_migrations.py \ - -v --tb=short --maxfail=3 -m 'not slow' \ + -v --tb=short --maxfail=3 \ --log-cli-level=INFO --log-cli-format='%(asctime)s [%(levelname)s] %(name)s: %(message)s'" @echo "" @echo "📊 Generating migration test report..." 
@@ -7752,7 +7982,7 @@ migration-test-postgres: ## Run PostgreSQL compose migration t @test -d "$(VENV_DIR)" || $(MAKE) venv @/bin/bash -c "source $(VENV_DIR)/bin/activate && \ pytest $(MIGRATION_TEST_DIR)/test_compose_postgres_migrations.py \ - -v --tb=short --log-cli-level=INFO -m 'not slow'" + -v --tb=short --log-cli-level=INFO" @echo "✅ PostgreSQL migration tests complete!" migration-test-performance: ## Run migration performance benchmarking @@ -7864,10 +8094,14 @@ upgrade-validate: ## Validate fresh + upgrade DB startup # help: rust-build-all-platforms - Build for all platforms (Linux, macOS, Windows) # help: rust-cross - Install targets + build all Linux (convenience) # help: rust-cross-install-build - Install targets + build all platforms (one command) +# help: rust-mcp-runtime-build - Build the experimental Rust MCP runtime +# help: rust-mcp-runtime-test - Run tests for the experimental Rust MCP runtime +# help: rust-mcp-runtime-run - Run the experimental Rust MCP runtime against local gateway /rpc .PHONY: rust-build rust-dev rust-test rust-test-integration rust-python-test rust-test-all rust-bench rust-bench-compare rust-compare rust-check rust-clean rust-verify rust-verify-stubs .PHONY: rust-ensure-deps rust-install-deps rust-install-targets rust-install .PHONY: rust-build-all-linux rust-build-all-platforms rust-cross rust-cross-install-build +.PHONY: rust-mcp-runtime-build rust-mcp-runtime-test rust-mcp-runtime-run rust-ensure-deps: ## Ensure Rust toolchain, maturin, and all plugins are installed @if ! 
command -v rustup > /dev/null 2>&1; then \ @@ -7994,6 +8228,18 @@ rust-cross: rust-install-targets rust-build-all-linux ## Install targets + buil rust-cross-install-build: rust-install-deps rust-install-targets rust-build-all-platforms ## Install targets + build all platforms (one command) @echo "✅ Full cross-compilation setup and build complete" +rust-mcp-runtime-build: ## Build the experimental Rust MCP runtime + @echo "🦀 Building experimental Rust MCP runtime..." + @cd tools_rust/mcp_runtime && cargo build --release + +rust-mcp-runtime-test: ## Run tests for the experimental Rust MCP runtime + @echo "🧪 Running Rust MCP runtime tests..." + @cd tools_rust/mcp_runtime && cargo test --release + +rust-mcp-runtime-run: ## Run the experimental Rust MCP runtime against local gateway /rpc + @echo "🚀 Starting Rust MCP runtime on http://127.0.0.1:8787 with backend http://127.0.0.1:4444/rpc" + @cd tools_rust/mcp_runtime && cargo run --release -- --backend-rpc-url http://127.0.0.1:4444/rpc --listen-http 127.0.0.1:8787 + .PHONY: conc-02-gateways conc-02-gateways: ## Run CONC-02 gateways read-during-write check (manual env/token setup required) @/bin/bash tests/manual/concurrency/run_conc_02_gateways.sh diff --git a/charts/mcp-stack/templates/configmap-nginx-proxy.yaml b/charts/mcp-stack/templates/configmap-nginx-proxy.yaml index d4bc1da10b..35cdb248d4 100644 --- a/charts/mcp-stack/templates/configmap-nginx-proxy.yaml +++ b/charts/mcp-stack/templates/configmap-nginx-proxy.yaml @@ -56,6 +56,28 @@ data: allow all; } + location ~ ^(/mcp/?|/servers/.*/mcp/?)$ { + proxy_http_version 1.1; + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $forwarded_proto; + proxy_set_header X-Forwarded-Host $http_host; + proxy_set_header Connection ""; + + proxy_request_buffering off; + proxy_buffering off; + proxy_cache off; + add_header X-Accel-Buffering "no" always; + + 
proxy_connect_timeout 1h; + proxy_read_timeout 1h; + proxy_send_timeout 1h; + send_timeout {{ .Values.nginxProxy.config.sendTimeout }}; + + proxy_pass http://gateway_upstream; + } + location / { proxy_http_version 1.1; proxy_set_header Host $http_host; diff --git a/charts/mcp-stack/templates/registration-jobs.yaml b/charts/mcp-stack/templates/registration-jobs.yaml index c4a8f2cb8d..44e6fc9212 100644 --- a/charts/mcp-stack/templates/registration-jobs.yaml +++ b/charts/mcp-stack/templates/registration-jobs.yaml @@ -142,6 +142,8 @@ spec: print("fast_time registration complete") PY + + exit 0 {{- end }} {{- if and .Values.testing.enabled $registrationEnabled .Values.testing.fastTest.register.enabled .Values.testing.fastTestServer.enabled }} @@ -236,6 +238,8 @@ spec: api_request("POST", "/gateways", {"name": gateway_name, "url": gateway_url, "transport": transport}) print("fast_test registration complete") PY + + exit 0 {{- end }} {{- if and .Values.testing.enabled $registrationEnabled .Values.testing.a2a.register.enabled .Values.testing.a2aEchoAgent.enabled }} @@ -344,6 +348,8 @@ spec: api_request("POST", "/a2a", payload) print("a2a echo registration complete") PY + + exit 0 {{- end }} {{- if and .Values.benchmark.enabled .Values.benchmark.register.enabled $registrationEnabled }} @@ -441,4 +447,6 @@ spec: print("benchmark registration complete") PY + + exit 0 {{- end }} diff --git a/charts/mcp-stack/values-minikube.yaml b/charts/mcp-stack/values-minikube.yaml index f784fc1fff..00ebc530d1 100644 --- a/charts/mcp-stack/values-minikube.yaml +++ b/charts/mcp-stack/values-minikube.yaml @@ -58,6 +58,12 @@ mcpContextForge: PASSWORD_REQUIRE_SPECIAL: "false" REQUIRE_STRONG_SECRETS: "false" +migration: + image: + repository: ghcr.io/ibm/mcp-context-forge + tag: "1.0.0-RC-2" + pullPolicy: Never + # Disable TLS on fast-time-server ingress for minikube mcpFastTimeServer: ingress: diff --git a/docker-compose-embedded.yml b/docker-compose-embedded.yml index dd428eb6cd..e549c80868 
100644 --- a/docker-compose-embedded.yml +++ b/docker-compose-embedded.yml @@ -45,7 +45,7 @@ services: - ./infra/iframe-test/nginx-embedded.conf:/etc/nginx/nginx.conf:ro gateway: - image: ghcr.io/ibm/mcp-context-forge:1ba8130f7fb82e6f393435be8d064879f234ace1 + image: ${IMAGE_LOCAL:-mcpgateway/mcpgateway:latest} environment: # ── Embedded UI ── - MCPGATEWAY_UI_EMBEDDED=true @@ -147,7 +147,7 @@ services: # Benchmark services: inherited from base compose (profiles: ["benchmark"]) # Activated via --profile benchmark in Makefile targets register_benchmark: - image: ghcr.io/ibm/mcp-context-forge:1ba8130f7fb82e6f393435be8d064879f234ace1 + image: ${IMAGE_LOCAL:-mcpgateway/mcpgateway:latest} environment: - JWT_SECRET_KEY=${JWT_SECRET_KEY:-my-test-key} - BENCHMARK_SERVER_COUNT=${BENCHMARK_SERVER_COUNT:-10} diff --git a/docker-compose.yml b/docker-compose.yml index d5cfc2bd47..b5f93cc18c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -117,6 +117,9 @@ services: build: context: . dockerfile: Containerfile.lite # Same one the Makefile builds + args: + ENABLE_RUST: ${ENABLE_RUST_BUILD:-${RUST_MCP_BUILD:-false}} + ENABLE_RUST_MCP_RMCP: ${ENABLE_RUST_MCP_RMCP_BUILD:-${RUST_MCP_BUILD:-false}} restart: unless-stopped # NOTE: When using replicas > 1, access via nginx:8080 instead of direct port 4444 # ports: @@ -139,13 +142,65 @@ services: # Both achieve same RPS when database is the bottleneck. 
# ═══════════════════════════════════════════════════════════════════════════ # - HTTP_SERVER=granian # Rust-based, native backpressure, +47% memory, -8% CPU - - HTTP_SERVER=gunicorn # Python-based, battle-tested, lower memory usage + - HTTP_SERVER=${HTTP_SERVER:-gunicorn} # Python-based by default; set granian to benchmark the Rust ASGI front door - HOST=0.0.0.0 - PORT=4444 # Domain for CORS/cookies (nginx default at http://localhost:8080) - APP_DOMAIN=${APP_DOMAIN:-http://localhost:8080} # Transport: sse, streamablehttp, http, or all (default: all) - TRANSPORT_TYPE=streamablehttp + # High-level Rust MCP UX: + # RUST_MCP_MODE=off -> Python MCP transport + # RUST_MCP_MODE=shadow -> Rust sidecar is present, but public /mcp stays on Python for safe fallback + # RUST_MCP_MODE=edge -> direct public /mcp on Rust with managed UDS sidecar defaults + # RUST_MCP_MODE=full -> edge + Rust session/event-store/resume/live-stream/affinity cores + # Advanced EXPERIMENTAL_/MCP_RUST_* env vars below still work as explicit overrides. + - RUST_MCP_MODE=${RUST_MCP_MODE:-off} + - RUST_MCP_LOG=${RUST_MCP_LOG:-warn} + # Advanced override: force the session-auth-reuse fast path on/off + # independently of RUST_MCP_MODE. Prefer the mode presets above. + - RUST_MCP_SESSION_AUTH_REUSE=${RUST_MCP_SESSION_AUTH_REUSE:-} + # Experimental Rust MCP runtime internals. Leave unset for the simple + # RUST_MCP_MODE=shadow/edge/full paths; set explicitly only to override. 
+ - EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED=${EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED:-} + - EXPERIMENTAL_RUST_MCP_RUNTIME_MANAGED=${EXPERIMENTAL_RUST_MCP_RUNTIME_MANAGED:-} + - EXPERIMENTAL_RUST_MCP_RUNTIME_URL=${EXPERIMENTAL_RUST_MCP_RUNTIME_URL:-} + - EXPERIMENTAL_RUST_MCP_RUNTIME_UDS=${EXPERIMENTAL_RUST_MCP_RUNTIME_UDS:-} + - EXPERIMENTAL_RUST_MCP_RUNTIME_TIMEOUT_SECONDS=${EXPERIMENTAL_RUST_MCP_RUNTIME_TIMEOUT_SECONDS:-30} + - EXPERIMENTAL_RUST_MCP_SESSION_CORE_ENABLED=${EXPERIMENTAL_RUST_MCP_SESSION_CORE_ENABLED:-} + - EXPERIMENTAL_RUST_MCP_EVENT_STORE_ENABLED=${EXPERIMENTAL_RUST_MCP_EVENT_STORE_ENABLED:-} + - EXPERIMENTAL_RUST_MCP_RESUME_CORE_ENABLED=${EXPERIMENTAL_RUST_MCP_RESUME_CORE_ENABLED:-} + - EXPERIMENTAL_RUST_MCP_LIVE_STREAM_CORE_ENABLED=${EXPERIMENTAL_RUST_MCP_LIVE_STREAM_CORE_ENABLED:-} + - EXPERIMENTAL_RUST_MCP_AFFINITY_CORE_ENABLED=${EXPERIMENTAL_RUST_MCP_AFFINITY_CORE_ENABLED:-} + - EXPERIMENTAL_RUST_MCP_SESSION_AUTH_REUSE_ENABLED=${EXPERIMENTAL_RUST_MCP_SESSION_AUTH_REUSE_ENABLED:-} + # Sidecar-level knobs. These default from the mode-derived experimental settings above. 
+ - MCP_RUST_LISTEN_HTTP=${MCP_RUST_LISTEN_HTTP:-} + - MCP_RUST_LISTEN_UDS=${MCP_RUST_LISTEN_UDS:-} + - MCP_RUST_PUBLIC_LISTEN_HTTP=${MCP_RUST_PUBLIC_LISTEN_HTTP:-} + - MCP_RUST_BACKEND_RPC_URL=${MCP_RUST_BACKEND_RPC_URL:-http://127.0.0.1:4444/_internal/mcp/rpc} + - MCP_RUST_REDIS_URL=${MCP_RUST_REDIS_URL:-} + - MCP_RUST_CACHE_PREFIX=${MCP_RUST_CACHE_PREFIX:-mcpgw:} + - MCP_RUST_DATABASE_URL=${MCP_RUST_DATABASE_URL:-} + - MCP_RUST_DB_POOL_MAX_SIZE=${MCP_RUST_DB_POOL_MAX_SIZE:-20} + - MCP_RUST_SESSION_CORE_ENABLED=${MCP_RUST_SESSION_CORE_ENABLED:-} + - MCP_RUST_SESSION_TTL_SECONDS=${MCP_RUST_SESSION_TTL_SECONDS:-3600} + - MCP_RUST_EVENT_STORE_ENABLED=${MCP_RUST_EVENT_STORE_ENABLED:-} + - MCP_RUST_RESUME_CORE_ENABLED=${MCP_RUST_RESUME_CORE_ENABLED:-} + - MCP_RUST_LIVE_STREAM_CORE_ENABLED=${MCP_RUST_LIVE_STREAM_CORE_ENABLED:-} + - MCP_RUST_AFFINITY_CORE_ENABLED=${MCP_RUST_AFFINITY_CORE_ENABLED:-} + - MCP_RUST_SESSION_AUTH_REUSE_ENABLED=${MCP_RUST_SESSION_AUTH_REUSE_ENABLED:-} + - MCP_RUST_SESSION_AUTH_REUSE_TTL_SECONDS=${MCP_RUST_SESSION_AUTH_REUSE_TTL_SECONDS:-30} + - MCP_RUST_EVENT_STORE_MAX_EVENTS_PER_STREAM=${MCP_RUST_EVENT_STORE_MAX_EVENTS_PER_STREAM:-100} + - MCP_RUST_EVENT_STORE_TTL_SECONDS=${MCP_RUST_EVENT_STORE_TTL_SECONDS:-3600} + - MCP_RUST_EVENT_STORE_POLL_INTERVAL_MS=${MCP_RUST_EVENT_STORE_POLL_INTERVAL_MS:-250} + - MCP_RUST_LOG=${MCP_RUST_LOG:-} + - MCP_RUST_CLIENT_CONNECT_TIMEOUT_MS=${MCP_RUST_CLIENT_CONNECT_TIMEOUT_MS:-5000} + - MCP_RUST_CLIENT_POOL_IDLE_TIMEOUT_SECONDS=${MCP_RUST_CLIENT_POOL_IDLE_TIMEOUT_SECONDS:-90} + - MCP_RUST_CLIENT_POOL_MAX_IDLE_PER_HOST=${MCP_RUST_CLIENT_POOL_MAX_IDLE_PER_HOST:-1024} + - MCP_RUST_CLIENT_TCP_KEEPALIVE_SECONDS=${MCP_RUST_CLIENT_TCP_KEEPALIVE_SECONDS:-30} + - MCP_RUST_TOOLS_CALL_PLAN_TTL_SECONDS=${MCP_RUST_TOOLS_CALL_PLAN_TTL_SECONDS:-30} + - MCP_RUST_UPSTREAM_SESSION_TTL_SECONDS=${MCP_RUST_UPSTREAM_SESSION_TTL_SECONDS:-300} + - MCP_RUST_USE_RMCP_UPSTREAM_CLIENT=${MCP_RUST_USE_RMCP_UPSTREAM_CLIENT:-} # WebSocket 
transport features (disabled by default; opt-in only) - MCPGATEWAY_WS_RELAY_ENABLED=${MCPGATEWAY_WS_RELAY_ENABLED:-false} - MCPGATEWAY_REVERSE_PROXY_ENABLED=${MCPGATEWAY_REVERSE_PROXY_ENABLED:-false} @@ -338,6 +393,7 @@ services: # - KEY_FILE_PASSWORD=${KEY_FILE_PASSWORD} # Uncomment to enable plugins - PLUGINS_ENABLED=true + - PLUGINS_CONFIG_FILE=${PLUGINS_CONFIG_FILE:-plugins/config.yaml} # Uncomment to enable catalog - MCPGATEWAY_CATALOG_ENABLED=true - MCPGATEWAY_CATALOG_FILE=/app/mcp-catalog.yml @@ -422,14 +478,14 @@ services: # ═══════════════════════════════════════════════════════════════════════════ # Gunicorn Configuration (used when HTTP_SERVER=gunicorn) # ═══════════════════════════════════════════════════════════════════════════ - - GUNICORN_WORKERS=${GUNICORN_WORKERS:-24} # Worker processes per replica - - GUNICORN_TIMEOUT=120 # Worker timeout in seconds - - GUNICORN_GRACEFUL_TIMEOUT=60 # Grace period for worker shutdown - - GUNICORN_KEEP_ALIVE=30 # Keep-alive timeout (matches SSE keepalive) + - GUNICORN_WORKERS=${GUNICORN_WORKERS:-24} # Worker processes (match CPU cores) + - GUNICORN_TIMEOUT=${GUNICORN_TIMEOUT:-120} # Worker timeout in seconds + - GUNICORN_GRACEFUL_TIMEOUT=${GUNICORN_GRACEFUL_TIMEOUT:-60} # Grace period for worker shutdown + - GUNICORN_KEEP_ALIVE=${GUNICORN_KEEP_ALIVE:-30} # Keep-alive timeout (matches SSE keepalive) # Worker recycling cleans up MCP SDK stuck task groups (anyio#695 workaround) - - GUNICORN_MAX_REQUESTS=1000000 # Recycle workers after 1M requests - - GUNICORN_MAX_REQUESTS_JITTER=100000 # ±100000 jitter prevents thundering herd - - GUNICORN_BACKLOG=4096 # Connection queue depth + - GUNICORN_MAX_REQUESTS=${GUNICORN_MAX_REQUESTS:-1000000} # Recycle workers after 1M requests + - GUNICORN_MAX_REQUESTS_JITTER=${GUNICORN_MAX_REQUESTS_JITTER:-100000} # ±100000 jitter prevents thundering herd + - GUNICORN_BACKLOG=${GUNICORN_BACKLOG:-4096} # Connection queue depth # 
═══════════════════════════════════════════════════════════════════════════ # Granian Backpressure Configuration (used when HTTP_SERVER=granian) # ═══════════════════════════════════════════════════════════════════════════ @@ -437,11 +493,11 @@ services: # immediate 503 responses instead of queuing them (which can cause OOM/timeouts). # Total capacity = GRANIAN_WORKERS × GRANIAN_BACKPRESSURE = 16 × 128 = 2048 concurrent # Requests beyond this limit receive immediate 503 (no queuing, no OOM) - - GRANIAN_WORKERS=16 - - GRANIAN_BACKLOG=4096 - - GRANIAN_BACKPRESSURE=128 - - GRANIAN_HTTP1_BUFFER_SIZE=524288 - - GRANIAN_RESPAWN_FAILED=true + - GRANIAN_WORKERS=${GRANIAN_WORKERS:-16} + - GRANIAN_BACKLOG=${GRANIAN_BACKLOG:-4096} + - GRANIAN_BACKPRESSURE=${GRANIAN_BACKPRESSURE:-128} + - GRANIAN_HTTP1_BUFFER_SIZE=${GRANIAN_HTTP1_BUFFER_SIZE:-524288} + - GRANIAN_RESPAWN_FAILED=${GRANIAN_RESPAWN_FAILED:-true} # ─────────────────────────────────────────────────────────────────────── # Granian Worker Lifecycle (recycling to prevent resource leaks) # ─────────────────────────────────────────────────────────────────────── @@ -1340,17 +1396,18 @@ services: echo "Generating JWT token..." 
echo "Environment: JWT_SECRET_KEY=$$JWT_SECRET_KEY" - echo "Running: python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256" + echo "Running: python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --admin --exp 10080 --secret my-test-key --algo HS256" # Only capture stdout (the token), let warnings go to stderr - export MCPGATEWAY_BEARER_TOKEN=$$(python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256 2>/dev/null) + export MCPGATEWAY_BEARER_TOKEN=$$(python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --admin --exp 10080 --secret my-test-key --algo HS256 2>/dev/null) echo "Generated token: $$MCPGATEWAY_BEARER_TOKEN" # Decode the token to verify it has expiration echo "Decoding token to verify claims..." python3 -m mcpgateway.utils.create_jwt_token --decode "$$MCPGATEWAY_BEARER_TOKEN" 2>/dev/null || echo "Failed to decode token" - # Test authentication first - echo "Testing authentication..." + # Wait for authenticated gateway readiness. /health goes green before the + # admin/bootstrap path is fully ready for authenticated registration calls. + echo "Waiting for authenticated gateway readiness..." 
# Use Python to make HTTP requests python3 -c " @@ -1374,27 +1431,38 @@ services: with urllib.request.urlopen(req) as response: return json.loads(response.read().decode('utf-8')) - # Test version endpoint without auth - print('Checking gateway config...') - try: - with urllib.request.urlopen('http://gateway:4444/version') as response: - data = response.read().decode('utf-8') - print(f'Gateway version response (no auth): {data[:200]}') - except Exception as e: - print(f'Version check failed: {e}') - - # Test version endpoint with auth - print('Testing authentication...') - try: - req = urllib.request.Request('http://gateway:4444/version') - req.add_header('Authorization', f'Bearer {token}') - with urllib.request.urlopen(req) as response: - data = response.read().decode('utf-8') - print(f'Auth test response: SUCCESS') - auth_success = True - except Exception as e: - print(f'Auth test response: FAILED - {e}') - auth_success = False + def api_request_with_retry(method, path, data=None, retries=30, delay=2, retry_statuses=(401, 502, 503)): + '''Retry authenticated API requests while gateway workers settle.''' + for attempt in range(1, retries + 1): + try: + return api_request(method, path, data) + except urllib.error.HTTPError as exc: + if exc.code in retry_statuses and attempt < retries: + print(f'Retrying {method} {path} after HTTP {exc.code} ({attempt}/{retries})') + time.sleep(delay) + continue + raise + except Exception: + if attempt < retries: + print(f'Retrying {method} {path} after transient error ({attempt}/{retries})') + time.sleep(delay) + continue + raise + + print('Checking authenticated gateway readiness...') + for i in range(1, 61): + try: + gateways = api_request('GET', '/gateways') + print(f'✅ Authenticated gateway readiness confirmed ({len(gateways)} gateways visible)') + break + except urllib.error.HTTPError as exc: + print(f'Authenticated readiness not ready yet ({i}/60): HTTP {exc.code}') + except Exception as exc: + print(f'Authenticated readiness 
not ready yet ({i}/60): {exc}') + time.sleep(2) + else: + print('❌ Gateway authenticated readiness check failed') + sys.exit(1) # Register fast_time_server with gateway using Streamable HTTP transport print('Registering fast_time_server with gateway (Streamable HTTP)...') @@ -1402,11 +1470,11 @@ services: # First check if gateway already exists and delete it gateway_id = None try: - gateways = api_request('GET', '/gateways') + gateways = api_request_with_retry('GET', '/gateways') for gw in gateways: if gw.get('name') == 'fast_time': print(f'Found existing gateway {gw[\"id\"]}, deleting...') - api_request('DELETE', f'/gateways/{gw[\"id\"]}') + api_request_with_retry('DELETE', f'/gateways/{gw[\"id\"]}', retry_statuses=(401, 502, 503)) print('Deleted existing gateway') except Exception as e: print(f'Note: Could not check/delete existing gateway: {e}') @@ -1414,14 +1482,14 @@ services: # Delete existing virtual server if present (using fixed ID) VIRTUAL_SERVER_ID = '9779b6698cbd4b4995ee04a4fab38737' try: - api_request('DELETE', f'/servers/{VIRTUAL_SERVER_ID}') + api_request_with_retry('DELETE', f'/servers/{VIRTUAL_SERVER_ID}', retry_statuses=(401, 502, 503)) print(f'Deleted existing virtual server {VIRTUAL_SERVER_ID}') except Exception as e: print(f'Note: No existing virtual server to delete (or error: {e})') # Register the gateway try: - result = api_request('POST', '/gateways', { + result = api_request_with_retry('POST', '/gateways', { 'name': 'fast_time', 'url': 'http://fast_time_server:8080/http', 'transport': 'STREAMABLEHTTP' @@ -1499,7 +1567,7 @@ services: 'associated_prompts': prompt_ids } } - result = api_request('POST', '/servers', server_payload) + result = api_request_with_retry('POST', '/servers', server_payload) print(f'Virtual server created: {result}') print(f'✅ Successfully created virtual server with {len(tool_ids)} tools, {len(resource_ids)} resources, {len(prompt_ids)} prompts') except Exception as e: @@ -1536,7 +1604,7 @@ services: echo "Registering 
fast_time_server SSE transport with gateway..." # Generate JWT token - export MCPGATEWAY_BEARER_TOKEN=$$(python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256 2>/dev/null) + export MCPGATEWAY_BEARER_TOKEN=$$(python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --admin --exp 10080 --secret my-test-key --algo HS256 2>/dev/null) python3 -c " import json @@ -1707,7 +1775,7 @@ services: echo "Registering slow_time_server with gateway..." # Generate JWT token - export MCPGATEWAY_BEARER_TOKEN=$$(python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --exp 10080 --secret my-test-key --algo HS256 2>/dev/null) + export MCPGATEWAY_BEARER_TOKEN=$$(python3 -m mcpgateway.utils.create_jwt_token --username admin@example.com --admin --exp 10080 --secret my-test-key --algo HS256 2>/dev/null) python3 -c " import urllib.request @@ -1820,6 +1888,7 @@ services: # Register using Python python3 -c " + import urllib.error import urllib.request import json import os @@ -1837,13 +1906,45 @@ services: with urllib.request.urlopen(req) as response: return json.loads(response.read().decode('utf-8')) + def api_request_with_retry(method, path, data=None, retries=30, delay=2, retry_statuses=(401, 403, 502, 503)): + for attempt in range(1, retries + 1): + try: + return api_request(method, path, data) + except urllib.error.HTTPError as exc: + if exc.code in retry_statuses and attempt < retries: + print(f'Retrying {method} {path} after HTTP {exc.code} ({attempt}/{retries})') + time.sleep(delay) + continue + raise + except Exception: + if attempt < retries: + print(f'Retrying {method} {path} after transient error ({attempt}/{retries})') + time.sleep(delay) + continue + raise + + print('Waiting for authenticated gateway readiness...') + for i in range(1, 61): + try: + gateways = api_request('GET', '/gateways') + print(f'✅ Authenticated gateway readiness confirmed ({len(gateways)} gateways visible)') 
+ break + except urllib.error.HTTPError as exc: + print(f'Authenticated readiness not ready yet ({i}/60): HTTP {exc.code}') + except Exception as exc: + print(f'Authenticated readiness not ready yet ({i}/60): {exc}') + time.sleep(2) + else: + print('❌ Gateway authenticated readiness check failed') + exit(1) + # Delete existing gateway if present try: - gateways = api_request('GET', '/gateways') + gateways = api_request_with_retry('GET', '/gateways') for gw in gateways: if gw.get('name') == 'fast_test': print(f'Deleting existing gateway {gw[\"id\"]}...') - api_request('DELETE', f'/gateways/{gw[\"id\"]}') + api_request_with_retry('DELETE', f'/gateways/{gw[\"id\"]}') except Exception as e: print(f'Note: {e}') @@ -1851,14 +1952,14 @@ services: # Delete existing virtual server if present try: - api_request('DELETE', f'/servers/{VIRTUAL_SERVER_ID}') + api_request_with_retry('DELETE', f'/servers/{VIRTUAL_SERVER_ID}') print(f'Deleted existing virtual server {VIRTUAL_SERVER_ID}') except Exception as e: print(f'Note: No existing virtual server to delete (or error: {e})') # Register the gateway try: - result = api_request('POST', '/gateways', { + result = api_request_with_retry('POST', '/gateways', { 'name': 'fast_test', 'url': 'http://fast_test_server:8880/mcp', 'transport': 'STREAMABLEHTTP' @@ -1874,7 +1975,7 @@ services: for i in range(30): time.sleep(1) try: - tools = api_request('GET', '/tools') + tools = api_request_with_retry('GET', '/tools') fast_test_tools = [t for t in tools if t.get('gatewayId') == gateway_id] if fast_test_tools: print(f'Found {len(fast_test_tools)} tools from fast_test gateway') @@ -1888,7 +1989,7 @@ services: # Collect tool IDs from the fast_test gateway tool_ids = [] try: - tools = api_request('GET', '/tools') + tools = api_request_with_retry('GET', '/tools') tool_ids = [t['id'] for t in tools if t.get('gatewayId') == gateway_id] print(f'Tools: {[t[\"name\"] for t in tools if t.get(\"gatewayId\") == gateway_id]}') except Exception as e: @@ 
-1907,7 +2008,7 @@ services: 'associated_prompts': [] } } - result = api_request('POST', '/servers', server_payload) + result = api_request_with_retry('POST', '/servers', server_payload) print(f'✅ Virtual server created: {VIRTUAL_SERVER_ID} with {len(tool_ids)} tools') except Exception as e: print(f'❌ Failed to create virtual server: {e}') diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 9dc26c5b53..f7fe94db54 100755 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -1,37 +1,403 @@ #!/usr/bin/env bash -#─────────────────────────────────────────────────────────────────────────────── -# Script : docker-entrypoint.sh -# Purpose: Container entrypoint that allows switching between HTTP servers -# -# Environment Variables: -# HTTP_SERVER : Which HTTP server to use (default: gunicorn) -# - gunicorn : Python-based with Uvicorn workers (default) -# - granian : Rust-based HTTP server (alternative) -# -# Usage: -# # Run with Gunicorn (default) -# docker run -e HTTP_SERVER=gunicorn mcpgateway -# -# # Run with Granian -# docker run -e HTTP_SERVER=granian mcpgateway -#─────────────────────────────────────────────────────────────────────────────── - set -euo pipefail HTTP_SERVER="${HTTP_SERVER:-gunicorn}" +RUST_MCP_MODE="${RUST_MCP_MODE:-off}" +RUST_MCP_LOG="${RUST_MCP_LOG:-warn}" +RUST_MCP_SESSION_AUTH_REUSE="${RUST_MCP_SESSION_AUTH_REUSE:-}" +EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED="${EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED:-}" +EXPERIMENTAL_RUST_MCP_RUNTIME_MANAGED="${EXPERIMENTAL_RUST_MCP_RUNTIME_MANAGED:-}" +EXPERIMENTAL_RUST_MCP_RUNTIME_URL="${EXPERIMENTAL_RUST_MCP_RUNTIME_URL:-}" +EXPERIMENTAL_RUST_MCP_RUNTIME_UDS="${EXPERIMENTAL_RUST_MCP_RUNTIME_UDS:-}" +EXPERIMENTAL_RUST_MCP_SESSION_CORE_ENABLED="${EXPERIMENTAL_RUST_MCP_SESSION_CORE_ENABLED:-}" +EXPERIMENTAL_RUST_MCP_EVENT_STORE_ENABLED="${EXPERIMENTAL_RUST_MCP_EVENT_STORE_ENABLED:-}" +EXPERIMENTAL_RUST_MCP_RESUME_CORE_ENABLED="${EXPERIMENTAL_RUST_MCP_RESUME_CORE_ENABLED:-}" 
+EXPERIMENTAL_RUST_MCP_LIVE_STREAM_CORE_ENABLED="${EXPERIMENTAL_RUST_MCP_LIVE_STREAM_CORE_ENABLED:-}" +EXPERIMENTAL_RUST_MCP_AFFINITY_CORE_ENABLED="${EXPERIMENTAL_RUST_MCP_AFFINITY_CORE_ENABLED:-}" +EXPERIMENTAL_RUST_MCP_SESSION_AUTH_REUSE_ENABLED="${EXPERIMENTAL_RUST_MCP_SESSION_AUTH_REUSE_ENABLED:-}" +CONTEXTFORGE_ENABLE_RUST_BUILD="${CONTEXTFORGE_ENABLE_RUST_BUILD:-false}" +CONTEXTFORGE_ENABLE_RUST_MCP_RMCP_BUILD="${CONTEXTFORGE_ENABLE_RUST_MCP_RMCP_BUILD:-false}" +MCP_RUST_LISTEN_HTTP="${MCP_RUST_LISTEN_HTTP:-}" +MCP_RUST_LISTEN_UDS="${MCP_RUST_LISTEN_UDS:-}" +MCP_RUST_PUBLIC_LISTEN_HTTP="${MCP_RUST_PUBLIC_LISTEN_HTTP:-}" +MCP_RUST_LOG="${MCP_RUST_LOG:-}" +MCP_RUST_USE_RMCP_UPSTREAM_CLIENT="${MCP_RUST_USE_RMCP_UPSTREAM_CLIENT:-}" +MCP_RUST_SESSION_CORE_ENABLED="${MCP_RUST_SESSION_CORE_ENABLED:-}" +MCP_RUST_EVENT_STORE_ENABLED="${MCP_RUST_EVENT_STORE_ENABLED:-}" +MCP_RUST_RESUME_CORE_ENABLED="${MCP_RUST_RESUME_CORE_ENABLED:-}" +MCP_RUST_LIVE_STREAM_CORE_ENABLED="${MCP_RUST_LIVE_STREAM_CORE_ENABLED:-}" +MCP_RUST_AFFINITY_CORE_ENABLED="${MCP_RUST_AFFINITY_CORE_ENABLED:-}" +MCP_RUST_SESSION_AUTH_REUSE_ENABLED="${MCP_RUST_SESSION_AUTH_REUSE_ENABLED:-}" +MCP_RUST_SESSION_AUTH_REUSE_TTL_SECONDS="${MCP_RUST_SESSION_AUTH_REUSE_TTL_SECONDS:-}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "${SCRIPT_DIR}" || { + echo "ERROR: Cannot change to script directory: ${SCRIPT_DIR}" + exit 1 +} + +RUST_MCP_PID="" +SERVER_PID="" + +apply_rust_mcp_mode_defaults() { + local normalized_mode="${RUST_MCP_MODE,,}" + local runtime_enabled_default="false" + local managed_default="true" + local session_core_default="false" + local event_store_default="false" + local resume_core_default="false" + local live_stream_core_default="false" + local affinity_core_default="false" + local session_auth_reuse_default="false" + + case "${normalized_mode}" in + ""|off) + ;; + shadow) + runtime_enabled_default="true" + ;; + edge) + runtime_enabled_default="true" + 
session_auth_reuse_default="true" + ;; + full) + runtime_enabled_default="true" + session_core_default="true" + event_store_default="true" + resume_core_default="true" + live_stream_core_default="true" + affinity_core_default="true" + session_auth_reuse_default="true" + ;; + *) + echo "ERROR: Unknown RUST_MCP_MODE value: ${RUST_MCP_MODE}" + echo "Valid options: off, shadow, edge, full" + exit 1 + ;; + esac + + if [[ -z "${EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED}" ]]; then + EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED="${runtime_enabled_default}" + fi + if [[ -z "${EXPERIMENTAL_RUST_MCP_RUNTIME_MANAGED}" ]]; then + EXPERIMENTAL_RUST_MCP_RUNTIME_MANAGED="${managed_default}" + fi + if [[ -z "${EXPERIMENTAL_RUST_MCP_RUNTIME_URL}" ]]; then + EXPERIMENTAL_RUST_MCP_RUNTIME_URL="http://127.0.0.1:8787" + fi + if [[ -z "${EXPERIMENTAL_RUST_MCP_SESSION_CORE_ENABLED}" ]]; then + EXPERIMENTAL_RUST_MCP_SESSION_CORE_ENABLED="${session_core_default}" + fi + if [[ -z "${EXPERIMENTAL_RUST_MCP_EVENT_STORE_ENABLED}" ]]; then + EXPERIMENTAL_RUST_MCP_EVENT_STORE_ENABLED="${event_store_default}" + fi + if [[ -z "${EXPERIMENTAL_RUST_MCP_RESUME_CORE_ENABLED}" ]]; then + EXPERIMENTAL_RUST_MCP_RESUME_CORE_ENABLED="${resume_core_default}" + fi + if [[ -z "${EXPERIMENTAL_RUST_MCP_LIVE_STREAM_CORE_ENABLED}" ]]; then + EXPERIMENTAL_RUST_MCP_LIVE_STREAM_CORE_ENABLED="${live_stream_core_default}" + fi + if [[ -z "${EXPERIMENTAL_RUST_MCP_AFFINITY_CORE_ENABLED}" ]]; then + EXPERIMENTAL_RUST_MCP_AFFINITY_CORE_ENABLED="${affinity_core_default}" + fi + if [[ -z "${EXPERIMENTAL_RUST_MCP_SESSION_AUTH_REUSE_ENABLED}" ]]; then + if [[ -n "${RUST_MCP_SESSION_AUTH_REUSE}" ]]; then + EXPERIMENTAL_RUST_MCP_SESSION_AUTH_REUSE_ENABLED="${RUST_MCP_SESSION_AUTH_REUSE}" + else + EXPERIMENTAL_RUST_MCP_SESSION_AUTH_REUSE_ENABLED="${session_auth_reuse_default}" + fi + fi + if [[ -z "${EXPERIMENTAL_RUST_MCP_RUNTIME_UDS}" && "${EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED}" = "true" && "${EXPERIMENTAL_RUST_MCP_RUNTIME_MANAGED}" = 
"true" ]]; then + EXPERIMENTAL_RUST_MCP_RUNTIME_UDS="/tmp/contextforge-mcp-rust.sock" + fi + if [[ -z "${MCP_RUST_LISTEN_HTTP}" ]]; then + MCP_RUST_LISTEN_HTTP="127.0.0.1:8787" + fi + if [[ -z "${MCP_RUST_PUBLIC_LISTEN_HTTP}" \ + && "${EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED}" = "true" \ + && "${EXPERIMENTAL_RUST_MCP_RUNTIME_MANAGED}" = "true" \ + && "${EXPERIMENTAL_RUST_MCP_SESSION_AUTH_REUSE_ENABLED}" = "true" ]]; then + MCP_RUST_PUBLIC_LISTEN_HTTP="0.0.0.0:8787" + fi + if [[ -z "${MCP_RUST_LISTEN_UDS}" && -n "${EXPERIMENTAL_RUST_MCP_RUNTIME_UDS}" ]]; then + MCP_RUST_LISTEN_UDS="${EXPERIMENTAL_RUST_MCP_RUNTIME_UDS}" + fi + if [[ -z "${MCP_RUST_USE_RMCP_UPSTREAM_CLIENT}" ]]; then + if [[ "${CONTEXTFORGE_ENABLE_RUST_MCP_RMCP_BUILD}" = "true" ]]; then + MCP_RUST_USE_RMCP_UPSTREAM_CLIENT="true" + else + MCP_RUST_USE_RMCP_UPSTREAM_CLIENT="false" + fi + fi + if [[ -z "${MCP_RUST_LOG}" ]]; then + MCP_RUST_LOG="${RUST_MCP_LOG}" + fi + if [[ -z "${MCP_RUST_SESSION_CORE_ENABLED}" ]]; then + MCP_RUST_SESSION_CORE_ENABLED="${EXPERIMENTAL_RUST_MCP_SESSION_CORE_ENABLED}" + fi + if [[ -z "${MCP_RUST_EVENT_STORE_ENABLED}" ]]; then + MCP_RUST_EVENT_STORE_ENABLED="${EXPERIMENTAL_RUST_MCP_EVENT_STORE_ENABLED}" + fi + if [[ -z "${MCP_RUST_RESUME_CORE_ENABLED}" ]]; then + MCP_RUST_RESUME_CORE_ENABLED="${EXPERIMENTAL_RUST_MCP_RESUME_CORE_ENABLED}" + fi + if [[ -z "${MCP_RUST_LIVE_STREAM_CORE_ENABLED}" ]]; then + MCP_RUST_LIVE_STREAM_CORE_ENABLED="${EXPERIMENTAL_RUST_MCP_LIVE_STREAM_CORE_ENABLED}" + fi + if [[ -z "${MCP_RUST_AFFINITY_CORE_ENABLED}" ]]; then + MCP_RUST_AFFINITY_CORE_ENABLED="${EXPERIMENTAL_RUST_MCP_AFFINITY_CORE_ENABLED}" + fi + if [[ -z "${MCP_RUST_SESSION_AUTH_REUSE_ENABLED}" ]]; then + MCP_RUST_SESSION_AUTH_REUSE_ENABLED="${EXPERIMENTAL_RUST_MCP_SESSION_AUTH_REUSE_ENABLED}" + fi + if [[ -z "${MCP_RUST_SESSION_AUTH_REUSE_TTL_SECONDS}" ]]; then + MCP_RUST_SESSION_AUTH_REUSE_TTL_SECONDS="30" + fi + + export RUST_MCP_MODE + export RUST_MCP_LOG + export 
RUST_MCP_SESSION_AUTH_REUSE + export EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED + export EXPERIMENTAL_RUST_MCP_RUNTIME_MANAGED + export EXPERIMENTAL_RUST_MCP_RUNTIME_URL + export EXPERIMENTAL_RUST_MCP_RUNTIME_UDS + export EXPERIMENTAL_RUST_MCP_SESSION_CORE_ENABLED + export EXPERIMENTAL_RUST_MCP_EVENT_STORE_ENABLED + export EXPERIMENTAL_RUST_MCP_RESUME_CORE_ENABLED + export EXPERIMENTAL_RUST_MCP_LIVE_STREAM_CORE_ENABLED + export EXPERIMENTAL_RUST_MCP_AFFINITY_CORE_ENABLED + export EXPERIMENTAL_RUST_MCP_SESSION_AUTH_REUSE_ENABLED + export MCP_RUST_LISTEN_HTTP + export MCP_RUST_LISTEN_UDS + export MCP_RUST_PUBLIC_LISTEN_HTTP + export MCP_RUST_LOG + export MCP_RUST_USE_RMCP_UPSTREAM_CLIENT + export MCP_RUST_SESSION_CORE_ENABLED + export MCP_RUST_EVENT_STORE_ENABLED + export MCP_RUST_RESUME_CORE_ENABLED + export MCP_RUST_LIVE_STREAM_CORE_ENABLED + export MCP_RUST_AFFINITY_CORE_ENABLED + export MCP_RUST_SESSION_AUTH_REUSE_ENABLED + export MCP_RUST_SESSION_AUTH_REUSE_TTL_SECONDS +} + +cleanup() { + local pids=() + + if [[ -n "${SERVER_PID}" ]] && kill -0 "${SERVER_PID}" 2>/dev/null; then + pids+=("${SERVER_PID}") + fi + if [[ -n "${RUST_MCP_PID}" ]] && kill -0 "${RUST_MCP_PID}" 2>/dev/null; then + pids+=("${RUST_MCP_PID}") + fi + + if [[ ${#pids[@]} -gt 0 ]]; then + kill "${pids[@]}" 2>/dev/null || true + wait "${pids[@]}" 2>/dev/null || true + fi +} + +print_mcp_runtime_mode() { + local runtime_mode="python" + local upstream_client_mode="native" + local session_core_mode="python" + local event_store_mode="python" + local resume_core_mode="python" + local live_stream_core_mode="python" + local affinity_core_mode="python" + local session_auth_reuse_mode="python" + + if [[ "${MCP_RUST_USE_RMCP_UPSTREAM_CLIENT}" = "true" ]]; then + upstream_client_mode="rmcp" + fi + if [[ "${MCP_RUST_SESSION_CORE_ENABLED}" = "true" && "${EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED}" = "true" ]]; then + session_core_mode="rust" + fi + if [[ "${MCP_RUST_EVENT_STORE_ENABLED}" = "true" && 
"${EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED}" = "true" ]]; then + event_store_mode="rust" + fi + if [[ "${MCP_RUST_RESUME_CORE_ENABLED}" = "true" && "${EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED}" = "true" ]]; then + resume_core_mode="rust" + fi + if [[ "${MCP_RUST_LIVE_STREAM_CORE_ENABLED}" = "true" && "${EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED}" = "true" ]]; then + live_stream_core_mode="rust" + fi + if [[ "${MCP_RUST_AFFINITY_CORE_ENABLED}" = "true" && "${EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED}" = "true" ]]; then + affinity_core_mode="rust" + fi + if [[ "${MCP_RUST_SESSION_AUTH_REUSE_ENABLED}" = "true" && "${EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED}" = "true" ]]; then + session_auth_reuse_mode="rust" + fi + + if [[ "${EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED}" = "true" ]]; then + if [[ "${EXPERIMENTAL_RUST_MCP_RUNTIME_MANAGED}" = "true" ]]; then + runtime_mode="rust-managed" + echo "MCP runtime mode: ${runtime_mode} (sidecar managed in this container, upstream client: ${upstream_client_mode}, session core: ${session_core_mode}, event store: ${event_store_mode}, resume core: ${resume_core_mode}, live stream core: ${live_stream_core_mode}, affinity core: ${affinity_core_mode}, session auth reuse: ${session_auth_reuse_mode})" + else + runtime_mode="rust-external" + echo "MCP runtime mode: ${runtime_mode} (external sidecar target: ${EXPERIMENTAL_RUST_MCP_RUNTIME_UDS:-${EXPERIMENTAL_RUST_MCP_RUNTIME_URL}}, upstream client: ${upstream_client_mode}, session core: ${session_core_mode}, event store: ${event_store_mode}, resume core: ${resume_core_mode}, live stream core: ${live_stream_core_mode}, affinity core: ${affinity_core_mode}, session auth reuse: ${session_auth_reuse_mode})" + fi + + if [[ "${MCP_RUST_USE_RMCP_UPSTREAM_CLIENT}" = "true" && "${CONTEXTFORGE_ENABLE_RUST_MCP_RMCP_BUILD}" != "true" ]]; then + echo "ERROR: MCP_RUST_USE_RMCP_UPSTREAM_CLIENT=true but this image was built without rmcp support." 
+ echo "Rebuild with RUST_MCP_BUILD=1 or --build-arg ENABLE_RUST_MCP_RMCP=true." + exit 1 + fi + return + fi + + if [[ "${CONTEXTFORGE_ENABLE_RUST_BUILD}" = "true" ]]; then + runtime_mode="python-rust-built-disabled" + echo "WARNING: MCP runtime mode: ${runtime_mode}" + echo "WARNING: Rust MCP artifacts are present in this image, but EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED=false so /mcp will run on the Python transport." + echo "WARNING: Set RUST_MCP_MODE=shadow, RUST_MCP_MODE=edge, or RUST_MCP_MODE=full to activate the Rust MCP runtime." + return + fi + + echo "MCP runtime mode: ${runtime_mode} (Rust MCP artifacts not built into this image)" +} -case "${HTTP_SERVER}" in - granian) - echo "Starting ContextForge with Granian (Rust-based HTTP server)..." - exec ./run-granian.sh "$@" - ;; - gunicorn) - echo "Starting ContextForge with Gunicorn + Uvicorn..." - exec ./run-gunicorn.sh "$@" - ;; - *) - echo "ERROR: Unknown HTTP_SERVER value: ${HTTP_SERVER}" - echo "Valid options: granian, gunicorn" +build_server_command() { + case "${HTTP_SERVER}" in + granian) + echo "Starting ContextForge with Granian (Rust-based HTTP server)..." + SERVER_CMD=(./run-granian.sh "$@") + ;; + gunicorn) + echo "Starting ContextForge with Gunicorn + Uvicorn..." 
+ SERVER_CMD=(./run-gunicorn.sh "$@") + ;; + *) + echo "ERROR: Unknown HTTP_SERVER value: ${HTTP_SERVER}" + echo "Valid options: granian, gunicorn" + exit 1 + ;; + esac +} + +start_managed_rust_mcp_runtime() { + local runtime_bin="/app/bin/contextforge-mcp-runtime" + local rust_listen_http="${MCP_RUST_LISTEN_HTTP:-127.0.0.1:8787}" + local rust_listen_uds="${MCP_RUST_LISTEN_UDS:-${EXPERIMENTAL_RUST_MCP_RUNTIME_UDS:-}}" + local app_root_path="${APP_ROOT_PATH:-}" + local backend_rpc_url="${MCP_RUST_BACKEND_RPC_URL:-http://127.0.0.1:${PORT:-4444}${app_root_path}/_internal/mcp/rpc}" + local rust_database_url="${MCP_RUST_DATABASE_URL:-}" + local rust_redis_url="${MCP_RUST_REDIS_URL:-${REDIS_URL:-}}" + local rust_cache_prefix="${MCP_RUST_CACHE_PREFIX:-${CACHE_PREFIX:-mcpgw:}}" + local rust_event_store_max="${MCP_RUST_EVENT_STORE_MAX_EVENTS_PER_STREAM:-${STREAMABLE_HTTP_MAX_EVENTS_PER_STREAM:-100}}" + local rust_event_store_ttl="${MCP_RUST_EVENT_STORE_TTL_SECONDS:-${STREAMABLE_HTTP_EVENT_TTL:-3600}}" + + if [[ -z "${rust_database_url}" && -n "${DATABASE_URL:-}" ]]; then + case "${DATABASE_URL}" in + postgresql+psycopg://*) + rust_database_url="${DATABASE_URL/postgresql+psycopg:\/\//postgresql://}" + ;; + postgresql://*|postgres://*) + rust_database_url="${DATABASE_URL}" + ;; + esac + fi + + if [[ "${CONTEXTFORGE_ENABLE_RUST_BUILD}" != "true" ]]; then + echo "ERROR: EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED=true but this image was built without Rust artifacts." + echo "Rebuild with RUST_MCP_BUILD=1 or --build-arg ENABLE_RUST=true, or set EXPERIMENTAL_RUST_MCP_RUNTIME_MANAGED=false to use an external sidecar." + exit 1 + fi + + if [[ ! 
-x "${runtime_bin}" ]]; then + echo "ERROR: Rust MCP runtime binary not found at ${runtime_bin}" exit 1 - ;; -esac + fi + + export MCP_RUST_LISTEN_HTTP="${rust_listen_http}" + if [[ -n "${rust_listen_uds}" ]]; then + export MCP_RUST_LISTEN_UDS="${rust_listen_uds}" + else + unset MCP_RUST_LISTEN_UDS || true + unset EXPERIMENTAL_RUST_MCP_RUNTIME_UDS || true + fi + if [[ -n "${MCP_RUST_PUBLIC_LISTEN_HTTP:-}" ]]; then + export MCP_RUST_PUBLIC_LISTEN_HTTP="${MCP_RUST_PUBLIC_LISTEN_HTTP}" + else + unset MCP_RUST_PUBLIC_LISTEN_HTTP || true + fi + export MCP_RUST_BACKEND_RPC_URL="${backend_rpc_url}" + export MCP_RUST_SESSION_CORE_ENABLED="${MCP_RUST_SESSION_CORE_ENABLED}" + export MCP_RUST_EVENT_STORE_ENABLED="${MCP_RUST_EVENT_STORE_ENABLED}" + export MCP_RUST_RESUME_CORE_ENABLED="${MCP_RUST_RESUME_CORE_ENABLED}" + export MCP_RUST_LIVE_STREAM_CORE_ENABLED="${MCP_RUST_LIVE_STREAM_CORE_ENABLED}" + export MCP_RUST_SESSION_AUTH_REUSE_ENABLED="${MCP_RUST_SESSION_AUTH_REUSE_ENABLED}" + export MCP_RUST_SESSION_AUTH_REUSE_TTL_SECONDS="${MCP_RUST_SESSION_AUTH_REUSE_TTL_SECONDS}" + export MCP_RUST_CACHE_PREFIX="${rust_cache_prefix}" + export MCP_RUST_EVENT_STORE_MAX_EVENTS_PER_STREAM="${rust_event_store_max}" + export MCP_RUST_EVENT_STORE_TTL_SECONDS="${rust_event_store_ttl}" + if [[ -n "${rust_database_url}" ]]; then + export MCP_RUST_DATABASE_URL="${rust_database_url}" + fi + if [[ -n "${rust_redis_url}" ]]; then + export MCP_RUST_REDIS_URL="${rust_redis_url}" + fi + + if [[ -n "${rust_listen_uds}" ]]; then + echo "Starting experimental Rust MCP runtime on unix://${MCP_RUST_LISTEN_UDS} (backend: ${MCP_RUST_BACKEND_RPC_URL})..." + else + echo "Starting experimental Rust MCP runtime on ${MCP_RUST_LISTEN_HTTP} (backend: ${MCP_RUST_BACKEND_RPC_URL})..." + fi + "${runtime_bin}" & + RUST_MCP_PID=$! 
+ + python3 - <<'PY' +import httpx +import os +import sys +import time +import urllib.error +import urllib.request + +base_url = os.environ.get("EXPERIMENTAL_RUST_MCP_RUNTIME_URL", "http://127.0.0.1:8787").rstrip("/") +health_url = f"{base_url}/health" +uds_path = os.environ.get("EXPERIMENTAL_RUST_MCP_RUNTIME_UDS") or os.environ.get("MCP_RUST_LISTEN_UDS") + +for _ in range(60): + if uds_path: + try: + with httpx.Client(transport=httpx.HTTPTransport(uds=uds_path), timeout=2.0) as client: + response = client.get(health_url) + if response.status_code == 200: + sys.exit(0) + except OSError: + time.sleep(0.5) + except httpx.HTTPError: + time.sleep(0.5) + else: + try: + with urllib.request.urlopen(health_url, timeout=2) as response: + if response.status == 200: + sys.exit(0) + except (OSError, urllib.error.URLError): + time.sleep(0.5) + +print(f"ERROR: Experimental Rust MCP runtime failed health check at {health_url}", file=sys.stderr) +sys.exit(1) +PY +} + +apply_rust_mcp_mode_defaults +build_server_command "$@" +print_mcp_runtime_mode + +if [[ "${EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED}" = "true" && "${EXPERIMENTAL_RUST_MCP_RUNTIME_MANAGED}" = "true" ]]; then + trap cleanup EXIT INT TERM + start_managed_rust_mcp_runtime + "${SERVER_CMD[@]}" & + SERVER_PID=$! + + set +e + wait -n "${SERVER_PID}" "${RUST_MCP_PID}" + STATUS=$? 
+ set -e + + exit "${STATUS}" +fi + +exec "${SERVER_CMD[@]}" diff --git a/docs/docs/architecture/.pages b/docs/docs/architecture/.pages index 138202e68b..550f9b18c6 100644 --- a/docs/docs/architecture/.pages +++ b/docs/docs/architecture/.pages @@ -2,6 +2,9 @@ nav: - Overview: index.md - Roadmap: roadmap.md - Performance Architecture: performance-architecture.md + - Rust MCP Runtime: rust-mcp-runtime.md + - Modular Runtime Architecture: modular-design.md + - Modular Runtime Specification: modular-runtime - Security Features: security-features.md - Plugin Framework: plugins.md - Export-Import Architecture: export-import-architecture.md diff --git a/docs/docs/architecture/adr/.pages b/docs/docs/architecture/adr/.pages index 298410fa0b..221314a73a 100644 --- a/docs/docs/architecture/adr/.pages +++ b/docs/docs/architecture/adr/.pages @@ -45,3 +45,8 @@ nav: - 40 Flexible Admin UI Section Visibility: 040-flexible-admin-ui-sections.md - 41 Top-Level Rust Workspace: 041-top-level-rust-workspace.md - 42 Enforce Rust in the Build Process: 042-enforce-rust-in-build-process.md + - 43 Rust MCP Runtime Sidecar and Mode Model: 043-rust-mcp-runtime-sidecar-mode-model.md + - 44 Module Communication Protocol: 044-module-communication-protocol.md + - 45 Auth Remains in Core: 045-auth-remains-in-core.md + - 46 Shared-Nothing Between Modules: 046-shared-nothing-between-modules.md + - 47 Incremental Migration Over Rewrite: 047-incremental-migration-over-rewrite.md diff --git a/docs/docs/architecture/adr/038-experimental-rust-transport-backend.md b/docs/docs/architecture/adr/038-experimental-rust-transport-backend.md index 81967ad2dc..256bbf7aac 100644 --- a/docs/docs/architecture/adr/038-experimental-rust-transport-backend.md +++ b/docs/docs/architecture/adr/038-experimental-rust-transport-backend.md @@ -1,11 +1,17 @@ # ADR-038: Experimental Rust Transport Backend (Streamable HTTP) -- *Status:* Proposed +- *Status:* Superseded by ADR-043 - *Date:* 2025-12-26 - *Deciders:* Platform Team 
## Context +!!! warning + This ADR records the original experiment proposal only. The implemented + architecture has moved to a Rust sidecar/runtime with mode-based rollout. + See [ADR-043](043-rust-mcp-runtime-sidecar-mode-model.md) for the current + decision. + ContextForge currently implements its transport layer (stdio, SSE, WebSocket, and Streamable HTTP) in Python using asyncio. While this provides functional correctness, the transport layer experiences performance and memory limitations under higher concurrency due to Python runtime overhead and GIL constraints. Issue #1621 proposes evaluating a Rust-based transport backend to improve throughput, latency, and resource efficiency while preserving the existing Transport API and protocol semantics. diff --git a/docs/docs/architecture/adr/043-rust-mcp-runtime-sidecar-mode-model.md b/docs/docs/architecture/adr/043-rust-mcp-runtime-sidecar-mode-model.md new file mode 100644 index 0000000000..eeb61e9008 --- /dev/null +++ b/docs/docs/architecture/adr/043-rust-mcp-runtime-sidecar-mode-model.md @@ -0,0 +1,112 @@ +# ADR-043: Rust MCP Runtime Sidecar with Mode-Based Rollout + +- *Status:* Accepted +- *Date:* 2026-03-14 +- *Deciders:* Platform Team +- *Supersedes:* ADR-038 (experimental Rust transport backend) + +## Context + +ContextForge's original Rust transport spike began as a narrow experiment around +the streamable HTTP MCP path. 
The implementation has since evolved beyond that +proposal: + +- the runtime is deployed as a separate Rust sidecar/runtime, not as PyO3/FFI +- nginx can route public `/mcp` traffic directly to Rust +- Rust can own session, event-store, resume, live-stream, and affinity MCP + cores in the `full` mode +- Python still remains authoritative for authentication, token scoping, and RBAC +- rollout and rollback are now controlled through a top-level mode model instead + of only through low-level experimental flags + +The older ADR no longer describes the implemented architecture or the operator +experience. + +## Decision + +We standardize on a **Rust MCP runtime sidecar** with a **mode-based rollout +model**. + +### User-facing modes + +`RUST_MCP_MODE` is the primary operational control: + +- `off`: keep the public MCP path on Python +- `shadow`: run the Rust sidecar, but keep public `/mcp` on Python +- `edge`: route public `/mcp` directly from nginx to Rust +- `full`: `edge` plus Rust-owned MCP session/event-store/resume/live-stream and + affinity cores + +### Public ingress model + +In `edge|full`, nginx routes public `GET/POST/DELETE /mcp` traffic directly to +the Rust runtime through a dedicated public listener. + +Rust authenticates public requests through a trusted internal Python endpoint: + +- `POST /_internal/mcp/authenticate` + +Python remains the system of record for: + +- JWT validation +- token scoping / team visibility +- RBAC + +Rust consumes the authenticated context and owns progressively more of the +public MCP runtime path. + +### Session/auth reuse + +Rust may reuse authenticated context per MCP session, but only with explicit +ownership/binding checks. 
Session reuse is: + +- bound to the original authenticated context +- validated against an auth-binding fingerprint +- denied if the auth binding changes for the same `mcp-session-id` +- backed by dedicated session-isolation tests + +### Fallback and safety + +`shadow` is the safety-first rollback/comparison mode. It keeps the public MCP +transport/session path on Python while still running the Rust sidecar +internally. + +Low-level `EXPERIMENTAL_RUST_MCP_*` flags still exist as advanced overrides, but +the documented operator model is the high-level mode switch above. + +## Consequences + +### Positive + +- Clear operational model for rollout, benchmarking, and rollback +- Public MCP ingress can move off Python incrementally without rewriting the + full security/control plane +- `shadow` provides a clean safety mode instead of an ambiguous hybrid path +- Session/auth reuse has a documented security model and dedicated isolation + coverage +- The runtime can own more of the hot MCP path while preserving Python + compatibility fallbacks + +### Negative + +- The architecture is now explicitly multi-process and multi-language +- Rust and Python responsibilities must remain carefully documented and tested +- Health, profiling, and debugging require mode-aware operational knowledge +- Some behavior still depends on narrow internal Python routes and compatibility + seams + +## Alternatives Considered + +| Option | Why Not | +|--------|---------| +| Keep ADR-038 as the canonical description | No longer matches the implementation or rollout model | +| Full Rust rewrite of the entire gateway/security stack | Higher risk and out of scope for the current incremental migration | +| Expose only low-level `EXPERIMENTAL_RUST_MCP_*` flags | Too hard for operators to reason about safely | +| Keep public `/mcp` permanently on Python and use Rust only behind Python | Leaves the Python ingress hop in the hot path and limits the performance gain | + +## References + +- [Rust MCP 
Runtime Architecture](../rust-mcp-runtime.md) +- [Performance Architecture](../performance-architecture.md) +- `tools_rust/mcp_runtime/TESTING-DESIGN.md` in the repository +- `tools_rust/mcp_runtime/README.md` in the repository diff --git a/docs/docs/architecture/adr/044-module-communication-protocol.md b/docs/docs/architecture/adr/044-module-communication-protocol.md new file mode 100644 index 0000000000..581bfac19d --- /dev/null +++ b/docs/docs/architecture/adr/044-module-communication-protocol.md @@ -0,0 +1,75 @@ +# ADR-044: Module Communication Protocol + +- *Status:* Proposed +- *Date:* 2026-03-15 +- *Deciders:* Platform Team +- *Related:* [Modular Runtime Architecture](../modular-design.md), [ADR-043](043-rust-mcp-runtime-sidecar-mode-model.md) + +## Context + +The modular gateway architecture requires protocol modules (MCP, A2A, LLM, +REST/gRPC) to communicate with the core platform over a well-defined boundary. +That boundary must: + +- Support modules written in any language (Python, Rust, Go). +- Handle both request/response and server-streaming patterns (e.g., catalog change subscriptions, SSE relay). +- Keep latency low enough that per-request overhead is negligible relative to upstream calls. +- Align with existing patterns in the codebase (the plugin framework already supports gRPC external runtimes). + +## Decision + +We adopt **gRPC over Unix Domain Socket** as the **target-state default** +module-to-core transport. + +- It is language-neutral via protobuf code generation. +- It supports unary and streaming patterns cleanly. +- It fits host-local sidecar communication well. +- It aligns with the existing external plugin gRPC pattern already present in + the codebase. 
+ +We also allow: + +- **HTTP/JSON** as a fallback where a gRPC toolchain is undesirable +- **direct in-process calls** for embedded runtimes using the same conceptual + contract + +This is important because the currently implemented Rust MCP precedent still +uses trusted internal HTTP over UDS or loopback. That precedent remains valid +during migration, but it does not redefine the longer-term default boundary. + +## Consequences + +### Positive + +- Single contract definition (protobuf) generates client/server stubs for Python, Rust, Go, and other languages. +- Streaming RPCs natively support catalog change subscriptions and session broadcast patterns. +- UDS avoids TCP overhead and keeps traffic host-local. +- Clean process boundary enables crash isolation and independent scaling of modules. + +### Negative + +- Adds a protobuf/gRPC toolchain dependency for module developers. +- Serialization overhead is higher than direct in-process calls. +- Module developers must handle connection lifecycle, deadlines, and + backpressure. + +### Neutral + +- The SPI schemas must be versioned to allow independent evolution. +- Modules that only need request/response can use HTTP/JSON at the cost of a + weaker streaming story. + +## Alternatives Considered + +| Option | Why Not | +|--------|---------| +| **Cap'n Proto** | Better zero-copy performance but significantly less language support and tooling. | +| **Flatbuffers** | No native streaming support; designed for serialization, not RPC. | +| **REST/JSON** | No streaming, higher overhead, no schema enforcement at compile time. | +| **Shared memory** | Too complex, limited to same-host deployment, no language-neutral schema. | +| **PyO3 / CGo (in-process FFI)** | Available as an optimization for embedded modules, but not suitable as the default boundary because it couples module lifecycle to the Python process. 
| + +## References + +- [Modular Runtime Architecture](../modular-design.md) +- `mcpgateway/plugins/framework/external/grpc/` — Existing gRPC external plugin runtime diff --git a/docs/docs/architecture/adr/045-auth-remains-in-core.md b/docs/docs/architecture/adr/045-auth-remains-in-core.md new file mode 100644 index 0000000000..86992b9416 --- /dev/null +++ b/docs/docs/architecture/adr/045-auth-remains-in-core.md @@ -0,0 +1,66 @@ +# ADR-045: Authentication and Authorization Remain in Core + +- *Status:* Proposed +- *Date:* 2026-03-15 +- *Deciders:* Platform Team +- *Related:* [Modular Runtime Architecture](../modular-design.md), [ADR-004](004-combine-jwt-and-basic-auth.md) + +## Context + +The modular gateway architecture introduces protocol modules that can be implemented in different languages and run as separate processes. A key question is whether authentication and authorization logic should be duplicated in each module or centralized in the core platform. + +ContextForge implements a two-layer security model: + +1. **Token Scoping (Layer 1):** `normalize_token_teams()` in `mcpgateway/auth.py` controls what resources a caller can see. +2. **RBAC (Layer 2):** `PermissionService` controls what actions a caller can perform. + +Both layers are security-critical and have non-trivial edge cases (admin bypass, public-only tokens, team hierarchy resolution). + +## Decision + +Authentication and authorization **never move into modules**. The core +platform remains the single source of truth for: + +- JWT verification and token scoping (`normalize_token_teams()`). +- RBAC permission checks (`PermissionService`). +- SSO provider integration (GitHub, Google, Okta, Keycloak, Entra ID, generic OIDC). +- Token revocation checks. +- Rate limiting. + +Modules consume auth through a core-owned auth and policy SPI. 
The exact RPC or +method names are intentionally left open, but the contract must support: + +- resolving a caller into a typed authenticated context +- checking permissions against that context +- preserving token-scoped visibility and deny-path behavior + +Modules receive authenticated context or permission outcomes from the core and +pass that context through subsequent core SPI calls as needed. + +## Consequences + +### Positive + +- Security-critical code has a single implementation — no drift between module auth implementations. +- Simplifies security auditing — one codebase to review, not N per module. +- Modules in any language get the full auth stack without reimplementing it. +- Consistent behavior across all protocols (MCP, A2A, LLM, REST). + +### Negative + +- Every module request that needs auth must make at least one call or cacheable + check against the core-owned auth boundary. +- Auth logic cannot be freely reimplemented per protocol without risking + policy drift. + +### Neutral + +- Auth caching at the core level (ADR-028) can amortize the cost of repeated + checks. +- The current Rust MCP runtime already demonstrates this pattern in practice. 
+ +## References + +- [Modular Runtime Architecture](../modular-design.md) +- `mcpgateway/auth.py` — `normalize_token_teams()`, single source of truth +- [ADR-043](043-rust-mcp-runtime-sidecar-mode-model.md) diff --git a/docs/docs/architecture/adr/046-shared-nothing-between-modules.md b/docs/docs/architecture/adr/046-shared-nothing-between-modules.md new file mode 100644 index 0000000000..dd0ba2457d --- /dev/null +++ b/docs/docs/architecture/adr/046-shared-nothing-between-modules.md @@ -0,0 +1,57 @@ +# ADR-046: Shared-Nothing Between Protocol Modules + +- *Status:* Proposed +- *Date:* 2026-03-15 +- *Deciders:* Platform Team +- *Related:* [Modular Runtime Architecture](../modular-design.md), [ADR-044](044-module-communication-protocol.md) + +## Context + +ContextForge supports multiple protocols (MCP, A2A, LLM, REST/gRPC) that sometimes need cross-protocol behavior: + +- A2A agents are auto-registered as MCP tools. +- LLM chat integrates MCP tools via LangChain. +- REST/gRPC services can be exposed as MCP tools. + +In the current monolithic architecture, this cross-protocol behavior happens via direct Python imports between services. In a modular architecture where modules may be written in different languages and run in separate processes, direct imports are not possible. + +## Decision + +**Modules cannot import or call each other directly.** All cross-protocol +behavior is mediated by the core platform through core-owned catalogs and +policy-aware routing. + +Illustrative example: + +1. an MCP module asks the core to invoke a tool +2. the core determines the owning integration type +3. the core routes to the appropriate protocol runtime +4. the result returns through the core to the original module + +The exact dispatcher shape may evolve, but the architectural rule does not: +modules remain isolated from one another and the core performs the bridging. 
+ +## Consequences + +### Positive + +- Prevents language-specific coupling between modules (Rust MCP module doesn't import Python A2A code). +- Forces clean API boundaries — all cross-protocol contracts go through the Core SPI. +- Enables independent deployment and scaling of modules. +- Modules can be replaced independently (e.g., Go A2A module replaces Python A2A module) without affecting other modules. + +### Negative + +- Cross-protocol calls have additional latency (two IPC hops: module → core → module). +- The core becomes a bottleneck for cross-protocol traffic. +- Some operations that are currently a simple function call become multi-hop IPC chains. + +### Neutral + +- An event bus (future) can provide asynchronous cross-module communication for non-request-path operations (e.g., "agent registered" → "create tool entry"). +- The `integration_type` field already exists on tools in the current schema, so the routing mechanism is a formalization of existing behavior. + +## References + +- [Modular Runtime Architecture](../modular-design.md) +- `mcpgateway/services/tool_service.py` — `_invoke_a2a_tool()` (current cross-protocol call) diff --git a/docs/docs/architecture/adr/047-incremental-migration-over-rewrite.md b/docs/docs/architecture/adr/047-incremental-migration-over-rewrite.md new file mode 100644 index 0000000000..cdc62859db --- /dev/null +++ b/docs/docs/architecture/adr/047-incremental-migration-over-rewrite.md @@ -0,0 +1,66 @@ +# ADR-047: Incremental Migration Over Rewrite + +- *Status:* Proposed +- *Date:* 2026-03-15 +- *Deciders:* Platform Team +- *Related:* [Modular Runtime Architecture](../modular-design.md), [ADR-019](019-modular-architecture-split.md) + +## Context + +The modular gateway architecture requires significant restructuring of the existing monolithic FastAPI application. Two approaches are possible: + +1. **Incremental migration:** Refactor the existing codebase phase-by-phase, keeping the system functional at each step. +2. 
**Ground-up rewrite:** Build the modular architecture from scratch and migrate services over. + +The existing codebase already has: + +- production behavior that cannot be broken casually +- a broad unit, integration, E2E, security, and performance test surface +- cross-cutting concerns such as auth, RBAC, plugins, and admin UI that must + remain coherent while protocol runtimes are extracted +- an implemented Rust MCP sidecar precedent showing that sidecar rollout is + feasible + +## Decision + +We **modularize through phased refactoring**, not a ground-up rewrite. The migration follows five phases: + +1. **Phase 0 — prerequisite refactors:** create cleaner seams inside the + monolith without changing the deployment model +2. **Phase 1 — core SPI definition:** express core/module boundaries as + internal interfaces first +3. **Phase 2 — module lifecycle:** wrap existing runtimes behind a common + lifecycle and capability model +4. **Phase 3 — sidecar transport:** add sidecar-capable communication where it + is justified +5. **Phase 4 — additional modules:** extract or introduce new protocol + runtimes on top of the same contract + +Feature flags and rollout controls may be used during the transition, but this +ADR does not freeze their final names. + +## Consequences + +### Positive + +- The existing test suite provides regression safety at every phase. +- Each phase leaves the system fully functional — no "big bang" cutover. +- Risk is distributed across multiple small PRs instead of one massive change. +- Legacy mode preserves an escape hatch if module mode has issues. +- The implemented Rust MCP runtime sidecar proves the sidecar pattern works before generalizing it. + +### Negative + +- The codebase will temporarily have both legacy and modular code paths. +- Feature flag complexity increases until legacy mode is retired. +- Each phase requires careful testing in both modes. 
+ +### Neutral + +- The migration timeline is longer than a rewrite but carries less risk. +- Legacy mode can be retired once all modules are stable and production-validated. + +## References + +- [Modular Runtime Architecture](../modular-design.md) +- [ADR-043](043-rust-mcp-runtime-sidecar-mode-model.md) diff --git a/docs/docs/architecture/adr/index.md b/docs/docs/architecture/adr/index.md index 73e748f0af..bc1bd73b93 100644 --- a/docs/docs/architecture/adr/index.md +++ b/docs/docs/architecture/adr/index.md @@ -40,10 +40,15 @@ This page tracks all significant design decisions made for ContextForge project, | 0033 | Tool Lookup Cache for invoke_tool | Accepted | Performance | 2025-01-20 | | 0035 | Query Parameter Authentication for Gateways | Accepted | Security | 2026-01-19 | | 0037 | External Plugin STDIO Launch with Command/Env Overrides | Accepted | Extensibility | 2026-01-28 | -| 0038 | Experimental Rust Transport Backend (Streamable HTTP) | Proposed | Performance | 2025-12-26 | +| 0038 | Experimental Rust Transport Backend (Streamable HTTP) | Superseded | Performance | 2025-12-26 | | 0039 | Adopt Fully Independent Plugin Crates Architecture | Accepted | Architecture | 2026-02-13 | | 0040 | Flexible Admin UI Section Visibility | Accepted | User Interface | 2026-02-16 | | 0041 | Top-Level Rust Workspace (Cargo.toml at Repository Root) | Accepted | Architecture | 2026-02-26 | | 0042 | Enforce Rust in the Build Process | Proposed | Build | 2026-02-26 | +| 0043 | Rust MCP Runtime Sidecar with Mode-Based Rollout | Accepted | Architecture | 2026-03-14 | +| 0044 | Module Communication Protocol (gRPC over UDS) | Proposed | Architecture | 2026-03-15 | +| 0045 | Authentication and Authorization Remain in Core | Proposed | Security | 2026-03-15 | +| 0046 | Shared-Nothing Between Protocol Modules | Proposed | Architecture | 2026-03-15 | +| 0047 | Incremental Migration Over Rewrite | Proposed | Architecture | 2026-03-15 | > ✳️ Add new decisions chronologically and link to 
them from this table. diff --git a/docs/docs/architecture/index.md b/docs/docs/architecture/index.md index d119f62e35..54955bda9f 100644 --- a/docs/docs/architecture/index.md +++ b/docs/docs/architecture/index.md @@ -4,7 +4,22 @@ ## High-Level Architecture Summary -**ContextForge** is a comprehensive production-grade gateway built on modern Python technologies with a performance-first approach. For a detailed visual diagram of the high-performance components (Rust-powered libraries, async patterns, caching layers, and Kubernetes scaling), see the [Performance Architecture Diagram](performance-architecture.md). +**ContextForge** is a comprehensive production-grade gateway built on modern +Python technologies with a performance-first approach. The primary control plane +remains Python, but the MCP streamable HTTP hot path can now optionally run +through a dedicated Rust runtime sidecar. For the current Rust MCP runtime +design, see [Rust MCP Runtime](rust-mcp-runtime.md). For the broader +high-performance view, see the +[Performance Architecture Diagram](performance-architecture.md). + +The broader target-state modular runtime architecture is documented separately +in [Modular Runtime Architecture](modular-design.md). That document generalizes +the current Rust MCP sidecar pattern into a reusable core-plus-modules design +for future MCP, A2A, LLM, and REST/gRPC runtimes. The related decisions are +captured in [ADR-043](adr/043-rust-mcp-runtime-sidecar-mode-model.md) through +[ADR-047](adr/047-incremental-migration-over-rewrite.md). The +implementation-ready contract for building new modules lives in the +[Modular Runtime Specification](modular-runtime/index.md). 
## Design Diagrams @@ -35,6 +50,10 @@ The following diagrams are generated by `make docs` and provide a quick visual r - Native **MCP (Model Context Protocol)** server implementation supporting protocol version 2025-03-26 - Transport mechanisms: **HTTP/JSON-RPC**, **Server-Sent Events (SSE)** with keepalive, **WebSocket**, **stdio** (for CLI integration), and **streamable-HTTP** - JSON-RPC 2.0 compliant message handling with bidirectional communication +- Optional **Rust MCP runtime sidecar** for streamable HTTP public ingress with + mode-based rollout (`off`, `shadow`, `edge`, `full`) +- In Rust `edge|full` mode, nginx routes public `/mcp` traffic directly to the + Rust runtime while Python remains authoritative for auth and RBAC **4. Federation & Registry Architecture** diff --git a/docs/docs/architecture/modular-design.md b/docs/docs/architecture/modular-design.md new file mode 100644 index 0000000000..f786b59e7e --- /dev/null +++ b/docs/docs/architecture/modular-design.md @@ -0,0 +1,676 @@ +# ContextForge Modular Runtime Architecture + +**Status:** Proposed target architecture and implementation entry point + +This document defines the target-state modular runtime architecture for +ContextForge. + +It is intended to support: + +- the existing MCP gateway +- the existing A2A gateway +- future LLM gateway runtimes +- future REST and gRPC gateway runtimes +- implementations in different languages, including Python, Rust, and Go + +## Purpose + +ContextForge already contains multiple protocol-facing runtime paths inside one +Python application. The Rust MCP runtime proves that a protocol runtime can be +split out into a separate implementation while the core platform remains the +system of record for security and catalog state. 
+ +This specification generalizes that idea into a reusable architecture: + +- a **core platform** that owns policy, persistence, catalogs, plugins, admin + UI, and observability +- one or more **protocol modules** that own protocol wire behavior and + transport or runtime semantics + +The goal is not to rewrite the product. The goal is to create a stable +architecture that can evolve incrementally from the current codebase. + +## Scope + +This document is about **runtime decomposition**, not package layout. + +It is complementary to +[ADR-019: Modular Architecture Split (14 Independent Modules)](adr/019-modular-architecture-split.md), +which is about packaging and repository structure. + +This document deliberately distinguishes between: + +- **implemented precedent** + The current Rust MCP runtime sidecar and existing external plugin runtimes. +- **target architecture** + The longer-term modular contract that future MCP, A2A, LLM, and REST or gRPC + runtimes should follow. +- **migration guidance** + The phased path from the current monolith to that target. 
+ +## Non-Goals + +This specification does not: + +- require a ground-up rewrite +- require all protocols to be extracted at once +- require all modules to be sidecars immediately +- freeze final protobuf package names or generated SDK layout +- replace protocol-specific documents such as + [Rust MCP Runtime](rust-mcp-runtime.md) + +## Relationship to Existing Architecture Docs + +| Document | Role | +|----------|------| +| [Rust MCP Runtime](rust-mcp-runtime.md) | Describes the currently implemented MCP sidecar/runtime path and rollout modes | +| [ADR-043](adr/043-rust-mcp-runtime-sidecar-mode-model.md) | Records the implemented Rust MCP sidecar and mode model | +| [Multitenancy](multitenancy.md) | Defines team scoping and visibility rules that remain core-owned | +| [OAuth Design](oauth-design.md) | Defines auth and credential handling that remain core-owned | +| [Plugin Framework](plugins.md) | Defines plugin behavior that remains centrally configured and enforced by the core | + +## How to Use This Specification + +Use the documents in this order: + +1. this page for the architectural rules and operating model +2. [Core SPI](modular-runtime/core-spi.md) for the module-to-core contract +3. [Module Descriptor](modular-runtime/module-descriptor.md) and + [Module Lifecycle](modular-runtime/module-lifecycle.md) for module + registration and startup behavior +4. [Error Model](modular-runtime/error-model.md) and + [Conformance](modular-runtime/conformance.md) for compatibility and release + requirements +5. 
the protocol profile for the module being implemented: + - [MCP Module Profile](modular-runtime/mcp-module.md) + - [A2A Module Profile](modular-runtime/a2a-module.md) + - [LLM Module Profile](modular-runtime/llm-module.md) + - [REST/gRPC Module Profile](modular-runtime/rest-grpc-module.md) + +## Key Decisions + +| Decision | Summary | Source | +|----------|---------|--------| +| Core owns policy; modules own protocol | Auth, RBAC, catalogs, plugins, persistence, and admin UI stay in the core; wire protocol and transport behavior move to modules | This document | +| Process boundary first | Sidecars are the default modular boundary; embedded runtimes are an optimization | This document | +| Default IPC transport | gRPC over Unix Domain Socket is the target-state module-to-core transport; HTTP/JSON remains an explicit fallback | [ADR-044](adr/044-module-communication-protocol.md) | +| Auth remains in core | Modules consume authenticated context; they do not become independent auth authorities | [ADR-045](adr/045-auth-remains-in-core.md) | +| Shared-nothing between modules | Modules do not import or call each other directly; cross-protocol behavior is mediated by the core | [ADR-046](adr/046-shared-nothing-between-modules.md) | +| Incremental migration | The architecture is adopted by refactoring the current system in phases, not by rewrite | [ADR-047](adr/047-incremental-migration-over-rewrite.md) | + +## Current Implemented Precedent + +ContextForge today is primarily a monolithic Python application, but two +existing patterns already prove the modular direction: + +1. **Rust MCP runtime sidecar** + The MCP streamable HTTP public path can run through a Rust sidecar while + Python remains authoritative for auth, token scoping, and RBAC. +2. **External plugin runtimes** + Plugins can already run out of process behind a language-neutral transport. + +These are important precedents, but they are not yet the full target modular +contract. 
+ +In particular, the current Rust MCP runtime is a **transition architecture**: + +- it is a real external runtime +- it proves sidecar deployment, direct ingress, and mode-based rollout +- it still contains performance-oriented implementation details that are more + specific than the long-term generic module boundary + +This document defines the steadier target boundary that future modules should +converge on. + +## Implementation Status + +The modular architecture is no longer purely speculative. One protocol module +is already implemented and validated. + +| Protocol family | Module status | Notes | +|-----------------|---------------|-------| +| MCP | Implemented | Rust MCP runtime sidecar exists today with mode-based rollout and direct-ingress support | +| A2A | Not yet extracted | Current A2A runtime remains embedded in Python | +| LLM | Not yet extracted | Current LLM proxy and chat flows remain embedded in Python | +| REST/gRPC | Not yet extracted | Current virtualization and service-management flows remain embedded in Python | + +The important consequence is that this spec is grounded in a working MCP module +rather than a hypothetical first extraction. + +## Current Precedent vs Target State + +The spec must be explicit about what is implemented today versus what future +modules should target. 
+ +| Topic | Implemented today | Target-state default | +|-------|-------------------|----------------------| +| First extracted runtime | Rust MCP sidecar | Additional protocol modules, potentially in Rust, Go, or Python | +| Sidecar transport to core | Narrow internal HTTP over local/private transport, including UDS or loopback depending on path | gRPC over UDS | +| Fallback transport | HTTP/JSON | HTTP/JSON | +| Ingress ownership | Both valid today: Python-owned ingress and direct Rust ingress depending on mode | Both valid patterns remain acceptable | +| Auth authority | Python core | Core platform | +| Plugin parity | Achieved through a mix of direct core-sensitive handling and selective delegation | Explicit SPI or core-delegation contract | +| Data-path optimizations | Rust MCP keeps targeted fast paths | Allowed, but must preserve contract and rollback behavior | + +## Architecture Principles + +1. **Core owns policy; modules own protocol.** + The core platform owns security, persistence, catalogs, plugins, + configuration, observability, and admin UI. Modules own transport, wire + format, session/runtime semantics, capability negotiation, and upstream + protocol behavior. + +2. **Process boundary first.** + The default modular boundary is a sidecar or sibling process. Embedded + in-process runtimes are allowed where justified, but they are not the + default design center. + +3. **Language-neutral contracts.** + Contracts between the core and modules must not depend on Python object + identity, ORM models, or framework internals. + +4. **Shared-nothing between modules.** + Modules do not import or call one another directly. Cross-protocol behavior + flows through the core. + +5. **Compatibility and rollback first.** + Each extraction step must preserve an operational rollback path. + +6. **Incremental migration over rewrite.** + The existing codebase remains the migration source; tests and behavior + remain the regression oracle. 
+ +## Reference Model + +The following diagram is logical, not strictly physical. A module may sit +behind core-managed routing or may own direct public ingress while still using +the core for policy and catalog decisions. + +```mermaid +flowchart TD + client[Client] + ingress[Ingress / Proxy / TLS termination] + core[Core Platform] + mcp[MCP Module] + a2a[A2A Module] + llm[LLM Module] + rest[REST or gRPC Module] + upstream[Upstream service or core-owned catalog action] + + client --> ingress + ingress --> core + ingress --> mcp + core --> mcp + core --> a2a + core --> llm + core --> rest + mcp --> core + a2a --> core + llm --> core + rest --> core + mcp --> upstream + a2a --> upstream + llm --> upstream + rest --> upstream +``` + +Two ingress patterns are valid: + +1. `client -> ingress -> core -> module` + Use when the core remains the public edge and the module is an internal + runtime behind it. +2. `client -> ingress -> module -> core SPI` + Use when the module owns the public protocol edge directly, as the Rust MCP + runtime already does in `edge` and `full` mode. + +### Responsibilities by Plane + +```mermaid +flowchart LR + subgraph ControlPlane[Core platform control and policy plane] + auth[Authentication and RBAC] + scope[Token scoping and visibility] + catalog[Catalogs and CRUD] + plugin[Plugin policy] + config[Config and secrets] + admin[Admin UI and observability] + end + + subgraph RuntimePlane[Protocol runtime plane] + wire[Wire parsing and serialization] + transport[Transport ownership] + session[Session or task runtime] + caps[Capability negotiation] + upstream[Upstream protocol behavior] + end +``` + +## Core Platform Responsibilities + +The core platform remains the common control plane and policy plane. The table +below is representative, not exhaustive. 
+ +| Responsibility | Notes | +|----------------|-------| +| Authentication | JWT verification, SSO integration, token normalization, revocation checks | +| Authorization | RBAC, team scoping, visibility filtering, deny-path behavior | +| Persistence | Database models, migrations, consistency, ownership metadata | +| Catalogs | Tools, resources, prompts, servers, gateways, agents, providers, and other core-owned records | +| CRUD and admin flows | Registration, update, delete, import/export, admin workflows | +| Plugin policy and configuration | Central plugin config, hook selection, hook execution policy | +| Prompt/completion/roots business services | Prompt rendering policy, completion services, roots services, and other catalog-backed non-wire operations | +| LLM and upstream provider control plane | Provider credentials, model configuration, policy-aware routing metadata | +| gRPC and REST control surfaces | Core-owned registration, exposure metadata, and governance for virtualized services | +| Observability | Traces, logs, metrics, audit signals, support bundles | +| Configuration and secrets | Global config precedence, secret resolution, encryption | +| Cross-protocol routing | Mediate calls between protocol modules through core-owned catalogs and services | +| Admin UI | Platform UI remains core-owned even when runtimes are modularized | + +The core does **not** own protocol wire parsing, protocol transport semantics, +or protocol-specific session state machines once those are extracted into a +module. + +## Protocol Module Responsibilities + +Each protocol module owns protocol-facing runtime behavior for one protocol +family. 
+ +| Responsibility | Example | +|----------------|---------| +| Wire parsing and serialization | MCP JSON-RPC, A2A request envelopes, future LLM request formats | +| Protocol transport | streamable HTTP, SSE, WebSocket, stdio, long-poll, push channels | +| Runtime/session semantics | MCP session lifecycle, A2A task state handling, LLM chat session flow | +| Capability negotiation | MCP `initialize`, A2A capability advertisement, future provider capability declarations | +| Upstream protocol behavior | MCP upstream client pooling, A2A invocation behavior, LLM provider relay logic | +| Protocol-specific health and stats | Runtime-owned counters, transport stats, protocol-specific readiness | + +Modules should not become independent sources of truth for security policy, +catalog ownership, or long-term persistence rules. + +## Module Runtime Contract + +The contract has three parts: + +1. **module identity** +2. **module lifecycle** +3. **core SPI service families** + +Those concrete documents live under [Modular Runtime Specification](modular-runtime/index.md). + +### Module Identity + +Every module should declare a stable descriptor with fields equivalent to: + +- module id +- protocol family +- implementation language +- module version +- supported SPI version(s) +- runtime mode + - embedded + - sidecar +- exposed capabilities +- health and stats endpoints or RPCs + +The exact wire schema is implementation detail. The architectural requirement +is that the core can discover what a module is, what contract version it +supports, and how to talk to it. + +### Module Lifecycle + +Every module should support these lifecycle phases: + +1. **register** + The module is discovered and its descriptor is loaded. +2. **initialize** + The core provides configuration, scoped dependencies, and any required + bootstrap state. +3. **ready** + The module can accept live traffic. +4. **drain** + The module stops accepting new work and lets in-flight work complete. +5. 
**shutdown** + The module releases resources and exits cleanly. + +At minimum, the core must be able to ask a module for: + +- readiness +- liveness +- version and capability metadata +- runtime stats + +### Core SPI Service Families + +The exact API surface will evolve, but the module contract should be organized +around stable service families rather than one-off internal endpoints. + +| Service family | What it provides | +|----------------|------------------| +| Auth and policy | Resolve caller context, validate authenticated identity, check permissions, enforce token-scoped visibility | +| Catalog read and invoke | List, fetch, and invoke tools, resources, prompts, agents, servers, gateways, providers, and related core-owned records through policy-aware services | +| Session and event services | Session lookup, ownership checks, replay/event access where the protocol requires shared session or event semantics | +| Plugin services | Execute or delegate plugin-sensitive pre/post operations under core-owned plugin policy | +| Observability | Trace context propagation, structured logging, audit events, module metrics publication | +| Configuration and secrets | Scoped config delivery, secret references, feature flags, core-provided defaults | +| Admin and health integration | Module stats, health, and optional descriptors the core UI can surface | + +Two constraints are intentionally fixed: + +- the architecture does **not** freeze exact final RPC names yet +- the architecture does **not** require one giant interface; multiple smaller + service definitions are preferred + +## Communication Model + +### Default Transport + +The target-state default module-to-core transport is: + +- **gRPC over Unix Domain Socket** + +Why: + +- language-neutral +- streaming support +- well understood code generation story for Python, Rust, and Go +- suitable for host-local sidecar communication + +This is the target-state default, not a claim about every implemented module +today. 
The current Rust MCP runtime is the main precedent and still uses a +mix of narrow internal HTTP over local/private transport depending on the path. + +### Fallback Transport + +The fallback transport is: + +- **HTTP/JSON over loopback or internal network** + +This is acceptable when: + +- a gRPC toolchain is undesirable +- a runtime only needs request/response behavior +- an operator environment prefers plain HTTP for debugging or policy reasons + +### Embedded Mode + +Embedded modules may bypass serialization and call the same conceptual contract +directly in-process. + +This is an optimization, not a different architecture. + +### No Direct Module-to-Module Calls + +Modules do not import or invoke each other directly. + +Cross-protocol behavior must be mediated by the core. + +Example: + +```mermaid +sequenceDiagram + participant MCP as MCP module + participant Core as Core platform + participant A2A as A2A module + + MCP->>Core: Invoke tool by catalog entry + Core->>Core: Apply auth, visibility, RBAC, plugin policy + Core->>A2A: Dispatch invoke to owning protocol runtime + A2A-->>Core: Structured result + Core-->>MCP: Structured result +``` + +That preserves language independence and keeps routing policy in one place. + +## Security and Trust Model + +The modular architecture does **not** distribute trust equally. + +### Core-Owned Security Responsibilities + +The core remains the source of truth for: + +- authentication +- token scoping +- RBAC +- secret storage and decryption +- rate limiting policy +- audit and security logging + +Modules may enforce the outcome of a core decision, but they do not become +independent security authorities. 
+ +### What Modules Receive + +Modules should receive: + +- a typed authenticated context +- permission decisions or permission-check APIs +- scoped resource visibility through catalog calls + +Modules should **not** be expected to: + +- interpret raw JWT claims as the source of truth +- fetch and decrypt stored credentials +- invent their own team-scoping semantics + +### Trust Boundary for Sidecars + +Sidecars must communicate with the core over a trusted local or private +channel. The deployment mechanism may vary, but the architectural +requirements are: + +- the core can authenticate the module channel +- arbitrary external clients cannot call privileged core-internal module APIs +- channel permissions or network policy are explicit + +## Cross-Protocol Mediation + +ContextForge already has cross-protocol behaviors: + +- A2A agents exposed as MCP tools +- LLM chat invoking MCP tools +- REST and gRPC services exposed as virtual servers or tools + +In the modular architecture, those behaviors stay possible, but the routing +belongs to the core. + +The core is responsible for: + +- deciding which catalog entry is being invoked +- determining the owning protocol/runtime +- applying policy, plugin rules, and observability +- dispatching to the appropriate module + +The key consequence is that modules remain isolated, while the product keeps a +single coherent governance model. + +## Plugin Model + +Plugins remain a core-owned concern. + +That means: + +- plugin configuration stays centralized +- the core defines which hooks run +- modules must preserve plugin parity on plugin-sensitive flows + +There are two acceptable implementation patterns: + +1. the module explicitly calls a core plugin SPI around the relevant operation +2. the module delegates a plugin-sensitive flow back to the core when parity + requires it + +This keeps plugin behavior consistent even when the fast path moves into a +different language. 
+ +## Deployment Patterns + +The architecture supports three deployment patterns. + +### 1. Monolithic / Embedded + +The core and modules run in one process. + +Use when: + +- minimizing operational complexity +- migrating incrementally +- performance-sensitive in-process execution is justified + +### 2. Hybrid + +Some protocols remain embedded while others move into sidecars. + +This is the current precedent with the Rust MCP runtime: + +- Python remains the core +- MCP may run through a Rust sidecar +- A2A and other runtime paths remain embedded in Python + +### 3. Full Sidecar Model + +Multiple protocol runtimes run as separate processes, possibly in different +languages, while the core remains the shared control plane. + +This is the long-term extensibility model for future A2A, LLM, and REST/gRPC +modules. + +## Configuration Model + +Configuration remains layered and core-owned. + +The architecture should distinguish: + +- **core-global settings** + Shared platform settings such as auth, database, Redis, plugin config, and + observability. +- **module-scoped settings** + Protocol-specific runtime settings such as protocol version behavior, + transport tuning, or runtime-specific timeouts. + +Module-scoped settings should use explicit namespacing and should be delivered +through the module runtime contract rather than by relying on unrestricted +global process imports. + +## Health, Failure, and Fallback + +Every module should define: + +- how it reports readiness and liveness +- how it reports degraded mode +- what happens if the core becomes unavailable +- what happens if the module becomes unavailable +- whether traffic can fall back to an embedded or legacy path + +This is especially important for incremental rollout. + +The current Rust MCP runtime already demonstrates this pattern through +mode-based rollout and rollback. Future modules should preserve the same +operational discipline. 
+ +## Testing and Release Requirements + +Every protocol module should be expected to prove: + +- contract compatibility with the core SPI +- protocol conformance for its protocol surface +- security deny paths +- fallback and rollback behavior +- plugin parity for plugin-sensitive flows +- performance and degradation characteristics appropriate to the protocol + +Where a module introduces deployment-specific behavior, release validation +should also cover: + +- compose or local stack validation +- Kubernetes or Helm validation where applicable +- upgrade and migration compatibility where applicable + +The concrete target-state test matrix is defined in +[Conformance](modular-runtime/conformance.md). + +## Migration Strategy + +The migration is intentionally phased. + +### Phase 0: Extract seams inside the monolith + +Create clearer boundaries inside the current Python code: + +- isolate protocol dispatch +- isolate policy and catalog boundaries +- reduce direct cross-service coupling where practical + +### Phase 1: Define the core SPI + +Define the first stable internal service families between core and modules. + +At this stage, modules may still be embedded. + +### Phase 2: Wrap existing runtimes behind module lifecycles + +Make protocol runtimes conform to a common lifecycle and capability model even +before all traffic crosses an IPC boundary. + +### Phase 3: Move selected runtimes to sidecars + +Use sidecars where the performance, isolation, or language goals justify it. +The current Rust MCP runtime is the first concrete example of this phase. + +### Phase 4: Add new protocol runtimes + +Introduce new A2A, LLM, and REST/gRPC runtimes behind the same architectural +contract. 
+ +### Phase 5: Optimize + +Only after the boundary is stable should the implementation optimize for: + +- direct hot paths +- embedded fast paths +- selective caching and event-stream ownership + +## What Is Decided vs What Is Still Open + +### Decided in principle + +- ContextForge should evolve toward a core-plus-modules runtime model. +- The core remains the policy and control plane. +- Modules are language-agnostic and process-boundary first. +- Shared-nothing between modules is a design rule. +- Incremental migration is the preferred path. + +### Still intentionally open + +- exact final SPI RPC names +- exact protobuf package layout +- exact module descriptor wire schema +- whether all plugin hooks are always explicit SPI calls versus selective core + delegation for parity-sensitive flows +- how much direct data-path optimization a module may keep before it must be + expressed through the generic SPI + +## Open Questions + +- What is the minimal first stable SPI version that supports both MCP and A2A + without overfitting to either? +- Which cross-module events deserve an event bus rather than synchronous + core-mediated routing? +- How should optional protocol surfaces be classified in release gating versus + follow-up compatibility work? +- What is the right balance between generic SPI purity and targeted fast paths + for performance-sensitive runtimes? 
+ +## Related Documents + +- [Modular Runtime Specification](modular-runtime/index.md) +- [Rust MCP Runtime](rust-mcp-runtime.md) +- [ADR-043: Rust MCP Runtime Sidecar with Mode-Based Rollout](adr/043-rust-mcp-runtime-sidecar-mode-model.md) +- [ADR-044: Module Communication Protocol](adr/044-module-communication-protocol.md) +- [ADR-045: Authentication and Authorization Remain in Core](adr/045-auth-remains-in-core.md) +- [ADR-046: Shared-Nothing Between Protocol Modules](adr/046-shared-nothing-between-modules.md) +- [ADR-047: Incremental Migration Over Rewrite](adr/047-incremental-migration-over-rewrite.md) +- [ADR-019: Modular Architecture Split (14 Independent Modules)](adr/019-modular-architecture-split.md) +- [Multitenancy](multitenancy.md) +- [OAuth Design](oauth-design.md) +- [Plugin Framework](plugins.md) diff --git a/docs/docs/architecture/modular-runtime/.pages b/docs/docs/architecture/modular-runtime/.pages new file mode 100644 index 0000000000..f7862155f3 --- /dev/null +++ b/docs/docs/architecture/modular-runtime/.pages @@ -0,0 +1,12 @@ +title: Modular Runtime Specification +nav: + - Overview: index.md + - Core SPI: core-spi.md + - Module Descriptor: module-descriptor.md + - Module Lifecycle: module-lifecycle.md + - Error Model: error-model.md + - Conformance: conformance.md + - MCP Module Profile: mcp-module.md + - A2A Module Profile: a2a-module.md + - LLM Module Profile: llm-module.md + - REST-gRPC Module Profile: rest-grpc-module.md diff --git a/docs/docs/architecture/modular-runtime/a2a-module.md b/docs/docs/architecture/modular-runtime/a2a-module.md new file mode 100644 index 0000000000..7468edee6c --- /dev/null +++ b/docs/docs/architecture/modular-runtime/a2a-module.md @@ -0,0 +1,85 @@ +# A2A Module Profile + +This profile defines how a future A2A module should map onto the current +ContextForge A2A surface. + +## Current Surface + +Today the A2A HTTP surface is embedded in `main.py` and uses the core service +layer directly. 
+ +Current live endpoints include: + +- `GET /a2a` and `GET /a2a/` for list +- `GET /a2a/{agent_id}` for fetch +- `POST /a2a/{agent_name}/invoke` for invoke + +CRUD endpoints also exist today, but they are core-owned admin operations and +should remain there in the modular design. + +The persisted A2A record already contains the fields a module will depend on: + +- endpoint URL +- protocol version +- capability or config metadata +- auth configuration +- team, owner, and visibility +- associated MCP tool linkage where the agent is exposed cross-protocol + +## What the A2A Module Owns + +The module should own: + +- A2A request parsing and response serialization +- protocol-specific discovery and read surface for agents +- invoke envelope construction and normalization +- outbound A2A transport behavior to target agents +- protocol-specific retries, timeouts, and future streaming or push behavior +- task or runtime state handling where A2A requires it +- protocol-specific metrics and runtime health + +## What Stays in Core + +The core should continue to own: + +- agent CRUD and persistence +- auth and token normalization +- RBAC and visibility filtering +- encrypted auth and OAuth secret storage +- ownership and mutation checks +- cross-protocol exposure of A2A agents as MCP tools + +That last point is important: if an A2A agent is exposed as an MCP tool, the +core still mediates that cross-protocol bridge. 
+ +## Required Policy Semantics + +The A2A module must preserve current product semantics: + +- token scoping remains separate from RBAC +- public-only tokens can only see public records +- team visibility uses normalized token team state +- admin bypass semantics come from the core, not from local JWT parsing +- hidden or inaccessible agents may intentionally use not-found semantics +- feature-flagged query-parameter auth remains a core-governed exception, not a + module-defined bypass + +## Required SPI Usage + +At minimum, an A2A module needs: + +- `AuthPolicyService` +- `CatalogService` for agent discovery and invoke +- `PluginService` for any parity-sensitive A2A hooks +- `ObservabilityService` +- `ConfigSecretsService` for module-scoped transport or timeout settings + +## Conformance Additions + +An A2A module should additionally prove: + +- invoke deny paths for wrong team, wrong owner, and public-only tokens +- correct handling of agent visibility modes +- correct propagation of outbound auth without exposing stored secrets +- correct cross-protocol behavior when an A2A agent is invoked through the MCP + tool bridge diff --git a/docs/docs/architecture/modular-runtime/conformance.md b/docs/docs/architecture/modular-runtime/conformance.md new file mode 100644 index 0000000000..a98ab810eb --- /dev/null +++ b/docs/docs/architecture/modular-runtime/conformance.md @@ -0,0 +1,88 @@ +# Conformance + +Every new protocol module should prove the same categories of correctness +before it is considered release-ready. + +## Minimum Required Gates + +### 1. Contract Compatibility + +The module must prove: + +- descriptor compatibility +- lifecycle compatibility +- SPI version negotiation +- correct startup failure on incompatible SPI + +### 2. 
Security and Policy + +The module must prove: + +- unauthenticated deny paths +- wrong-team deny paths +- insufficient-permission deny paths +- hidden-resource not-found behavior where the product requires it +- trusted-channel behavior for sidecar-only core APIs + +### 3. Protocol Correctness + +The module must prove: + +- required protocol surfaces +- capability negotiation +- correct optional-surface behavior +- stable request and response shapes +- structured error handling + +### 4. Plugin Parity + +The module must prove plugin-sensitive flows still behave correctly. + +For a protocol module, this means: + +- explicitly exercising active plugins, not only plugin-disabled stacks +- proving both the normal path and the parity-sensitive path +- documenting any remaining delegated or unsupported hooks + +### 5. Rollback and Degradation + +The module must prove: + +- health reporting +- degraded-state reporting +- rollback or fallback path +- safe failure when the core is unavailable + +### 6. Performance + +The module must prove: + +- no unacceptable regression on the intended hot paths +- no correctness failures under representative load +- no hidden bypass of policy or plugin behavior in the fast path + +## Environment Matrix + +At minimum, release validation should cover: + +- local or compose deployment +- the intended production deployment mode +- upgrade and migration behavior where the module changes deployment structure + +If the release story includes Helm or Kubernetes, that must be validated too. 
+ +## Suggested Evidence + +- focused unit tests +- live stack-backed E2E tests +- plugin parity tests +- protocol compliance suite where one exists +- benchmark or load sanity checks + +## Protocol Profiles + +Each protocol profile adds its own required checks: + +- [A2A Module Profile](a2a-module.md) +- [LLM Module Profile](llm-module.md) +- [REST/gRPC Module Profile](rest-grpc-module.md) diff --git a/docs/docs/architecture/modular-runtime/core-spi.md b/docs/docs/architecture/modular-runtime/core-spi.md new file mode 100644 index 0000000000..384daed937 --- /dev/null +++ b/docs/docs/architecture/modular-runtime/core-spi.md @@ -0,0 +1,291 @@ +# Core SPI + +This document defines the target-state service families between the core +platform and a protocol module. + +The preferred transport is gRPC over UDS. During transition, HTTP/JSON over a +trusted local channel is also acceptable. The information model below is what +matters. + +## Design Rules + +- Modules do not read the database directly as their primary source of truth. +- Modules do not interpret raw JWT claims as authoritative policy. +- Modules do not fetch or decrypt stored credentials directly. +- Modules call the core for policy-sensitive catalog access. +- Modules may keep protocol-local caches, but cache invalidation still follows + core-owned rules. + +## Service Families + +### Suggested First-Cut IDL Package Layout + +The architecture does not freeze final generated package names, but a new +module should assume a layout equivalent to: + +- `core.spi.auth.v1` +- `core.spi.catalog.v1` +- `core.spi.plugin.v1` +- `core.spi.session.v1` +- `core.spi.config.v1` +- `core.spi.observability.v1` + +This is a useful planning baseline for a Rust A2A module or a Go LLM proxy +module even if the exact final packages evolve. + +### AuthPolicyService + +Provides authenticated context and permission decisions. 
+ +Required operations: + +- `ResolveCaller` +- `CheckPermission` +- `CheckCatalogAccess` +- `ValidateSessionBinding` + +Illustrative request and response shapes: + +```json +{ + "resolveCallerRequest": { + "transport": "streamable_http", + "headers": {"authorization": "Bearer ..."}, + "clientIp": "203.0.113.10", + "requestedServerId": "server-123" + } +} +``` + +```json +{ + "authenticatedContext": { + "subject": { + "userEmail": "alice@example.com", + "isAdmin": false, + "tokenTeams": ["team-alpha"] + }, + "visibilityScope": { + "serverId": "server-123", + "permissions": ["tools.read", "resources.read"], + "ipRestrictions": [], + "timeRestrictions": {} + }, + "trace": { + "requestId": "req-123", + "correlationId": "corr-456" + } + } +} +``` + +The important invariant is semantic, not syntactic: + +- `tokenTeams = null` and `isAdmin = true` means unrestricted admin context +- `tokenTeams = []` means public-only visibility +- team membership and visibility must use the same normalization as the core + +### CatalogService + +Provides policy-aware access to core-owned records. + +Required operation classes: + +- list records +- fetch one record +- invoke a record-backed action +- subscribe to record-change streams where the protocol needs them + +Representative entity families: + +- tools +- resources +- prompts +- agents +- servers +- gateways +- LLM providers and models +- roots + +Illustrative list request: + +```json +{ + "listCatalogRequest": { + "entityType": "prompt", + "serverId": "server-123", + "authenticatedContextRef": "ctx-abc", + "filters": { + "activeOnly": true + } + } +} +``` + +Illustrative invoke request: + +```json +{ + "invokeCatalogRequest": { + "entityType": "tool", + "entityId": "tool-123", + "serverId": "server-123", + "authenticatedContextRef": "ctx-abc", + "arguments": { + "timezone": "UTC" + } + } +} +``` + +### PluginService + +Modules must preserve plugin parity on plugin-sensitive flows. + +Two allowed patterns: + +1. 
explicit hook execution through the SPI +2. full delegation of a parity-sensitive flow back to the core + +Required hook classes: + +- pre-fetch +- post-fetch +- pre-invoke +- post-invoke +- request or response mutation where the product already supports it + +Illustrative hook call: + +```json +{ + "executeHookRequest": { + "hook": "resource_post_fetch", + "entityType": "resource", + "entityName": "time://formats", + "serverId": "server-123", + "authenticatedContextRef": "ctx-abc", + "payload": { + "contents": [{"uri": "time://formats", "mimeType": "text/plain", "text": "UTC"}] + } + } +} +``` + +### SessionEventService + +Needed for protocols with shared session or task state. + +Required when the protocol needs: + +- stable session ownership +- replay or resume +- distributed event history +- ownership validation across workers + +Representative operations: + +- `CreateSession` +- `GetSession` +- `ValidateSessionOwner` +- `AppendEvent` +- `ReplayEvents` +- `DeleteSession` + +### ConfigSecretsService + +Provides module-scoped configuration and secret references. + +Required capabilities: + +- get module config +- resolve core-managed feature flags +- resolve secret references to a usable form without exposing unrelated secrets + +Modules should receive only the settings they need, not a whole-process +configuration dump. + +### ObservabilityService + +Provides structured platform integration. + +Required capabilities: + +- emit structured logs +- publish counters and histograms +- attach trace context +- emit audit events + +Modules may expose protocol-local metrics, but the core remains the system of +record for shared operational visibility. 
+ +## Minimum Service-Family Matrix + +| Module family | Required service families | Usually optional | +|---------------|---------------------------|------------------| +| MCP | AuthPolicy, Catalog, Plugin, SessionEvent, Observability, ConfigSecrets | Additional module-local optimizations | +| A2A | AuthPolicy, Catalog, Observability, ConfigSecrets | Plugin, SessionEvent | +| LLM | AuthPolicy, Catalog, Observability, ConfigSecrets | Plugin, SessionEvent | +| REST/gRPC | AuthPolicy, Catalog, Observability, ConfigSecrets | Plugin, SessionEvent | + +## Authenticated Context + +Every SPI call that depends on caller identity should carry either: + +- a full authenticated context +- a short-lived authenticated-context reference issued by the core + +The minimum fields are: + +- user identity +- admin status +- normalized token team state +- effective server scope if one is already known +- permission or scope restrictions attached to the token +- request and trace correlation identifiers + +## Invocation Envelope + +All invoke-style operations should preserve the same conceptual envelope: + +- target record +- effective server or gateway context +- authenticated context reference +- input arguments +- trace metadata +- optional delegation hints + +That allows a Rust A2A module and a Go LLM module to call the same core +services without inventing a protocol-specific policy seam. 
+ +## Typical Flow + +```mermaid +sequenceDiagram + participant Client + participant Module + participant Core + participant Upstream + + Client->>Module: protocol request + Module->>Core: ResolveCaller + Core-->>Module: AuthenticatedContext + Module->>Core: CheckCatalogAccess / InvokeCatalogAction + Core-->>Module: Policy-approved target and action result + Module->>Upstream: protocol-specific outbound call if needed + Upstream-->>Module: upstream result + Module->>Core: ExecuteHook or emit audit event + Module-->>Client: protocol response +``` + +## Versioning + +The SPI must be explicitly versioned. + +Rules: + +- the core declares supported SPI versions +- the module declares supported SPI versions +- incompatible versions fail at startup +- additive changes are preferred +- protocol capability negotiation is separate from SPI version negotiation diff --git a/docs/docs/architecture/modular-runtime/error-model.md b/docs/docs/architecture/modular-runtime/error-model.md new file mode 100644 index 0000000000..fac25fa23b --- /dev/null +++ b/docs/docs/architecture/modular-runtime/error-model.md @@ -0,0 +1,78 @@ +# Error Model + +Modules in different languages must still present a coherent failure model to +the core and to clients. 
+ +## Canonical Error Categories + +| Category | Meaning | +|----------|---------| +| `INVALID_ARGUMENT` | Client supplied malformed or semantically invalid input | +| `UNAUTHENTICATED` | Caller identity could not be established | +| `PERMISSION_DENIED` | Caller is authenticated but not allowed | +| `NOT_FOUND` | Resource intentionally absent or hidden | +| `CONFLICT` | Request conflicts with current state | +| `FAILED_PRECONDITION` | State is valid but not ready for this operation | +| `RATE_LIMITED` | Policy denied due to rate or quota | +| `UNSUPPORTED` | Optional feature or method is not supported | +| `UNAVAILABLE` | Core, module, or upstream dependency is temporarily unavailable | +| `UPSTREAM_ERROR` | Upstream protocol peer failed | +| `INTERNAL` | Unexpected internal failure | + +## Required Error Envelope + +Every structured module error should include: + +- canonical category +- stable machine-readable code +- human-readable message safe for clients +- origin + - `module` + - `core` + - `plugin` + - `upstream` +- retryability hint +- trace or correlation id when available + +Example: + +```json +{ + "error": { + "category": "PERMISSION_DENIED", + "code": "a2a.invoke.denied", + "message": "Access denied", + "origin": "core", + "retryable": false, + "traceId": "trace-123" + } +} +``` + +## Safety Rules + +- Do not expose stack traces to clients. +- Do not expose raw `err.to_string()` data from internal libraries on public + paths. +- Hide existence where the current product intentionally uses not-found + semantics for protected records. +- Preserve protocol-correct error mapping where the protocol defines it. 
+ +## Mapping Guidance + +| Canonical category | HTTP | gRPC | +|-------------------|------|------| +| `INVALID_ARGUMENT` | `400` | `INVALID_ARGUMENT` | +| `UNAUTHENTICATED` | `401` | `UNAUTHENTICATED` | +| `PERMISSION_DENIED` | `403` | `PERMISSION_DENIED` | +| `NOT_FOUND` | `404` | `NOT_FOUND` | +| `CONFLICT` | `409` | `ALREADY_EXISTS` or `ABORTED` | +| `FAILED_PRECONDITION` | `412` or `400` | `FAILED_PRECONDITION` | +| `RATE_LIMITED` | `429` | `RESOURCE_EXHAUSTED` | +| `UNSUPPORTED` | `400`, `404`, or protocol-specific unsupported response | `UNIMPLEMENTED` | +| `UNAVAILABLE` | `503` | `UNAVAILABLE` | +| `UPSTREAM_ERROR` | `502` | `UNKNOWN` or mapped upstream status | +| `INTERNAL` | `500` | `INTERNAL` | + +Protocol-specific documents may add more precise mappings, but they must not +break these semantics. diff --git a/docs/docs/architecture/modular-runtime/index.md b/docs/docs/architecture/modular-runtime/index.md new file mode 100644 index 0000000000..c6f5c034b7 --- /dev/null +++ b/docs/docs/architecture/modular-runtime/index.md @@ -0,0 +1,66 @@ +# Modular Runtime Specification + +This section turns the [Modular Runtime Architecture](../modular-design.md) +into an implementation-oriented specification that another team can use to +build a protocol module in Rust, Go, or Python. + +The intent is not to freeze every future protobuf field today. The intent is +to define the minimum contract surface clearly enough that: + +- the existing Rust MCP module can be understood as the reference implementation +- a Rust A2A module can be implemented without re-designing the trust model +- a Go LLM proxy module can be implemented without guessing where policy lives +- a REST or gRPC module can be implemented without inventing a different + lifecycle or error model + +## Reading Order + +1. [Core SPI](core-spi.md) +2. [Module Descriptor](module-descriptor.md) +3. [Module Lifecycle](module-lifecycle.md) +4. [Error Model](error-model.md) +5. [Conformance](conformance.md) +6. 
the protocol profile that matches the module being implemented + - [MCP Module Profile](mcp-module.md) + - [A2A Module Profile](a2a-module.md) + - [LLM Module Profile](llm-module.md) + - [REST/gRPC Module Profile](rest-grpc-module.md) + +## Contract Status + +This spec set is **normative at the architecture level**: + +- the boundaries are intended +- the ownership rules are intended +- the lifecycle and required fields are intended + +It is **illustrative at the wire-schema level**: + +- example JSON and proto-like structures define the required information model +- final generated bindings can still evolve as long as they preserve those + semantics + +## Implemented Precedent + +The first implemented precedent is the +[Rust MCP Runtime](../rust-mcp-runtime.md). + +That precedent proves: + +- a protocol runtime can move out of Python +- a module can own direct public ingress +- the core can remain authoritative for auth, token scoping, and RBAC +- plugin parity and rollback can still be preserved + +It does **not** mean that every future module must copy the exact current Rust +MCP seam. The target-state contracts in this section are slightly more generic +than the current implementation. + +## Status by Protocol Family + +| Protocol family | Status | +|-----------------|--------| +| MCP | Implemented as a Rust sidecar/runtime path | +| A2A | Planned against this spec | +| LLM | Planned against this spec | +| REST/gRPC | Planned against this spec | diff --git a/docs/docs/architecture/modular-runtime/llm-module.md b/docs/docs/architecture/modular-runtime/llm-module.md new file mode 100644 index 0000000000..e0e34ce87c --- /dev/null +++ b/docs/docs/architecture/modular-runtime/llm-module.md @@ -0,0 +1,82 @@ +# LLM Module Profile + +This profile defines how a future LLM proxy or chat module should map onto the +current ContextForge LLM surfaces. 
+ +## Current Surface + +Today there are two main LLM-facing surfaces: + +- **OpenAI-compatible proxy** + - `POST /chat/completions` + - `GET /models` +- **Session-oriented chat** + - `/llmchat/connect` + - `/llmchat/chat` + - `/llmchat/disconnect` + - `/llmchat/status/{user_id}` + - `/llmchat/config/{user_id}` + - `/llmchat/gateway/models` + +Those surfaces are currently implemented through Python routers and services. + +The important split is that ContextForge already has both: + +- a direct OpenAI-compatible proxy surface +- a higher-level chat surface that coordinates models, servers, and session + state + +## What the LLM Module Owns + +A future LLM module should own: + +- request parsing for OpenAI-compatible and session-style chat surfaces +- streaming transport behavior +- provider relay runtime behavior +- chat-session orchestration and protocol-local session state +- provider-specific retries, deadlines, and streaming normalization +- protocol-local metrics and runtime health + +## What Stays in Core + +The core should continue to own: + +- provider and model registry CRUD +- provider credentials and secret handling +- auth, RBAC, and token-scope policy +- model visibility and governance +- prompt, tool, and resource catalogs +- plugin policy +- admin UI and provider-management workflows +- any shared governance around which virtual servers or model records are + exposed to which callers + +## Required SPI Usage + +A future LLM module will typically require: + +- `AuthPolicyService` +- `CatalogService` for model lookup and MCP-facing resource access +- `PluginService` where chat or provider flows become plugin-sensitive +- `ObservabilityService` +- `ConfigSecretsService` +- `SessionEventService` if shared chat-session semantics are extracted + +## Cross-Protocol Constraint + +If the LLM module can call MCP tools or prompts, it must not call another +module directly. 
The core still decides: + +- what catalog entry is being invoked +- what protocol owns it +- what policy and plugin rules apply + +## Conformance Additions + +An LLM module should additionally prove: + +- non-streaming and streaming parity +- model visibility and deny paths +- provider-auth failure handling without leaking sensitive details +- correct cross-protocol invocation when chat flows reach MCP-backed tools or + prompts diff --git a/docs/docs/architecture/modular-runtime/mcp-module.md b/docs/docs/architecture/modular-runtime/mcp-module.md new file mode 100644 index 0000000000..8017b09eb2 --- /dev/null +++ b/docs/docs/architecture/modular-runtime/mcp-module.md @@ -0,0 +1,85 @@ +# MCP Module Profile + +This profile maps the implemented Rust MCP module onto the modular runtime +specification and defines what future MCP implementations should preserve. + +## Current Status + +MCP is the first implemented protocol module in ContextForge. + +The current implementation is the +[Rust MCP Runtime](../rust-mcp-runtime.md), which already proves: + +- sidecar deployment +- direct public ingress in `edge` and `full` mode +- core-owned auth, token scoping, and RBAC +- plugin parity on validated flows +- rollback through mode-based rollout + +## What the MCP Module Owns + +The MCP module owns: + +- MCP wire parsing and serialization +- transport behavior for streamable HTTP and related protocol edge behavior +- session lifecycle and capability negotiation +- replay, resume, and live-stream runtime behavior where enabled +- protocol-specific upstream MCP client behavior +- protocol-specific health and stats + +## What Stays in Core + +The core continues to own: + +- authentication and token normalization +- RBAC and visibility filtering +- core-owned catalogs for tools, resources, prompts, servers, and gateways +- plugin configuration and policy +- admin UI and platform observability +- cross-protocol mediation + +## Current Implemented Seam vs Target-State Seam + +The 
current Rust MCP module is the reference implementation, but it is still a +transition architecture in one important respect: + +- implemented today: + - trusted internal HTTP over UDS or loopback on some seams +- target-state default: + - gRPC over UDS for the core SPI + +That difference is acceptable. The current module proves the boundary; the +target-state SPI documents how future modules should converge. + +## Required Invariants + +Any MCP module, including the existing Rust one, must preserve: + +- core-owned auth and RBAC authority +- session ownership and isolation +- plugin parity on plugin-sensitive flows +- rollback and degraded-mode safety +- no direct module-to-module calls for cross-protocol behavior + +## Required SPI Usage + +An MCP module requires: + +- `AuthPolicyService` +- `CatalogService` +- `PluginService` +- `SessionEventService` +- `ObservabilityService` +- `ConfigSecretsService` + +## Release Expectations + +An MCP module should be held to the strongest conformance bar because it is the +first extracted runtime and the precedent for later modules. + +That means: + +- live stack-backed protocol tests +- deny-path and isolation tests +- plugin parity tests +- benchmark validation on the intended hot paths diff --git a/docs/docs/architecture/modular-runtime/module-descriptor.md b/docs/docs/architecture/modular-runtime/module-descriptor.md new file mode 100644 index 0000000000..b8bcb40472 --- /dev/null +++ b/docs/docs/architecture/modular-runtime/module-descriptor.md @@ -0,0 +1,110 @@ +# Module Descriptor + +Every protocol module must expose a stable descriptor that the core can use for +discovery, compatibility checks, health integration, and release validation. 
+ +## Required Fields + +| Field | Meaning | +|-------|---------| +| `moduleId` | Stable module identifier, for example `mcp-rust-runtime` | +| `protocolFamily` | One of `mcp`, `a2a`, `llm`, `rest-grpc`, or a future family | +| `implementationLanguage` | `python`, `rust`, `go`, or another language identifier | +| `moduleVersion` | Module build or release version | +| `spiVersions` | Supported core SPI versions | +| `runtimeModes` | Supported runtime modes such as `embedded`, `sidecar` | +| `ingressModes` | Whether the module can run behind core routing, direct public ingress, or both | +| `capabilities` | Declared protocol and runtime capabilities | +| `health` | How health and readiness are queried | +| `stats` | Optional runtime metrics surface | +| `pluginParity` | Which plugin-sensitive flows are fully supported, delegated, or not yet supported | +| `fallbackStrategy` | Whether rollback to legacy or embedded path exists | + +## Example Descriptor + +```json +{ + "moduleId": "a2a-rust-runtime", + "protocolFamily": "a2a", + "implementationLanguage": "rust", + "moduleVersion": "0.1.0", + "spiVersions": ["v1alpha1"], + "runtimeModes": ["sidecar"], + "ingressModes": ["core-routed"], + "capabilities": { + "discovery": true, + "invoke": true, + "taskState": true, + "streaming": false, + "pushNotifications": false + }, + "health": { + "readiness": "grpc", + "liveness": "grpc" + }, + "pluginParity": { + "preInvoke": "delegate", + "postInvoke": "delegate" + }, + "fallbackStrategy": { + "supportsRollback": true, + "fallbackPath": "python-core" + } +} +``` + +Second illustrative example for a Go LLM proxy module: + +```json +{ + "moduleId": "llm-go-proxy", + "protocolFamily": "llm", + "implementationLanguage": "go", + "moduleVersion": "0.1.0", + "spiVersions": ["v1alpha1"], + "runtimeModes": ["sidecar"], + "ingressModes": ["core-routed"], + "capabilities": { + "chatCompletions": true, + "streaming": true, + "sessionChat": true, + "providerRelay": true + } +} +``` + +## 
Capability Taxonomy + +Capabilities should be declarative, not inferred from language or module name. + +Recommended categories: + +- ingress +- transport +- request or response streaming +- session or task state +- replay or resume +- subscriptions +- prompt rendering +- resource reads +- tool or agent invocation +- provider relay +- plugin parity support + +## Descriptor Rules + +- The descriptor must be available before live traffic. +- Capabilities must be honest. Unsupported optional protocol surfaces must be + declared as unsupported, not silently dropped. +- The descriptor must be sufficient for the core to decide: + - whether the module can be started + - whether a given deployment mode is valid + - whether the module satisfies release policy for the protocol family + +## Protocol-Specific Notes + +- A Rust A2A module should declare task-state and invoke support explicitly. +- A Go LLM proxy module should declare both chat-completion and streaming + support explicitly. +- A REST or gRPC module should declare whether it owns reflection, OpenAPI + import, or only invocation relay. diff --git a/docs/docs/architecture/modular-runtime/module-lifecycle.md b/docs/docs/architecture/modular-runtime/module-lifecycle.md new file mode 100644 index 0000000000..0419239bb5 --- /dev/null +++ b/docs/docs/architecture/modular-runtime/module-lifecycle.md @@ -0,0 +1,87 @@ +# Module Lifecycle + +Every module must support the same lifecycle phases, regardless of language. + +## Lifecycle Phases + +1. **register** + The core discovers the module and reads its descriptor. +2. **initialize** + The core provides module-scoped configuration, trust material, and SPI + connection information. +3. **ready** + The module is healthy and can accept live traffic. +4. **drain** + The module stops accepting new work and lets in-flight work complete. +5. **shutdown** + The module releases resources and exits cleanly. 
+ +## Lifecycle Sequence + +```mermaid +sequenceDiagram + participant Core + participant Module + + Core->>Module: register / descriptor query + Module-->>Core: descriptor + Core->>Module: initialize(config, spi, trust) + Module-->>Core: initialized + Core->>Module: readiness probe + Module-->>Core: ready + Note over Core,Module: live traffic + Core->>Module: drain + Module-->>Core: draining + Core->>Module: shutdown + Module-->>Core: stopped +``` + +## Initialize Payload + +The initialize payload should include: + +- module-scoped configuration +- supported SPI version selected by the core +- how to reach the core SPI +- trace and observability configuration +- any trusted channel or module authentication material +- deployment mode +- explicit fallback expectations + +It should not include unrestricted access to the core configuration model. + +## Readiness Semantics + +A module is not ready merely because the process is listening. + +Ready means: + +- descriptor is loaded +- configuration was accepted +- SPI compatibility was established +- any required warmup completed +- the module can enforce required deny paths safely + +## Drain Semantics + +Drain should: + +- reject new live traffic +- keep ownership checks and in-flight responses correct +- preserve resumable or replayable state where the protocol requires it +- expose a clear draining status to the core + +## Failure Rules + +- If SPI compatibility fails, startup must fail before traffic. +- If the core becomes unavailable, the module must report degraded state. +- If the module cannot preserve protocol correctness safely, it must fail + closed or trigger rollback rather than continue in a partial-trust mode. 
+ +## Rollback Requirement + +Every production-facing extracted module should document: + +- whether rollback to legacy or embedded path exists +- how rollback is triggered +- what state must be preserved during rollback diff --git a/docs/docs/architecture/modular-runtime/rest-grpc-module.md b/docs/docs/architecture/modular-runtime/rest-grpc-module.md new file mode 100644 index 0000000000..2d833428ff --- /dev/null +++ b/docs/docs/architecture/modular-runtime/rest-grpc-module.md @@ -0,0 +1,63 @@ +# REST/gRPC Module Profile + +This profile defines how a future REST or gRPC module should map onto the +current virtualized service surfaces. + +## Current Surface + +Today ContextForge can: + +- expose REST-backed tools and virtual servers +- register and manage gRPC services +- use OpenAPI import or reflection-style discovery to create gateway-managed + records + +This is still largely core-owned today. + +That means a future REST/gRPC module is more likely to be a runtime extraction +than a greenfield subsystem. The registration and governance model already +exists in the core. + +## What the REST/gRPC Module Owns + +A future REST/gRPC module should own: + +- protocol-specific outbound transport behavior +- reflection or discovery runtime behavior where enabled +- protocol-specific request and response normalization +- streaming semantics where the underlying protocol supports them +- runtime metrics, health, and deadlines + +## What Stays in Core + +The core should continue to own: + +- service registration and persistence +- visibility, ownership, and governance policy +- generated tool, prompt, or resource catalog records +- auth, RBAC, and token scoping +- secret storage and TLS material governance +- cross-protocol exposure into MCP or other front-door protocols + +In other words, the module owns protocol behavior, not the registry of record. 
+ +## Required SPI Usage + +At minimum, a REST/gRPC module will usually require: + +- `AuthPolicyService` +- `CatalogService` +- `ObservabilityService` +- `ConfigSecretsService` +- optionally `PluginService` if response mutation or policy hooks are required + +## Conformance Additions + +A REST/gRPC module should additionally prove: + +- SSRF and target-validation rules remain enforced +- TLS and metadata handling preserve the current trust model +- reflection or OpenAPI-derived surfaces do not bypass core visibility or + ownership rules +- virtualized service behavior remains consistent when surfaced through MCP or + another protocol diff --git a/docs/docs/architecture/performance-architecture.md b/docs/docs/architecture/performance-architecture.md index 01f2ef2e77..684dd5a7d4 100644 --- a/docs/docs/architecture/performance-architecture.md +++ b/docs/docs/architecture/performance-architecture.md @@ -169,77 +169,63 @@ This diagram showcases the performance-optimized architecture of ContextForge, h └─────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` -## MCP Streamable HTTP Request Path +## MCP Streamable HTTP Request Paths -Every MCP request to `/servers/{server_id}/mcp` passes through these layers: +ContextForge now has two materially different MCP request paths, depending on +the Rust runtime mode. 
+### Mode summary + +| Mode | Public `/mcp` ingress | Session/runtime ownership | +|------|------------------------|---------------------------| +| `off` | Python | Python | +| `shadow` | Python | Python (Rust sidecar present internally only) | +| `edge` | Rust | Mixed: Rust ingress, Python still backs more MCP internals | +| `full` | Rust | Rust ingress plus Rust session/event/resume/live-stream/affinity cores | + +### Python-owned public path (`off`, `shadow`) + +```text +Client Request + -> NGINX + -> Python gateway middleware/auth/token scoping + -> Python MCP session manager + handlers + -> upstream MCP server ``` -Client Request (JSON-RPC over HTTP POST) - │ - ▼ -┌─────────────────────────────────────────────┐ -│ NGINX (Edge/Proxy) │ -│ • least_conn load balancing │ -│ • keepalive 512 per worker │ -│ • No caching for /mcp (POST requests) │ -└─────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────┐ -│ GATEWAY MIDDLEWARE STACK │ -│ 1. SecurityHeaders, CORS │ -│ 2. MCPPathRewrite + Auth │ -│ • JWT verification (HMAC) │ -│ • Token revocation check (DB/cache) │ -│ • User lookup (DB/cache) │ -│ • Team resolution (DB/cache) │ -│ 3. Token scoping (Layer 1 auth) │ -│ 4. 
Request logging │ -└─────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────┐ -│ MCP SDK SessionManager │ -│ • JSON-RPC envelope parsing │ -│ • Session tracking (stateless by default) │ -│ • Context variable propagation │ -│ • Handler method routing │ -└─────────────────────────────────────────────┘ - │ - ├── tools/list ─────┐ - ├── tools/call ─────┤ - ├── resources/list ─┤ - ├── prompts/list ───┤ - └── ping ───────────┘ - │ - ▼ -┌─────────────────────────────────────────────┐ -│ MCP HANDLER │ -│ • RBAC permission check (Layer 2 auth) │ -│ • Server/tool lookup (DB query) │ -│ • For tools/call: upstream proxy │ -│ via MCP Session Pool (if enabled) │ -└─────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────┐ -│ UPSTREAM MCP SERVER │ -│ (fast_test_server, fast_time, plugins) │ -│ • Executes tool logic │ -│ • Returns JSON-RPC result │ -└─────────────────────────────────────────────┘ + +### Rust-owned public path (`edge`, `full`) + +```text +Client Request + -> NGINX + -> Rust public MCP listener + -> trusted Python auth endpoint + -> Rust MCP routing/session/runtime logic + -> upstream MCP server or narrow Python internal route ``` +Important current behavior: + +- Python remains authoritative for JWT auth, token scoping, and RBAC in all + modes. +- `edge|full` remove the old public Python ingress hop by routing nginx + directly to Rust. +- `full` also moves MCP session, event-store, resume, live-stream, and + affinity/owner-worker logic into Rust. +- `shadow` is the safety-first fallback mode: the Rust sidecar is running, but + public `/mcp` stays mounted on Python. 
+ ### Performance Characteristics by Layer | Layer | Typical Latency | Scaling Bottleneck | Key Tunable | |-------|----------------|-------------------|-------------| | nginx | <1ms | Not a bottleneck | `keepalive`, `worker_connections` | -| Middleware + Auth | 5-15ms | Auth DB queries | `AUTH_CACHE_*_TTL`, `AUTH_CACHE_BATCH_QUERIES` | -| MCP SDK SessionManager | 2-5ms | JSON-RPC parsing, context vars | `JSON_RESPONSE_ENABLED` | +| Python auth/control path | 5-15ms | Auth DB/cache queries | `AUTH_CACHE_*`, `AUTH_CACHE_BATCH_QUERIES` | +| Rust public ingress (`edge`, `full`) | low single-digit ms | Syscall/network overhead | keepalive, upstream reuse, request shaping | +| Python MCP session manager (`off`, `shadow`) | 2-5ms | JSON-RPC parsing, context vars | `JSON_RESPONSE_ENABLED` | | RBAC check | 1-5ms | Permission DB queries | Role cache TTL (5 min internal) | -| tools/list (DB) | 5-10ms | Sequential table scans | `REGISTRY_CACHE_TOOLS_TTL` | -| tools/call (upstream) | 10-200ms | Upstream server + network | `MCP_SESSION_POOL_ENABLED` | +| tools/list / resources / prompts | 5-10ms | DB and compatibility paths | cache TTLs, Rust specialized handlers | +| tools/call (upstream) | 10-200ms | Upstream server + network | upstream session reuse, direct execution, RMCP client reuse | ### Feature Flags and Middleware Overhead @@ -249,12 +235,21 @@ The most impactful features to disable when not needed are: admin UI, A2A protoc ### Key Architectural Insight -The `/rpc` endpoint and the `/servers/{id}/mcp` endpoint serve the same logical operations (tools/list, tools/call) but follow different code paths: +The important transport distinction is no longer only `/rpc` versus `/mcp`. +It is now also **Python-owned MCP** versus **Rust-owned public MCP ingress**: + +- **`/rpc`** still benefits heavily from Redis-backed caches and does not follow + the streamable HTTP MCP session path. 
+- **Python MCP (`off`, `shadow`)** still pays the full Python middleware, + session-manager, and handler cost on the public path. +- **Rust MCP (`edge`, `full`)** removes the public Python ingress hop and moves + progressively more MCP session/runtime work to Rust, but Python auth/RBAC + remains part of the control plane. -- **`/rpc`**: Uses Redis-backed caching (registry cache, tool lookup cache) for most lookups. Under load, Redis handles the read pressure, keeping PgBouncer/PostgreSQL near idle. -- **`/mcp`**: Routes through the MCP SDK session manager, which executes its own handler functions. These handlers query the database via SQLAlchemy for server resolution, tool lookup, and RBAC checks. The auth cache (Redis-backed, TTL up to 300s) mitigates some of this, but RBAC and server/tool lookups still hit the database. +This means that scaling MCP throughput now depends on two different concerns: -This means that scaling MCP throughput depends heavily on reducing per-request database queries in the MCP transport handlers. +1. shrinking Python auth/control work that still happens for Rust MCP traffic +2. minimizing per-request transport and upstream costs on the Rust side --- diff --git a/docs/docs/architecture/rust-mcp-runtime.md b/docs/docs/architecture/rust-mcp-runtime.md new file mode 100644 index 0000000000..615cd61350 --- /dev/null +++ b/docs/docs/architecture/rust-mcp-runtime.md @@ -0,0 +1,177 @@ +# Rust MCP Runtime + +The Rust MCP runtime is an optional sidecar/runtime path for ContextForge's +streamable HTTP MCP traffic. It is designed to move the public MCP hot path out +of Python incrementally while keeping Python authoritative for authentication, +token scoping, and RBAC. + +It is also the first concrete precedent for the broader +[Modular Runtime Architecture](modular-design.md): a protocol-specific runtime +that can move out of the Python process while the core platform remains the +shared policy and control plane. 
The generalized implementor-facing contract +for future modules is documented in the +[Modular Runtime Specification](modular-runtime/index.md). + +This page describes the current architecture and the supported rollout modes. + +## Mode Model + +The user-facing control is `RUST_MCP_MODE`: + +| Mode | Public `/mcp` ingress | Rust session/event/resume/live-stream cores | Intended use | +|------|------------------------|--------------------------------------------|--------------| +| `off` | Python | No | Baseline Python MCP path | +| `shadow` | Python | No public Rust ownership | Safety-first rollback/comparison mode with Rust sidecar present | +| `edge` | Rust | No | Direct public Rust ingress with Python still backing more MCP internals | +| `full` | Rust | Yes | Fastest public Rust path with Rust-owned MCP session/runtime cores | + +Use the testing stack wrappers to bring these up locally: + +```bash +make testing-rebuild-rust-shadow +make testing-rebuild-rust +make testing-rebuild-rust-full +``` + +## Request Flows + +### `off` and `shadow` + +In `off` and `shadow`, the public MCP path remains Python-owned: + +```text +client + -> nginx + -> Python gateway transport/auth/token scoping/RBAC + -> Python MCP handlers + -> upstream MCP server +``` + +`shadow` differs from `off` only in that the Rust sidecar is present and can be +used for internal validation and comparison; it does not own the public MCP +transport. + +### `edge` and `full` + +In `edge` and `full`, nginx routes public `GET/POST/DELETE /mcp` directly to +the Rust runtime: + +```text +client + -> nginx + -> Rust public listener + -> trusted Python auth endpoint (internal) + -> Rust MCP routing/execution/session logic + -> upstream MCP server or narrow Python internal endpoint +``` + +Important details: + +- Direct public Rust ingress is enabled by the dedicated public listener set up + from `RUST_MCP_MODE=edge|full`. 
+- Rust authenticates public traffic through the trusted Python internal endpoint + `POST /_internal/mcp/authenticate`. +- Rust strips forwarded/proxy-chain headers on the trusted Rust -> Python hop so + Python evaluates the request as an internal runtime dispatch rather than as an + external client IP. + +## Responsibility Split + +The current split is intentionally conservative: + +| Concern | Python | Rust | +|---------|--------|------| +| JWT authentication | Yes | Via trusted internal Python auth | +| Token scoping / team visibility | Yes | Consumes authenticated context | +| RBAC | Yes | Enforces Python-authenticated result | +| Public MCP HTTP edge | `off`, `shadow` | `edge`, `full` | +| Session registry | Python in `off`, `shadow` | Rust in `full` | +| Event store / replay / resume | Python in `off`, `shadow`, `edge` | Rust in `full` | +| Live `GET /mcp` SSE edge | Python in `off`, `shadow`, `edge` | Rust in `full` | +| Affinity / owner-worker forwarding | Python in `off`, `shadow`, `edge` | Rust in `full` | +| Direct `tools/call` execution | Python fallback still exists | Rust hot path when eligible | + +The important architectural point is that Rust does not currently replace the +full security model. Python remains the authority for auth and RBAC while Rust +owns progressively more of the public MCP transport and session/runtime work. + +## Session/Auth Reuse Model + +To reduce repeated auth overhead on session-bound MCP traffic, Rust can reuse +authenticated context for an established MCP session. This is not a global +per-user cache. It is bound to the MCP session and validated against the +original authenticated context. 
+ +Key invariants: + +- a session belongs to exactly one authenticated caller context +- a different caller cannot reuse the same `mcp-session-id` +- a changed auth binding on the same session is denied rather than reused +- replay/resume and delete operations preserve the same ownership checks + +This model is validated by the dedicated isolation suite: + +```bash +make test-mcp-session-isolation +``` + +See the detailed threat model and test matrix in +`tools_rust/mcp_runtime/TESTING-DESIGN.md` in the repository. + +## Verification + +After bringing up the stack, verify the active mode through `/health`: + +```bash +curl -sD - http://localhost:8080/health -o /dev/null | rg 'x-contextforge-mcp-' +``` + +Representative full-Rust headers: + +```text +x-contextforge-mcp-runtime-mode: rust-managed +x-contextforge-mcp-transport-mounted: rust +x-contextforge-mcp-session-core-mode: rust +x-contextforge-mcp-event-store-mode: rust +x-contextforge-mcp-resume-core-mode: rust +x-contextforge-mcp-live-stream-core-mode: rust +x-contextforge-mcp-affinity-core-mode: rust +x-contextforge-mcp-session-auth-reuse-mode: rust +``` + +Representative shadow-mode headers: + +```text +x-contextforge-mcp-runtime-mode: rust-managed +x-contextforge-mcp-transport-mounted: python +x-contextforge-mcp-session-core-mode: python +x-contextforge-mcp-event-store-mode: python +x-contextforge-mcp-resume-core-mode: python +x-contextforge-mcp-live-stream-core-mode: python +x-contextforge-mcp-affinity-core-mode: python +x-contextforge-mcp-session-auth-reuse-mode: python +``` + +## Validation and Benchmark Workflow + +Recommended stack-backed validation: + +```bash +make testing-rebuild-rust-full +make test-mcp-cli +make test-mcp-rbac +make test-mcp-session-isolation +cargo test --release --manifest-path tools_rust/mcp_runtime/Cargo.toml +``` + +Recommended benchmark wrappers: + +```bash +make benchmark-mcp-mixed +make benchmark-mcp-tools +make benchmark-mcp-mixed-300 +make benchmark-mcp-tools-300 +``` + 
+For Rust-local profiling and crate-level lint/test helpers, see +`tools_rust/mcp_runtime/README.md` in the repository. diff --git a/docs/docs/deployment/compose.md b/docs/docs/deployment/compose.md index b39d8f52e9..ecc96e1ad7 100644 --- a/docs/docs/deployment/compose.md +++ b/docs/docs/deployment/compose.md @@ -179,6 +179,42 @@ curl http://localhost:8080/health # {"status":"healthy"} --- +## Rust MCP Compose Modes + +For the compose-backed testing stack, the Rust MCP runtime is exposed through +mode-specific make targets: + +```bash +make testing-rebuild-rust-shadow +make testing-rebuild-rust +make testing-rebuild-rust-full +``` + +Mode summary: + +- `shadow`: Rust sidecar present, but public `/mcp` stays on Python +- `edge`: public `/mcp` routed directly from nginx to Rust +- `full`: `edge` plus Rust session/event-store/resume/live-stream/affinity + cores + +Verify the active mode via `/health`: + +```bash +curl -sD - http://localhost:8080/health -o /dev/null | rg 'x-contextforge-mcp-' +``` + +Examples: + +- `x-contextforge-mcp-transport-mounted: python` means the public MCP path is + still Python-owned +- `x-contextforge-mcp-transport-mounted: rust` means nginx is routing public + `/mcp` traffic directly to the Rust runtime + +For the current runtime architecture, see +[Rust MCP Runtime](../architecture/rust-mcp-runtime.md). 
+
+---
+
## 🗄 Selecting a database

Uncomment one service block in `docker-compose.yml` and align `DATABASE_URL`:
diff --git a/docs/docs/development/profiling.md b/docs/docs/development/profiling.md
index 7a125a0ace..0d7c700332 100644
--- a/docs/docs/development/profiling.md
+++ b/docs/docs/development/profiling.md
@@ -16,6 +16,7 @@ This guide covers tools and techniques for profiling ContextForge performance un
| **memray** | Python memory profiling | Find memory leaks and allocation hotspots |
| **docker stats** | Resource monitoring | Track CPU/memory usage |
| **Redis CLI** | Cache analysis | Check hit rates |
+| **perf / cargo flamegraph** | Rust CPU profiling | Inspect Rust MCP runtime hotspots |

---

@@ -237,6 +238,39 @@ py-spy record -o flamegraph.svg -- python -m mcpgateway

---

+## Rust MCP Runtime Profiling
+
+For Rust-local profiling of the MCP runtime crate:
+
+```bash
+make -C tools_rust/mcp_runtime setup-profiling
+make -C tools_rust/mcp_runtime flamegraph-test
+make -C tools_rust/mcp_runtime flamegraph-test-rmcp
+```
+
+These targets generate flamegraphs under:
+
+```text
+tools_rust/mcp_runtime/profiles/
+```
+
+Use them to inspect Rust-internal startup and hot-path behavior in the runtime
+crate itself.
+
+For live profiling of the compose-backed Rust runtime under load:
+
+```bash
+ps -eo pid,cmd | grep contextforge-mcp-runtime
+sudo perf record -F 99 -g -p <pid> -- sleep 20
+sudo perf report --stdio
+```
+
+Use live `perf` during a real benchmark when you want steady-state behavior.
+Use the crate-local flamegraph targets when you want in-process Rust visibility
+without the rest of the stack.
+
+---
+
## Memory Profiling with memray

[memray](https://github.com/bloomberg/memray) is a memory profiler for Python that tracks allocations in Python code, native extension modules, and the Python interpreter itself. It's ideal for finding memory leaks, high-water marks, and allocation hotspots. 
diff --git a/docs/docs/testing/index.md b/docs/docs/testing/index.md index 56ef89fe73..3f8bb1bb24 100644 --- a/docs/docs/testing/index.md +++ b/docs/docs/testing/index.md @@ -10,7 +10,7 @@ This section covers the testing strategy and tools for ContextForge. |-------|------|----------|--------| | **Unit tests** | pytest | `tests/unit/` | Implemented | | **Integration tests** | pytest | `tests/integration/` | Implemented | -| **End-to-end tests** | pytest | `tests/e2e/` | Implemented | +| **End-to-end tests** | pytest | `tests/e2e/`, `tests/e2e_rust/` | Implemented | | **UI automation** | Playwright | `tests/playwright/` | Implemented | | **Security / DAST** | Playwright + OWASP ZAP | `tests/playwright/security/` | Implemented | | **Load testing** | Locust | `tests/loadtest/` | Implemented | @@ -93,6 +93,66 @@ Access the Locust dashboard at `http://localhost:8089` when running with the web --- +## 🦀 Rust MCP Runtime Validation + +For the Rust MCP runtime path, the most important stack-backed checks are: + +```bash +make testing-rebuild-rust-full +make test-mcp-cli +make test-mcp-rbac +make test-mcp-access-matrix +make test-mcp-session-isolation +make test-mcp-session-isolation-load MCP_ISOLATION_LOAD_RUN_TIME=30s +cargo test --release --manifest-path tools_rust/mcp_runtime/Cargo.toml +``` + +For live plugin parity, use the test-specific plugin config and run the same +E2E against both Python mode and Rust full mode: + +```bash +PLUGINS_CONFIG_FILE=plugins/plugin_parity_config.yaml make testing-up +MCP_PLUGIN_PARITY_EXPECTED_RUNTIME=python make test-mcp-plugin-parity + +PLUGINS_CONFIG_FILE=plugins/plugin_parity_config.yaml make testing-rebuild-rust-full +MCP_PLUGIN_PARITY_EXPECTED_RUNTIME=rust make test-mcp-plugin-parity +``` + +This parity gate currently proves live plugin behavior on: +- `resources/read` +- `tools/call` +- `prompts/get` + +For revocation and membership/role-drift validation, shorten the reuse TTL so +the bounded-TTL contract completes quickly: + 
+```bash +MCP_RUST_SESSION_AUTH_REUSE_TTL_SECONDS=2 MCP_RUST_SESSION_AUTH_REUSE_GRACE_SECONDS=1 make testing-rebuild-rust-full +make test-mcp-access-matrix +make test-mcp-session-isolation +make test-mcp-session-isolation-load MCP_ISOLATION_LOAD_RUN_TIME=30s +``` + +Use these mode-specific rebuild targets when validating rollout behavior: + +```bash +make testing-rebuild-rust-shadow +make testing-rebuild-rust +make testing-rebuild-rust-full +``` + +These validate, respectively: + +- `shadow`: Rust sidecar present while public `/mcp` stays on Python +- `edge`: direct Rust public ingress without the full Rust session/runtime cores +- `full`: direct Rust public ingress plus Rust session/event/resume/live-stream + and affinity cores + +For throughput benchmarks and Locust wrappers, see +[Performance Testing](performance.md). + +--- + ## 🌐 Frontend JavaScript Testing Frontend JavaScript unit tests are **not yet implemented**. The codebase uses plain JavaScript (not TypeScript) with: diff --git a/docs/docs/testing/performance.md b/docs/docs/testing/performance.md index feb876258e..993e99aaef 100644 --- a/docs/docs/testing/performance.md +++ b/docs/docs/testing/performance.md @@ -36,6 +36,43 @@ Compare the 95/99th percentile latencies and error rates with and without the ga * Overhead from JSON-RPC wrapping/unwrapping * Improper worker/thread config in Gunicorn +## Rust MCP Benchmark Workflow + +When benchmarking the Rust MCP runtime on the compose-backed test stack, use the +mode-specific rebuild helpers first: + +```bash +make testing-rebuild-rust-shadow +make testing-rebuild-rust +make testing-rebuild-rust-full +``` + +Then use the benchmark wrappers: + +```bash +make benchmark-mcp-mixed +make benchmark-mcp-tools +make benchmark-mcp-mixed-300 +make benchmark-mcp-tools-300 +``` + +These wrappers target the nginx-exposed compose stack on +`http://localhost:8080` and use the MCP protocol Locust file under +`tests/loadtest/locustfile_mcp_protocol.py`. 
+ +Recommended Rust MCP validation sequence: + +```bash +make testing-rebuild-rust-full +make test-mcp-cli +make test-mcp-rbac +make test-mcp-session-isolation +make benchmark-mcp-tools-300 +``` + +If you are comparing performance and rollback behavior, run the same benchmark +suite in `shadow` and `full`. + ## 🚀 Scripted Load Tests: `tests/hey/hey.sh` A wrapper script exists at: diff --git a/infra/nginx/nginx-performance.conf b/infra/nginx/nginx-performance.conf index 94dc58175b..7c268ef994 100644 --- a/infra/nginx/nginx-performance.conf +++ b/infra/nginx/nginx-performance.conf @@ -168,6 +168,15 @@ http { keepalive_timeout 60s; # Connection idle timeout } + upstream mcp_transport_backend { + least_conn; + server gateway:8787 max_fails=0; + server gateway:4444 max_fails=0 backup; + keepalive 512; + keepalive_requests 100000; + keepalive_timeout 60s; + } + # Cache bypass conditions map $request_method $skip_cache { default 0; @@ -469,6 +478,30 @@ http { proxy_read_timeout 1h; } + # MCP Streamable HTTP transport + location ~ ^(/mcp/?|/servers/.*/mcp/?)$ { + proxy_pass http://mcp_transport_backend; + + # MCP GET /mcp can be a long-lived SSE stream; disable buffering. 
+ proxy_http_version 1.1; + proxy_set_header Connection ''; + proxy_request_buffering off; + proxy_buffering off; + proxy_cache off; + add_header X-Accel-Buffering "no" always; + + # Proxy headers + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Extended timeouts for long-lived MCP transport streams + proxy_connect_timeout 1h; + proxy_send_timeout 1h; + proxy_read_timeout 1h; + } + # ============================================================ # JSON-RPC Endpoint - No Caching # ============================================================ diff --git a/infra/nginx/nginx-tls.conf b/infra/nginx/nginx-tls.conf index b6d58a3961..a022b62736 100644 --- a/infra/nginx/nginx-tls.conf +++ b/infra/nginx/nginx-tls.conf @@ -549,6 +549,31 @@ http { proxy_read_timeout 1h; } + # MCP Streamable HTTP transport + location ~ ^(/mcp/?|/servers/.*/mcp/?)$ { + proxy_pass https://gateway_backend; + + # MCP GET /mcp can be a long-lived SSE stream; disable buffering. 
+ proxy_http_version 1.1; + proxy_set_header Connection ''; + proxy_request_buffering off; + proxy_buffering off; + proxy_cache off; + add_header X-Accel-Buffering "no" always; + + # Proxy headers + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $forwarded_proto; + proxy_set_header X-Forwarded-Host $http_host; + + # Extended timeouts for long-lived MCP transport streams + proxy_connect_timeout 1h; + proxy_send_timeout 1h; + proxy_read_timeout 1h; + } + # ============================================================ # JSON-RPC Endpoint - No Caching # ============================================================ diff --git a/infra/nginx/nginx.conf b/infra/nginx/nginx.conf index 9a9be05b3e..b44702e757 100644 --- a/infra/nginx/nginx.conf +++ b/infra/nginx/nginx.conf @@ -171,6 +171,15 @@ http { keepalive_timeout 60s; # Connection idle timeout } + upstream mcp_transport_backend { + least_conn; + server gateway:8787 max_fails=0; + server gateway:4444 max_fails=0 backup; + keepalive 512; + keepalive_requests 100000; + keepalive_timeout 60s; + } + # ============================================================ # SSL Backend Configuration (for HTTPS gateway backend) # ============================================================ @@ -558,6 +567,31 @@ http { proxy_read_timeout 1h; } + # MCP Streamable HTTP transport + location ~ ^(/mcp/?|/servers/.*/mcp/?)$ { + proxy_pass http://mcp_transport_backend; + + # MCP GET /mcp can be long-lived SSE, so do not buffer the stream. 
+ proxy_http_version 1.1; + proxy_set_header Connection ''; + proxy_request_buffering off; + proxy_buffering off; + proxy_cache off; + add_header X-Accel-Buffering "no" always; + + # Proxy headers + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $forwarded_proto; + proxy_set_header X-Forwarded-Host $http_host; + + # Extended timeouts for stateful MCP sessions and SSE streams + proxy_connect_timeout 1h; + proxy_send_timeout 1h; + proxy_read_timeout 1h; + } + # ============================================================ # JSON-RPC Endpoint - No Caching # ============================================================ diff --git a/mcpgateway/admin.py b/mcpgateway/admin.py index f4a8bbfd56..ff5ced2144 100644 --- a/mcpgateway/admin.py +++ b/mcpgateway/admin.py @@ -1771,6 +1771,7 @@ async def get_overview_partial( "redis_available": redis_available, "redis_reachable": redis_reachable, "uptime_seconds": uptime_seconds, + "mcp_runtime": version_module.mcp_runtime_status_payload(), } return request.app.state.templates.TemplateResponse(request, "overview_partial.html", context) diff --git a/mcpgateway/config.py b/mcpgateway/config.py index cdc749f402..7badc013c3 100644 --- a/mcpgateway/config.py +++ b/mcpgateway/config.py @@ -235,6 +235,48 @@ class Settings(BaseSettings): # Protocol protocol_version: str = "2025-11-25" + experimental_rust_mcp_runtime_enabled: bool = Field( + default=False, + description="Proxy POST /mcp traffic through the experimental Rust MCP runtime sidecar.", + ) + experimental_rust_mcp_runtime_url: str = Field( + default="http://127.0.0.1:8787", + description="Base URL for the experimental Rust MCP runtime sidecar.", + ) + experimental_rust_mcp_runtime_uds: Optional[str] = Field( + default=None, + description="Optional Unix domain socket path for the experimental Rust MCP runtime sidecar.", + ) + 
experimental_rust_mcp_runtime_timeout_seconds: int = Field( + default=30, + ge=1, + le=300, + description="Timeout in seconds for Python-to-Rust MCP runtime proxy requests.", + ) + experimental_rust_mcp_session_core_enabled: bool = Field( + default=False, + description="Enable the experimental Rust-owned MCP session metadata core while keeping Python as the fallback transport backend.", + ) + experimental_rust_mcp_event_store_enabled: bool = Field( + default=False, + description="Enable the experimental Rust-owned resumable MCP event-store backend for Streamable HTTP sessions.", + ) + experimental_rust_mcp_resume_core_enabled: bool = Field( + default=False, + description="Enable the experimental Rust-owned public MCP replay/resume path for GET /mcp with Last-Event-ID while keeping Python fallback available.", + ) + experimental_rust_mcp_live_stream_core_enabled: bool = Field( + default=False, + description="Enable the experimental Rust-owned public MCP live GET /mcp SSE path while keeping Python as the fallback upstream stream source.", + ) + experimental_rust_mcp_affinity_core_enabled: bool = Field( + default=False, + description="Enable the experimental Rust-owned MCP session-affinity forwarding path while keeping Python worker forwarding as the fallback.", + ) + experimental_rust_mcp_session_auth_reuse_enabled: bool = Field( + default=False, + description="Enable the experimental Rust-owned MCP session-bound auth-context reuse path for direct public /mcp ingress.", + ) # Authentication basic_auth_user: str = "admin" @@ -1830,6 +1872,30 @@ def _auto_enable_security_txt(cls, v: Any, info: ValidationInfo) -> bool: return bool(info.data["well_known_security_txt"].strip()) return bool(v) + @field_validator("experimental_rust_mcp_runtime_uds", mode="after") + @classmethod + def _validate_experimental_rust_mcp_runtime_uds(cls, value: Optional[str]) -> Optional[str]: + """Validate the optional UDS path used for the Rust MCP runtime sidecar. 
+ + Args: + value: Candidate UDS path from configuration. + + Returns: + The normalized absolute UDS path, or ``None`` when unset. + + Raises: + ValueError: If the path is not absolute or its parent directory is missing. + """ + if value in (None, ""): + return None + + uds_path = Path(value).expanduser() + if not uds_path.is_absolute(): + raise ValueError("experimental_rust_mcp_runtime_uds must be an absolute path") + if not uds_path.parent.exists(): + raise ValueError(f"experimental_rust_mcp_runtime_uds parent directory does not exist: {uds_path.parent}") + return str(uds_path) + # ------------------------------- # Flexible list parsing for envs # ------------------------------- diff --git a/mcpgateway/main.py b/mcpgateway/main.py index 160b549307..e650bbd52c 100644 --- a/mcpgateway/main.py +++ b/mcpgateway/main.py @@ -28,10 +28,12 @@ # Standard import asyncio +import base64 from contextlib import asynccontextmanager, suppress from datetime import datetime, timezone from functools import lru_cache import hashlib +import hmac import html import re import sys @@ -64,7 +66,9 @@ from uvicorn.middleware.proxy_headers import ProxyHeadersMiddleware # First-Party +# Import the admin routes from the new module from mcpgateway import __version__ +from mcpgateway import version as version_module from mcpgateway.admin import admin_router, set_logging_service from mcpgateway.auth import _check_token_revoked_sync, _lookup_api_token_sync, _resolve_teams_from_db, get_current_user, get_user_team_roles, normalize_token_teams from mcpgateway.bootstrap_db import main as bootstrap_db @@ -143,8 +147,16 @@ from mcpgateway.services.server_service import ServerError, ServerLockConflictError, ServerNameConflictError, ServerNotFoundError from mcpgateway.services.tag_service import TagService from mcpgateway.services.tool_service import ToolError, ToolLockConflictError, ToolNameConflictError, ToolNotFoundError +from mcpgateway.transports.rust_mcp_runtime_proxy import RustMCPRuntimeProxy 
from mcpgateway.transports.sse_transport import SSETransport -from mcpgateway.transports.streamablehttp_transport import SessionManagerWrapper, set_shared_session_registry, streamable_http_auth +from mcpgateway.transports.streamablehttp_transport import ( + _validate_streamable_session_access, + get_streamable_http_auth_context, + SessionManagerWrapper, + set_shared_session_registry, + streamable_http_auth, + user_context_var, +) from mcpgateway.utils.db_isready import wait_for_db_ready from mcpgateway.utils.error_formatter import ErrorFormatter from mcpgateway.utils.metadata_capture import MetadataCapture @@ -156,8 +168,6 @@ from mcpgateway.utils.token_scoping import validate_server_access from mcpgateway.utils.verify_credentials import extract_websocket_bearer_token, is_proxy_auth_trust_active, require_admin_auth, require_docs_auth_override, verify_jwt_token from mcpgateway.validation.jsonrpc import JSONRPCError - -# Import the admin routes from the new module from mcpgateway.version import router as version_router # Initialize logging service first @@ -299,6 +309,340 @@ def get_user_email(user): return str(user) if user else "unknown" +_INTERNAL_MCP_AUTH_CONTEXT_HEADER = "x-contextforge-auth-context" +_INTERNAL_MCP_RUNTIME_AUTH_HEADER = "x-contextforge-mcp-runtime-auth" +_INTERNAL_MCP_RUNTIME_AUTH_CONTEXT = "contextforge-internal-mcp-runtime-v1" +_INTERNAL_MCP_SESSION_VALIDATED_HEADER = "x-contextforge-session-validated" + + +def _get_internal_mcp_auth_context(request: Request) -> Optional[Dict[str, Any]]: + """Return trusted auth context forwarded from the StreamableHTTP MCP auth layer. + + Args: + request: Incoming request that may carry trusted MCP auth context on state. + + Returns: + The forwarded auth context dictionary when present, otherwise ``None``. 
+ """ + internal_auth_context = getattr(request.state, "_mcp_internal_auth_context", None) + if isinstance(internal_auth_context, dict): + return internal_auth_context + return None + + +def _decode_internal_mcp_auth_context(header_value: str) -> Dict[str, Any]: + """Decode the trusted internal MCP auth header payload. + + Args: + header_value: Base64url-encoded trusted auth context header value. + + Returns: + Decoded auth context dictionary. + + Raises: + ValueError: If the decoded payload is not a JSON object. + """ + padding = "=" * (-len(header_value) % 4) + decoded = base64.urlsafe_b64decode(f"{header_value}{padding}".encode("ascii")) + payload = orjson.loads(decoded) + if not isinstance(payload, dict): + raise ValueError("Decoded internal MCP auth context must be an object") + return payload + + +def _auth_encryption_secret_value() -> str: + """Return the configured auth-encryption secret as a plain string. + + Returns: + The auth-encryption secret, normalized to a regular string. + """ + secret = settings.auth_encryption_secret + if hasattr(secret, "get_secret_value"): + return secret.get_secret_value() + return str(secret) + + +@lru_cache(maxsize=8) +def _expected_internal_mcp_runtime_auth_header_for_secret(secret: str) -> str: + """Return the shared secret-derived trust header for Rust->Python MCP hops. + + Args: + secret: Auth-encryption secret to derive the trust header from. + + Returns: + Hex-encoded SHA-256 digest derived from the provided auth secret. + """ + material = f"{secret}:{_INTERNAL_MCP_RUNTIME_AUTH_CONTEXT}".encode("utf-8") + return hashlib.sha256(material).hexdigest() + + +def _expected_internal_mcp_runtime_auth_header() -> str: + """Return the current shared secret-derived trust header for Rust->Python MCP hops. + + Returns: + Hex-encoded SHA-256 digest derived from the current auth secret. 
+ """ + return _expected_internal_mcp_runtime_auth_header_for_secret(_auth_encryption_secret_value()) + + +def _has_valid_internal_mcp_runtime_auth_header(request: Request) -> bool: + """Validate the shared secret-derived trust header for internal MCP requests. + + Args: + request: Incoming internal MCP request. + + Returns: + ``True`` when the derived trust header matches the expected value. + """ + provided = request.headers.get(_INTERNAL_MCP_RUNTIME_AUTH_HEADER) + if not provided: + return False + return hmac.compare_digest(provided, _expected_internal_mcp_runtime_auth_header()) + + +def _is_trusted_internal_mcp_runtime_request(request: Request) -> bool: + """Return whether the request came from the local Rust runtime sidecar. + + Args: + request: Incoming request to inspect. + + Returns: + ``True`` when the request carries the trusted Rust runtime marker from + loopback, otherwise ``False``. + """ + runtime_marker = request.headers.get("x-contextforge-mcp-runtime") + client_host = getattr(getattr(request, "client", None), "host", None) + return runtime_marker == "rust" and _has_valid_internal_mcp_runtime_auth_header(request) and client_host in ("127.0.0.1", "::1") + + +def _build_internal_mcp_forwarded_user(request: Request) -> Dict[str, Any]: + """Build the authenticated user payload for internal Rust -> Python MCP dispatch. + + Args: + request: Trusted internal request forwarded from the Rust runtime. + + Returns: + Synthetic authenticated user payload used by internal MCP handlers. + + Raises: + HTTPException: If the request is not trusted or the forwarded auth context + is missing or invalid. 
+ """ + if not _is_trusted_internal_mcp_runtime_request(request): + raise HTTPException(status_code=403, detail="Internal MCP dispatch is only available to the local Rust runtime") + + header_value = request.headers.get(_INTERNAL_MCP_AUTH_CONTEXT_HEADER) + if not header_value: + raise HTTPException(status_code=400, detail="Missing trusted MCP auth context") + + try: + auth_context = _decode_internal_mcp_auth_context(header_value) + except Exception as exc: + raise HTTPException(status_code=400, detail=f"Invalid trusted MCP auth context: {exc}") from exc + + setattr(request.state, "_mcp_internal_auth_context", auth_context) + + if "teams" in auth_context and (auth_context["teams"] is None or isinstance(auth_context["teams"], list)): + request.state.token_teams = auth_context["teams"] + + if request.headers.get(_INTERNAL_MCP_SESSION_VALIDATED_HEADER) == "rust": + auth_context["_rust_session_validated"] = True + + return { + "email": auth_context.get("email"), + "full_name": auth_context.get("email") or "MCP Internal Forward", + "is_admin": bool(auth_context.get("permission_is_admin", auth_context.get("is_admin", False))), + "auth_method": "mcp_internal_forward", + "token_use": auth_context.get("token_use"), + } + + +def _enforce_internal_mcp_server_scope(request: Request, server_id: str) -> None: + """Validate trusted internal server scope against any forwarded token server scope. + + Args: + request: Trusted internal MCP request. + server_id: Effective virtual server identifier for the operation. + + Raises: + HTTPException: If the forwarded token scope does not authorize the server. 
+ """ + auth_context = _get_internal_mcp_auth_context(request) + if not isinstance(auth_context, dict): + return + + scoped_server_id = auth_context.get("scoped_server_id") + if isinstance(scoped_server_id, str) and scoped_server_id and not validate_server_access({"server_id": scoped_server_id}, server_id): + raise HTTPException(status_code=403, detail=f"Token not authorized for server: {server_id}") + + +async def _authorize_internal_mcp_request(request: Request, db: Session, *, permission: str, method: str, server_id: Optional[str] = None): + """Authorize trusted Rust-side MCP dispatch while preserving permissive MCP semantics. + + For authenticated callers, this enforces the same token-scope and RBAC rules as + the regular RPC dispatcher. For unauthenticated MCP callers in permissive mode, + StreamableHTTP middleware already downgraded them to public-only scope and + enforced per-server OAuth, so the internal Rust -> Python hop should not re-deny + public-only requests merely because there is no authenticated RBAC identity. + + Args: + request: Trusted internal MCP request. + db: Active database session. + permission: RBAC permission required for the method. + method: MCP method name being authorized. + server_id: Optional virtual server identifier used for additional scope checks. + + Returns: + The forwarded user payload used for downstream authorization and scoping. + """ + user = _build_internal_mcp_forwarded_user(request) + auth_context = _get_internal_mcp_auth_context(request) or {} + + if server_id: + _enforce_internal_mcp_server_scope(request, server_id) + + if auth_context.get("is_authenticated", True) is True: + await _ensure_rpc_permission(user, db, permission, method, request=request) + + return user + + +def _build_internal_mcp_auth_scope( + *, + method: str, + path: str, + query_string: str, + headers: Dict[str, str], + client_ip: Optional[str], +) -> Dict[str, Any]: + """Construct a synthetic ASGI scope for internal Rust -> Python MCP auth. 
+
+ Args:
+ method: HTTP method of the original public MCP request.
+ path: Public MCP path, for example ``/mcp`` or ``/servers/{server_id}/mcp``.
+ query_string: Raw query string without the leading ``?``.
+ headers: Public request headers to replay through auth/token scoping.
+ client_ip: Effective client IP derived by Rust from the public request.
+
+ Returns:
+ ASGI scope dictionary suitable for token scoping and ``streamable_http_auth``.
+ """
+ raw_headers = []
+ for name, value in headers.items():
+ if not isinstance(name, str) or not isinstance(value, str):
+ continue
+ raw_headers.append((name.lower().encode("latin-1"), value.encode("latin-1")))
+
+ return {
+ "type": "http",
+ "method": method.upper(),
+ "path": path,
+ "raw_path": path.encode("latin-1"),
+ "query_string": query_string.encode("latin-1"),
+ "headers": raw_headers,
+ "client": (client_ip or "unknown", 0),
+ "state": {},
+ }
+
+
+async def _run_internal_mcp_authentication(
+ *,
+ method: str,
+ path: str,
+ query_string: str,
+ headers: Dict[str, str],
+ client_ip: Optional[str],
+) -> tuple[Optional[Response], Dict[str, Any]]:
+ """Run token scoping and MCP transport auth for a direct Rust ingress request.
+
+ Args:
+ method: HTTP method of the public request.
+ path: Public request path.
+ query_string: Raw query string without the leading ``?``.
+ headers: Public request headers replayed from Rust.
+ client_ip: Effective client IP for token-scope IP restriction checks.
+
+ Returns:
+ Tuple of ``(error_response, auth_context)``.
+ ``error_response`` is ``None`` on success; otherwise it contains the exact
+ response generated by the existing token-scoping/auth layers.
+ """
+ scope = _build_internal_mcp_auth_scope(
+ method=method,
+ path=path,
+ query_string=query_string,
+ headers=headers,
+ client_ip=client_ip,
+ )
+ request = starletteRequest(scope)
+ sent_messages: list[dict[str, Any]] = []
+
+ async def _receive() -> dict[str, Any]:
+ """Return an empty request body for the synthetic auth probe. 
+ + Returns: + Minimal ASGI ``http.request`` message with no body content. + """ + return {"type": "http.request", "body": b"", "more_body": False} + + async def _send(message: dict[str, Any]) -> None: + """Capture ASGI response messages emitted by auth middleware. + + Args: + message: ASGI response message emitted by the auth stack. + """ + sent_messages.append(message) + + def _captured_response() -> Response: + """Build a concrete response from the captured ASGI messages. + + Returns: + Response reconstructed from the captured auth middleware output. + """ + status_code = 500 + response_headers: Dict[str, str] = {} + body = b"" + for message in sent_messages: + if message.get("type") == "http.response.start": + status_code = int(message.get("status", 500)) + response_headers = { + key.decode("latin-1"): value.decode("latin-1") for key, value in message.get("headers", []) if isinstance(key, (bytes, bytearray)) and isinstance(value, (bytes, bytearray)) + } + elif message.get("type") == "http.response.body": + body += message.get("body", b"") + return Response(content=body, status_code=status_code, headers=response_headers) + + async def _call_next(_request: starletteRequest) -> Response: + """Run the existing Streamable HTTP auth layer for the synthetic request. + + Returns: + Success response when authentication passes, otherwise the captured + failure response emitted by the existing middleware chain. 
+ """ + auth_ok = await streamable_http_auth(scope, _receive, _send) + if auth_ok: + return ORJSONResponse(status_code=200, content={"authenticated": True}) + return _captured_response() + + original_context = user_context_var.get() + user_context_var.set({}) + try: + if settings.email_auth_enabled: + response = await token_scoping_middleware(request, _call_next) + else: + response = await _call_next(request) + + if response is None: + response = _captured_response() + + if response.status_code >= 400: + return response, {} + + return None, get_streamable_http_auth_context() + finally: + user_context_var.set(original_context) + + def _normalize_token_teams(teams: Optional[List]) -> List[str]: """ Normalize token teams to list of team IDs. @@ -371,6 +715,12 @@ def _get_token_teams_from_request(request: Request) -> Optional[List[str]]: >>> main._get_token_teams_from_request(req) [] """ + internal_auth_context = _get_internal_mcp_auth_context(request) + if isinstance(internal_auth_context, dict) and "teams" in internal_auth_context: + internal_teams = internal_auth_context.get("teams") + if internal_teams is None or isinstance(internal_teams, list): + return internal_teams + # SECURITY: First check request.state.token_teams (already normalized by auth.py) # This is the preferred path as auth.py has already applied normalize_token_teams # Use getattr with a sentinel to distinguish "not set" from "set to None" @@ -431,6 +781,15 @@ def _get_rpc_filter_context(request: Request, user) -> tuple: # Check if user is admin - MUST come from token, not DB user # This ensures that tokens with restricted scope (empty teams) don't inherit admin bypass is_admin = False + internal_auth_context = _get_internal_mcp_auth_context(request) + if isinstance(internal_auth_context, dict): + if user_email is None: + user_email = internal_auth_context.get("email") + is_admin = bool(internal_auth_context.get("is_admin", False)) + if token_teams is not None and len(token_teams) == 0: + is_admin = 
False + return user_email, token_teams, is_admin + cached = getattr(request.state, "_jwt_verified_payload", None) if cached and isinstance(cached, tuple) and len(cached) == 2: _, payload = cached @@ -455,6 +814,9 @@ def _has_verified_jwt_payload(request: Request) -> bool: Returns: ``True`` when a verified payload tuple is present, otherwise ``False``. """ + internal_auth_context = _get_internal_mcp_auth_context(request) + if isinstance(internal_auth_context, dict): + return True cached = getattr(request.state, "_jwt_verified_payload", None) return bool(cached and isinstance(cached, tuple) and len(cached) == 2 and cached[1]) @@ -540,6 +902,13 @@ def _extract_scoped_permissions(request: Request) -> set[str] | None: None: no explicit scope cap (empty permissions or no JWT — defer to RBAC) set: explicit permission set (may contain '*' for wildcard) """ + internal_auth_context = _get_internal_mcp_auth_context(request) + if isinstance(internal_auth_context, dict): + permissions = internal_auth_context.get("scoped_permissions") + if not permissions: + return None + return set(permissions) + cached = getattr(request.state, "_jwt_verified_payload", None) if not cached or not isinstance(cached, tuple) or len(cached) != 2: return None @@ -555,6 +924,27 @@ def _extract_scoped_permissions(request: Request) -> set[str] | None: return set(permissions) +def _is_permission_admin_user(user) -> bool: + """Return whether the caller already has permission-layer admin authority. + + This is stricter than token-scope admin semantics. It is used only to skip + redundant RBAC DB lookups after token scope caps have already been enforced. + + Args: + user: Authenticated user object or dict-like payload. + + Returns: + ``True`` when the caller already has permission-layer admin authority. 
+ """ + if hasattr(user, "is_admin"): + return bool(getattr(user, "is_admin", False)) + if isinstance(user, dict): + if "permission_is_admin" in user: + return bool(user.get("permission_is_admin", False)) + return False + return False + + async def _ensure_rpc_permission(user, db: Session, permission: str, method: str, request: Request | None = None) -> None: """Require a specific RPC permission for a method branch. @@ -579,6 +969,9 @@ async def _ensure_rpc_permission(user, db: Session, permission: str, method: str logger.warning("RPC permission denied (token scope): method=%s, required=%s", method, permission) raise JSONRPCError(-32003, _ACCESS_DENIED_MSG, {"method": method}) + if permission == "admin.system_config" and _is_permission_admin_user(user): + return + # Layer 2: RBAC check # Session tokens have no explicit team_id, so check across all team-scoped roles. # Mirrors the @require_permission decorator's check_any_team fallback (rbac.py:562-576). @@ -589,6 +982,72 @@ async def _ensure_rpc_permission(user, db: Session, permission: str, method: str raise JSONRPCError(-32003, _ACCESS_DENIED_MSG, {"method": method}) +def _serialize_mcp_tool_definition(tool: Any) -> Dict[str, Any]: + """Return an MCP-compliant tool definition without API-only metadata fields. + + Args: + tool: Tool ORM object, pydantic model, or dict-like payload. + + Returns: + MCP-compatible tool definition dictionary. 
+ """ + if hasattr(tool, "model_dump"): + data = tool.model_dump(by_alias=True, exclude_none=True) + elif isinstance(tool, dict): + data = dict(tool) + else: + data = {} + + payload: Dict[str, Any] = { + "name": data.get("name", getattr(tool, "name", None)), + "description": data.get("description", getattr(tool, "description", None)), + "inputSchema": data.get("inputSchema", getattr(tool, "input_schema", None)), + } + + output_schema = data.get("outputSchema", getattr(tool, "output_schema", None)) + if output_schema is not None: + payload["outputSchema"] = output_schema + + annotations = data.get("annotations", getattr(tool, "annotations", None)) + if annotations is not None: + payload["annotations"] = annotations + + return {key: value for key, value in payload.items() if value is not None} + + +def _serialize_mcp_tool_definitions(tools: List[Any]) -> List[Dict[str, Any]]: + """Serialize tool records to MCP tool definitions. + + Args: + tools: Iterable of tool-like records to serialize. + + Returns: + List of MCP-compatible tool definitions. + """ + return [_serialize_mcp_tool_definition(tool) for tool in tools] + + +def _serialize_legacy_tool_payloads(tools: List[Any]) -> List[Dict[str, Any]]: + """Serialize tool records using the legacy JSON-RPC shape. + + Args: + tools: Iterable of tool-like records to serialize. + + Returns: + List of legacy tool payload dictionaries. + """ + payloads: List[Dict[str, Any]] = [] + for tool in tools: + if hasattr(tool, "model_dump"): + payload = tool.model_dump(by_alias=True, exclude_none=True) + elif isinstance(tool, dict): + payload = dict(tool) + else: + payload = {} + payloads.append(payload) + return payloads + + def _enforce_scoped_resource_access(request: Request, db: Session, user, resource_path: str) -> None: """Apply token-scope ownership checks for a concrete resource path. 
@@ -685,6 +1144,147 @@ async def _authorize_run_cancellation(request: Request, user, request_id: str, * resource_cache = ResourceCache(max_size=settings.resource_cache_size, ttl=settings.resource_cache_ttl) +def _rust_build_included() -> bool: + """Return whether the current image includes Rust MCP artifacts. + + Returns: + ``True`` when the current image contains the Rust MCP binaries/plugins. + """ + return version_module.rust_build_included() + + +def _rust_runtime_managed() -> bool: + """Return whether the gateway expects to manage the Rust MCP sidecar locally. + + Returns: + ``True`` when the gateway should launch and supervise the Rust sidecar. + """ + return version_module.rust_runtime_managed() + + +def _current_mcp_transport_mount() -> str: + """Return which public /mcp transport is currently mounted. + + Returns: + Runtime label identifying the currently mounted public MCP transport. + """ + return version_module.current_mcp_transport_mount() + + +def _should_mount_public_rust_transport() -> bool: + """Return whether the public ``/mcp`` path should be served directly by Rust. + + Returns: + ``True`` only when the Rust runtime is enabled and the session-auth reuse + path is enabled, allowing Rust to safely own steady-state public MCP + session traffic. Otherwise returns ``False`` and leaves public MCP on + the Python ingress path. + """ + return version_module.should_mount_public_rust_transport() + + +def _should_use_rust_public_session_stack() -> bool: + """Return whether Rust should own the effective public MCP session stack. + + Returns: + ``True`` only when the Rust runtime is enabled and session-auth reuse is + enabled, allowing the public transport, session metadata, replay/resume, + live-stream, and affinity behavior to stay on a consistent Rust-backed + path. Otherwise returns ``False`` so the public MCP session stack falls + back to Python semantics. 
+ """ + return version_module.should_use_rust_public_session_stack() + + +def _current_mcp_runtime_mode() -> str: + """Return a compact runtime-mode label for observability. + + Returns: + Human-readable runtime mode label for health/readiness reporting. + """ + return version_module.current_mcp_runtime_mode() + + +def _current_mcp_session_core_mode() -> str: + """Return which session core currently owns MCP session metadata. + + Returns: + ``"rust"`` when the Rust session core is enabled, otherwise ``"python"``. + """ + return version_module.current_mcp_session_core_mode() + + +def _current_mcp_event_store_mode() -> str: + """Return which runtime currently owns MCP resumable event-store semantics. + + Returns: + ``"rust"`` when the Rust event store is enabled, otherwise ``"python"``. + """ + return version_module.current_mcp_event_store_mode() + + +def _current_mcp_resume_core_mode() -> str: + """Return which runtime currently owns public MCP replay/resume behavior. + + Returns: + ``"rust"`` when Rust owns replay/resume, otherwise ``"python"``. + """ + return version_module.current_mcp_resume_core_mode() + + +def _current_mcp_live_stream_core_mode() -> str: + """Return which runtime currently owns non-resume public GET /mcp SSE behavior. + + Returns: + ``"rust"`` when Rust owns live GET /mcp streaming, otherwise ``"python"``. + """ + return version_module.current_mcp_live_stream_core_mode() + + +def _current_mcp_affinity_core_mode() -> str: + """Return which runtime currently owns MCP multi-worker session-affinity forwarding. + + Returns: + ``"rust"`` when Rust owns session-affinity forwarding, otherwise ``"python"``. + """ + return version_module.current_mcp_affinity_core_mode() + + +def _current_mcp_session_auth_reuse_mode() -> str: + """Return which runtime currently owns MCP session-bound auth-context reuse. + + Returns: + ``"rust"`` when Rust session auth reuse is enabled, otherwise ``"python"``. 
+ """ + return version_module.current_mcp_session_auth_reuse_mode() + + +def _mcp_runtime_status_payload() -> Dict[str, Any]: + """Return MCP runtime diagnostics for health/readiness endpoints. + + Returns: + Diagnostic payload describing the active MCP runtime configuration. + """ + return version_module.mcp_runtime_status_payload() + + +def _apply_runtime_mode_headers(response: Response) -> None: + """Attach MCP runtime mode headers to a response. + + Args: + response: Response object to annotate. + """ + response.headers["x-contextforge-mcp-runtime-mode"] = _current_mcp_runtime_mode() + response.headers["x-contextforge-mcp-transport-mounted"] = _current_mcp_transport_mount() + response.headers["x-contextforge-rust-build-included"] = "true" if _rust_build_included() else "false" + response.headers["x-contextforge-mcp-session-core-mode"] = _current_mcp_session_core_mode() + response.headers["x-contextforge-mcp-event-store-mode"] = _current_mcp_event_store_mode() + response.headers["x-contextforge-mcp-resume-core-mode"] = _current_mcp_resume_core_mode() + response.headers["x-contextforge-mcp-live-stream-core-mode"] = _current_mcp_live_stream_core_mode() + response.headers["x-contextforge-mcp-affinity-core-mode"] = _current_mcp_affinity_core_mode() + response.headers["x-contextforge-mcp-session-auth-reuse-mode"] = _current_mcp_session_auth_reuse_mode() + + @lru_cache(maxsize=512) def _parse_jsonpath(jsonpath: str) -> JSONPath: """Cache parsed JSONPath expression. 
@@ -2424,7 +3024,10 @@ def get_db(): pass # nosec B110 - Best effort cleanup on connection failure raise finally: - db.close() + try: + db.close() + except Exception: + pass # nosec B110 - Best effort cleanup on already-failed prompt bridge sessions async def _read_request_json(request: Request) -> Any: @@ -6028,17 +6631,2241 @@ async def remove_root( @utility_router.post("/rpc/") @utility_router.post("/rpc") async def handle_rpc(request: Request, db: Session = Depends(get_db), user=Depends(get_current_user_with_permissions)): - """Handle RPC requests. + """Handle authenticated public RPC requests. Args: - request (Request): The incoming FastAPI request. - db (Session): Database session. - user: The authenticated user (dict with RBAC context). + request: Incoming public RPC request. + db: Database session provided by dependency injection. + user: Authenticated user payload with permissions. Returns: - Response with the RPC result or error. + JSON-RPC response generated by the shared authenticated RPC dispatcher. + """ + return await _handle_rpc_authenticated(request, db=db, user=user) - Raises: + +@utility_router.post("/_internal/mcp/authenticate/") +@utility_router.post("/_internal/mcp/authenticate") +async def handle_internal_mcp_authenticate(request: Request): + """Authenticate a public MCP request for direct Rust ingress. + + Args: + request: Trusted internal request sent by the local Rust runtime. + + Returns: + Auth context payload that Rust can forward on subsequent internal MCP calls. + + Raises: + HTTPException: If the request is not trusted or the forwarded payload is invalid. 
+ """ + if not _is_trusted_internal_mcp_runtime_request(request): + raise HTTPException(status_code=403, detail="Internal MCP authenticate is only available to the local Rust runtime") + + payload = await request.json() + if not isinstance(payload, dict): + raise HTTPException(status_code=400, detail="Invalid internal MCP authenticate payload") + + method = str(payload.get("method") or "GET").upper() + path = payload.get("path") + query_string = payload.get("queryString", "") + forwarded_headers = payload.get("headers", {}) + client_ip = payload.get("clientIp") + + if not isinstance(path, str) or not path: + raise HTTPException(status_code=400, detail="Internal MCP authenticate payload requires path") + if not isinstance(query_string, str): + raise HTTPException(status_code=400, detail="Internal MCP authenticate payload queryString must be a string") + if not isinstance(forwarded_headers, dict) or not all(isinstance(name, str) and isinstance(value, str) for name, value in forwarded_headers.items()): + raise HTTPException(status_code=400, detail="Internal MCP authenticate payload headers must be a string map") + if client_ip is not None and not isinstance(client_ip, str): + raise HTTPException(status_code=400, detail="Internal MCP authenticate payload clientIp must be a string") + + error_response, auth_context = await _run_internal_mcp_authentication( + method=method, + path=path, + query_string=query_string, + headers=forwarded_headers, + client_ip=client_ip, + ) + if error_response is not None: + return error_response + + return ORJSONResponse(status_code=200, content={"authContext": auth_context}) + + +@utility_router.post("/_internal/mcp/rpc/") +@utility_router.post("/_internal/mcp/rpc") +async def handle_internal_mcp_rpc(request: Request): + """Handle trusted MCP dispatch forwarded from the local Rust runtime. + + Args: + request: Trusted internal MCP request from the Rust runtime. + + Returns: + JSON-RPC response from the shared authenticated RPC dispatcher. 
+ + Raises: + Exception: Propagated after rolling back the local database session. + """ + user = _build_internal_mcp_forwarded_user(request) + db = SessionLocal() + try: + response = await _handle_rpc_authenticated(request, db=db, user=user) + if db.is_active and db.in_transaction() is not None: + db.commit() + return response + except Exception: + try: + db.rollback() + except Exception: + try: + db.invalidate() + except Exception: + pass # nosec B110 - Best effort cleanup on connection failure + raise + finally: + db.close() + + +@utility_router.post("/_internal/mcp/initialize/") +@utility_router.post("/_internal/mcp/initialize") +async def handle_internal_mcp_initialize(request: Request): + """Handle trusted MCP initialize requests forwarded from the local Rust runtime. + + Args: + request: Trusted internal MCP initialize request. + + Returns: + JSON-RPC initialize response payload. + """ + user = _build_internal_mcp_forwarded_user(request) + req_id = None + try: + try: + body = orjson.loads(await request.body()) + except orjson.JSONDecodeError: + return ORJSONResponse( + status_code=400, + content={ + "jsonrpc": "2.0", + "error": {"code": -32700, "message": "Parse error"}, + "id": None, + }, + ) + + req_id = body.get("id") + if req_id is None: + req_id = str(uuid.uuid4()) + + if body.get("method") != "initialize": + return ORJSONResponse( + status_code=400, + content={ + "jsonrpc": "2.0", + "error": {"code": -32600, "message": "Invalid Request"}, + "id": req_id, + }, + ) + + params = body.get("params", {}) + if not isinstance(params, dict): + params = {} + + server_id = request.headers.get("x-contextforge-server-id") if request.headers.get("x-contextforge-mcp-runtime") == "rust" else None + if server_id: + _enforce_internal_mcp_server_scope(request, server_id) + else: + server_id = params.get("server_id") + + result = await _execute_rpc_initialize( + request, + user, + params=params, + server_id=server_id, + mcp_session_id=request.headers.get("mcp-session-id") 
or request.headers.get("x-mcp-session-id"), + ) + return ORJSONResponse(content={"jsonrpc": "2.0", "result": result, "id": req_id}) + except JSONRPCError as exc: + error = exc.to_dict() + return ORJSONResponse(content={"jsonrpc": "2.0", "error": error["error"], "id": req_id}) + except Exception as exc: + logger.error("Internal MCP initialize error: %s", exc) + return ORJSONResponse( + content={ + "jsonrpc": "2.0", + "error": {"code": -32000, "message": "Internal error", "data": str(exc)}, + "id": req_id, + } + ) + + +@utility_router.delete("/_internal/mcp/session/") +@utility_router.delete("/_internal/mcp/session") +async def handle_internal_mcp_session_delete(request: Request): + """Handle trusted MCP session teardown forwarded from the local Rust runtime. + + Args: + request: Trusted internal MCP session-delete request. + + Returns: + Empty HTTP response indicating the session was removed. + """ + _build_internal_mcp_forwarded_user(request) + auth_context = _get_internal_mcp_auth_context(request) or {} + mcp_session_id = request.headers.get("mcp-session-id") or request.headers.get("x-mcp-session-id") + if not mcp_session_id: + return ORJSONResponse(status_code=400, content={"detail": "mcp-session-id header is required"}) + + if auth_context.get("_rust_session_validated") is not True: + session_allowed, deny_status, deny_detail = await _validate_streamable_session_access( + mcp_session_id=mcp_session_id, + user_context=auth_context, + ) + if not session_allowed: + return ORJSONResponse(status_code=deny_status, content={"detail": deny_detail}) + + server_id = request.headers.get("x-contextforge-server-id") if request.headers.get("x-contextforge-mcp-runtime") == "rust" else None + if server_id: + _enforce_internal_mcp_server_scope(request, server_id) + + await session_registry.remove_session(mcp_session_id) + + if settings.mcpgateway_session_affinity_enabled: + try: + # First-Party + from mcpgateway.services.mcp_session_pool import get_mcp_session_pool # pylint: 
disable=import-outside-toplevel + + pool = get_mcp_session_pool() + await pool.cleanup_streamable_http_session_owner(mcp_session_id) + except RuntimeError: + pass + + return Response(status_code=204) + + +@utility_router.post("/_internal/mcp/notifications/initialized/") +@utility_router.post("/_internal/mcp/notifications/initialized") +async def handle_internal_mcp_notifications_initialized(request: Request): + """Handle trusted MCP notifications/initialized requests from the local Rust runtime. + + Args: + request: Trusted internal MCP notification request. + + Returns: + Empty HTTP response acknowledging the notification. + + Raises: + HTTPException: If trusted server-scope validation fails. + """ + _build_internal_mcp_forwarded_user(request) + req_id = None + try: + try: + body = orjson.loads(await request.body()) + except orjson.JSONDecodeError: + return ORJSONResponse( + status_code=400, + content={ + "jsonrpc": "2.0", + "error": {"code": -32700, "message": "Parse error"}, + "id": None, + }, + ) + + req_id = body.get("id") + if body.get("method") != "notifications/initialized": + return ORJSONResponse( + status_code=400, + content={ + "jsonrpc": "2.0", + "error": {"code": -32600, "message": "Invalid Request"}, + "id": req_id, + }, + ) + + server_id = request.headers.get("x-contextforge-server-id") if request.headers.get("x-contextforge-mcp-runtime") == "rust" else None + if server_id: + _enforce_internal_mcp_server_scope(request, server_id) + + logger.info("Client initialized") + await logging_service.notify("Client initialized", LogLevel.INFO) + return Response(status_code=status.HTTP_204_NO_CONTENT) + except HTTPException: + raise + except Exception as exc: + logger.error("Internal MCP notifications/initialized error: %s", exc) + return ORJSONResponse( + content={ + "jsonrpc": "2.0", + "error": {"code": -32000, "message": "Internal error", "data": str(exc)}, + "id": req_id, + } + ) + + +@utility_router.post("/_internal/mcp/notifications/message/") 
@utility_router.post("/_internal/mcp/notifications/message")
async def handle_internal_mcp_notifications_message(request: Request):
    """Handle trusted MCP notifications/message requests from the local Rust runtime.

    Args:
        request: Trusted internal MCP notification request.

    Returns:
        Empty HTTP response acknowledging the notification.

    Raises:
        HTTPException: If trusted server-scope validation fails.
    """
    _build_internal_mcp_forwarded_user(request)
    req_id = None
    try:
        try:
            body = orjson.loads(await request.body())
        except orjson.JSONDecodeError:
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32700, "message": "Parse error"},
                    "id": None,
                },
            )

        req_id = body.get("id")
        if body.get("method") != "notifications/message":
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32600, "message": "Invalid Request"},
                    "id": req_id,
                },
            )

        # Server scope is only trusted when the request provably came from the Rust runtime.
        server_id = request.headers.get("x-contextforge-server-id") if request.headers.get("x-contextforge-mcp-runtime") == "rust" else None
        if server_id:
            _enforce_internal_mcp_server_scope(request, server_id)

        params = body.get("params", {})
        if not isinstance(params, dict):
            params = {}

        await logging_service.notify(
            params.get("data"),
            LogLevel(params.get("level", "info")),
            params.get("logger"),
        )
        return Response(status_code=status.HTTP_204_NO_CONTENT)
    except HTTPException:
        raise
    except Exception as exc:
        logger.error("Internal MCP notifications/message error: %s", exc)
        # Consistency fix: every sibling internal MCP handler returns HTTP 500 for
        # unexpected failures; previously this returned HTTP 200 with an error body.
        return ORJSONResponse(
            status_code=500,
            content={
                "jsonrpc": "2.0",
                "error": {"code": -32000, "message": "Internal error", "data": str(exc)},
                "id": req_id,
            },
        )


@utility_router.post("/_internal/mcp/notifications/cancelled/")
@utility_router.post("/_internal/mcp/notifications/cancelled")
async def handle_internal_mcp_notifications_cancelled(request: Request):
    """Handle trusted MCP notifications/cancelled requests from the local Rust runtime.

    Args:
        request: Trusted internal MCP cancellation notification.

    Returns:
        Empty HTTP response acknowledging the cancellation.

    Raises:
        HTTPException: If cancellation authorization or trusted scope validation fails.
    """
    user = _build_internal_mcp_forwarded_user(request)
    req_id = None
    try:
        try:
            body = orjson.loads(await request.body())
        except orjson.JSONDecodeError:
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32700, "message": "Parse error"},
                    "id": None,
                },
            )

        req_id = body.get("id")
        if body.get("method") != "notifications/cancelled":
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32600, "message": "Invalid Request"},
                    "id": req_id,
                },
            )

        # Server scope is only trusted when the request provably came from the Rust runtime.
        server_id = request.headers.get("x-contextforge-server-id") if request.headers.get("x-contextforge-mcp-runtime") == "rust" else None
        if server_id:
            _enforce_internal_mcp_server_scope(request, server_id)

        params = body.get("params", {})
        if not isinstance(params, dict):
            params = {}

        raw_request_id = params.get("requestId")
        request_id = str(raw_request_id) if raw_request_id is not None else None
        reason = params.get("reason")
        logger.info("Request cancelled: %s, reason: %s", request_id, reason)
        if request_id is not None:
            # Authorize before cancelling; raises HTTPException (not a JSON-RPC error) on denial.
            await _authorize_run_cancellation(request, user, request_id, as_jsonrpc_error=False)
            await cancellation_service.cancel_run(request_id, reason=reason)
            await logging_service.notify(f"Request cancelled: {request_id}", LogLevel.INFO)
        return Response(status_code=status.HTTP_204_NO_CONTENT)
    except HTTPException:
        raise
    except Exception as exc:
        logger.error("Internal MCP notifications/cancelled error: %s", exc)
        # Consistency fix: every sibling internal MCP handler returns HTTP 500 for
        # unexpected failures; previously this returned HTTP 200 with an error body.
        return ORJSONResponse(
            status_code=500,
            content={
                "jsonrpc": "2.0",
                "error": {"code": -32000, "message": "Internal error", "data": str(exc)},
                "id": req_id,
            },
        )
@utility_router.post("/_internal/mcp/tools/list/")
@utility_router.post("/_internal/mcp/tools/list")
async def handle_internal_mcp_tools_list(request: Request):
    """Handle trusted server-scoped tools/list requests forwarded from the Rust runtime.

    Args:
        request: Trusted internal MCP tools/list request.

    Returns:
        MCP tools/list response payload for the requested virtual server.

    Raises:
        HTTPException: If the trusted server scope is missing or invalid.
    """
    # Unlike sibling handlers this endpoint requires the server-scope header unconditionally.
    server_id = request.headers.get("x-contextforge-server-id")
    if not server_id:
        raise HTTPException(status_code=400, detail="Missing trusted MCP server scope")

    db = SessionLocal()
    try:
        user = await _authorize_internal_mcp_request(
            request,
            db,
            permission="tools.read",
            method="tools/list",
            server_id=server_id,
        )
        user_email, token_teams, is_admin = _get_rpc_filter_context(request, user)
        # Admins without token-scoped teams get an unfiltered listing.
        if is_admin and token_teams is None:
            user_email = None
            token_teams = None
        elif token_teams is None:
            token_teams = []

        tools = await tool_service.list_server_mcp_tool_definitions(
            db,
            server_id,
            user_email=user_email,
            token_teams=token_teams,
        )
        return ORJSONResponse(content={"tools": tools})
    except HTTPException:
        try:
            db.rollback()
        except Exception:
            try:
                db.invalidate()
            except Exception:
                pass  # nosec B110 - Best effort cleanup on connection failure
        raise
    except JSONRPCError as exc:
        return ORJSONResponse(status_code=403, content={"code": exc.code, "message": exc.message, "data": exc.data})
    except Exception as exc:
        try:
            db.rollback()
        except Exception:
            try:
                db.invalidate()
            except Exception:
                pass  # nosec B110 - Best effort cleanup on connection failure
        return ORJSONResponse(status_code=500, content={"code": -32000, "message": "Internal error", "data": str(exc)})
    finally:
        db.close()


@utility_router.post("/_internal/mcp/resources/list/")
@utility_router.post("/_internal/mcp/resources/list")
async def handle_internal_mcp_resources_list(request: Request):
    """Handle trusted resources/list requests forwarded from the Rust runtime.

    Args:
        request: Trusted internal MCP resources/list request.

    Returns:
        MCP resources/list response payload.
    """
    db = SessionLocal()
    req_id = None
    try:
        user = _build_internal_mcp_forwarded_user(request)
        try:
            body = orjson.loads(await request.body())
        except orjson.JSONDecodeError:
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32700, "message": "Parse error"},
                    "id": None,
                },
            )

        req_id = body.get("id") if isinstance(body, dict) else None
        if not isinstance(body, dict) or body.get("method") != "resources/list":
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32600, "message": "Invalid Request"},
                    "id": req_id,
                },
            )

        params = body.get("params", {})
        if not isinstance(params, dict):
            params = {}

        # Header-provided scope is trusted only for Rust-runtime requests; otherwise
        # fall back to the server_id carried in the JSON-RPC params.
        server_id = request.headers.get("x-contextforge-server-id") if request.headers.get("x-contextforge-mcp-runtime") == "rust" else None
        if server_id:
            _enforce_internal_mcp_server_scope(request, server_id)
        else:
            server_id = params.get("server_id")
        cursor = params.get("cursor")

        await _authorize_internal_mcp_request(
            request,
            db,
            permission="resources.read",
            method="resources/list",
            server_id=server_id,
        )

        user_email, token_teams, is_admin = _get_rpc_filter_context(request, user)
        if is_admin and token_teams is None:
            user_email = None
            token_teams = None
        elif token_teams is None:
            token_teams = []

        if server_id:
            resources = await resource_service.list_server_resources(
                db,
                server_id,
                user_email=user_email,
                token_teams=token_teams,
            )
            payload = {"resources": [r.model_dump(by_alias=True, exclude_none=True) for r in resources]}
        else:
            resources, next_cursor = await resource_service.list_resources(
                db,
                cursor=cursor,
                limit=0,
                user_email=user_email,
                token_teams=token_teams,
            )
            payload = {"resources": [r.model_dump(by_alias=True, exclude_none=True) for r in resources]}
            if next_cursor:
                payload["nextCursor"] = next_cursor

        # NOTE(review): SQLAlchemy 1.4+ Session.in_transaction() returns a bool, so
        # "is not None" is always true and this guard reduces to db.is_active — confirm intent.
        if db.is_active and db.in_transaction() is not None:
            db.commit()
        return ORJSONResponse(content=payload)
    except JSONRPCError as exc:
        return ORJSONResponse(status_code=403, content=exc.to_dict()["error"])
    except Exception as exc:
        try:
            db.rollback()
        except Exception:
            try:
                db.invalidate()
            except Exception:
                pass  # nosec B110 - Best effort cleanup on connection failure
        return ORJSONResponse(status_code=500, content={"code": -32000, "message": "Internal error", "data": str(exc)})
    finally:
        db.close()


@utility_router.post("/_internal/mcp/resources/read/")
@utility_router.post("/_internal/mcp/resources/read")
async def handle_internal_mcp_resources_read(request: Request):
    """Handle trusted resources/read requests forwarded from the Rust runtime.

    Args:
        request: Trusted internal MCP resources/read request.

    Returns:
        MCP resources/read response payload.
    """
    db = SessionLocal()
    req_id = None
    uri = None
    try:
        user = _build_internal_mcp_forwarded_user(request)
        try:
            body = orjson.loads(await request.body())
        except orjson.JSONDecodeError:
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32700, "message": "Parse error"},
                    "id": None,
                },
            )

        req_id = body.get("id") if isinstance(body, dict) else None
        if not isinstance(body, dict) or body.get("method") != "resources/read":
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32600, "message": "Invalid Request"},
                    "id": req_id,
                },
            )

        params = body.get("params", {})
        if not isinstance(params, dict):
            params = {}

        server_id = request.headers.get("x-contextforge-server-id") if request.headers.get("x-contextforge-mcp-runtime") == "rust" else None
        if server_id:
            _enforce_internal_mcp_server_scope(request, server_id)
        else:
            server_id = params.get("server_id")

        await _authorize_internal_mcp_request(
            request,
            db,
            permission="resources.read",
            method="resources/read",
            server_id=server_id,
        )

        uri = params.get("uri")
        request_id = params.get("requestId")
        meta_data = params.get("_meta")
        if not uri:
            return ORJSONResponse(
                status_code=400,
                content={
                    "code": -32602,
                    "message": "Missing resource URI in parameters",
                    "data": params,
                },
            )

        auth_user_email, auth_token_teams, auth_is_admin = _get_rpc_filter_context(request, user)
        if auth_is_admin and auth_token_teams is None:
            auth_user_email = None
        elif auth_token_teams is None:
            auth_token_teams = []

        plugin_context_table = getattr(request.state, "plugin_context_table", None)
        plugin_global_context = getattr(request.state, "plugin_global_context", None)
        result = await resource_service.read_resource(
            db,
            resource_uri=uri,
            request_id=request_id,
            user=auth_user_email,
            server_id=server_id,
            token_teams=auth_token_teams,
            plugin_context_table=plugin_context_table,
            plugin_global_context=plugin_global_context,
            meta_data=meta_data,
        )
        # First-Party
        from mcpgateway.common.models import ResourceContent  # pylint: disable=import-outside-toplevel

        # Normalize the service result into the MCP "contents" wire shape;
        # binary payloads are base64-encoded per the MCP resource contract.
        if isinstance(result, ResourceContent):
            normalized_content = {"uri": result.uri}
            if result.mime_type:
                normalized_content["mimeType"] = result.mime_type
            if result.text is not None:
                normalized_content["text"] = result.text
            elif result.blob is not None:
                normalized_content["blob"] = base64.b64encode(result.blob).decode("ascii")
            payload = {"contents": [normalized_content]}
        elif hasattr(result, "model_dump"):
            payload = {"contents": [result.model_dump(by_alias=True, exclude_none=True)]}
        else:
            payload = {"contents": [result]}

        if db.is_active and db.in_transaction() is not None:
            db.commit()
        return ORJSONResponse(content=payload)
    except ResourceNotFoundError as exc:
        return ORJSONResponse(
            status_code=404,
            content={
                "code": -32002,
                "message": str(exc),
                "data": {"uri": uri} if uri else None,
            },
        )
    except ResourceError as exc:
        return ORJSONResponse(
            status_code=400,
            content={
                "code": -32602,
                "message": str(exc),
                "data": {"uri": uri} if uri else None,
            },
        )
    except JSONRPCError as exc:
        # -32003 is the access-denied code; everything else maps to a client error.
        status_code = 403 if exc.code == -32003 else 400
        return ORJSONResponse(status_code=status_code, content=exc.to_dict()["error"])
    except Exception as exc:
        try:
            db.rollback()
        except Exception:
            try:
                db.invalidate()
            except Exception:
                pass  # nosec B110 - Best effort cleanup on connection failure
        return ORJSONResponse(status_code=500, content={"code": -32000, "message": "Internal error", "data": str(exc)})
    finally:
        db.close()


@utility_router.post("/_internal/mcp/resources/subscribe/")
@utility_router.post("/_internal/mcp/resources/subscribe")
async def handle_internal_mcp_resources_subscribe(request: Request):
    """Handle trusted resources/subscribe requests forwarded from the Rust runtime.

    Args:
        request: Trusted internal MCP resources/subscribe request.

    Returns:
        Empty JSON response confirming the subscription.
    """
    db = SessionLocal()
    req_id = None
    try:
        user = _build_internal_mcp_forwarded_user(request)
        try:
            body = orjson.loads(await request.body())
        except orjson.JSONDecodeError:
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32700, "message": "Parse error"},
                    "id": None,
                },
            )

        req_id = body.get("id") if isinstance(body, dict) else None
        if not isinstance(body, dict) or body.get("method") != "resources/subscribe":
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32600, "message": "Invalid Request"},
                    "id": req_id,
                },
            )

        params = body.get("params", {})
        if not isinstance(params, dict):
            params = {}

        # NOTE(review): no params fallback for server_id here, unlike list/read — confirm intentional.
        server_id = request.headers.get("x-contextforge-server-id") if request.headers.get("x-contextforge-mcp-runtime") == "rust" else None
        if server_id:
            _enforce_internal_mcp_server_scope(request, server_id)

        await _authorize_internal_mcp_request(
            request,
            db,
            permission="resources.read",
            method="resources/subscribe",
            server_id=server_id,
        )

        uri = params.get("uri")
        if not uri:
            return ORJSONResponse(
                status_code=400,
                content={
                    "code": -32602,
                    "message": "Missing resource URI in parameters",
                    "data": params,
                },
            )

        access_user_email, access_token_teams = _get_scoped_resource_access_context(request, user)
        user_email = get_user_email(user)
        subscription = ResourceSubscription(uri=uri, subscriber_id=user_email)
        await resource_service.subscribe_resource(
            db,
            subscription,
            user_email=access_user_email,
            token_teams=access_token_teams,
        )
        if db.is_active and db.in_transaction() is not None:
            db.commit()
        return ORJSONResponse(content={})
    except ResourceNotFoundError as exc:
        return ORJSONResponse(
            status_code=404,
            content={"code": -32002, "message": str(exc), "data": None},
        )
    except PermissionError:
        return ORJSONResponse(
            status_code=403,
            content={"code": -32003, "message": _ACCESS_DENIED_MSG, "data": {"method": "resources/subscribe"}},
        )
    except JSONRPCError as exc:
        return ORJSONResponse(status_code=403, content=exc.to_dict()["error"])
    except Exception as exc:
        try:
            db.rollback()
        except Exception:
            try:
                db.invalidate()
            except Exception:
                pass  # nosec B110 - Best effort cleanup on connection failure
        return ORJSONResponse(status_code=500, content={"code": -32000, "message": "Internal error", "data": str(exc)})
    finally:
        db.close()


@utility_router.post("/_internal/mcp/resources/unsubscribe/")
@utility_router.post("/_internal/mcp/resources/unsubscribe")
async def handle_internal_mcp_resources_unsubscribe(request: Request):
    """Handle trusted resources/unsubscribe requests forwarded from the Rust runtime.

    Args:
        request: Trusted internal MCP resources/unsubscribe request.

    Returns:
        Empty JSON response confirming the unsubscription.
    """
    db = SessionLocal()
    req_id = None
    try:
        user = _build_internal_mcp_forwarded_user(request)
        try:
            body = orjson.loads(await request.body())
        except orjson.JSONDecodeError:
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32700, "message": "Parse error"},
                    "id": None,
                },
            )

        req_id = body.get("id") if isinstance(body, dict) else None
        if not isinstance(body, dict) or body.get("method") != "resources/unsubscribe":
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32600, "message": "Invalid Request"},
                    "id": req_id,
                },
            )

        params = body.get("params", {})
        if not isinstance(params, dict):
            params = {}

        server_id = request.headers.get("x-contextforge-server-id") if request.headers.get("x-contextforge-mcp-runtime") == "rust" else None
        if server_id:
            _enforce_internal_mcp_server_scope(request, server_id)

        await _authorize_internal_mcp_request(
            request,
            db,
            permission="resources.read",
            method="resources/unsubscribe",
            server_id=server_id,
        )

        uri = params.get("uri")
        if not uri:
            return ORJSONResponse(
                status_code=400,
                content={
                    "code": -32602,
                    "message": "Missing resource URI in parameters",
                    "data": params,
                },
            )

        user_email = get_user_email(user)
        subscription = ResourceSubscription(uri=uri, subscriber_id=user_email)
        await resource_service.unsubscribe_resource(db, subscription)
        if db.is_active and db.in_transaction() is not None:
            db.commit()
        return ORJSONResponse(content={})
    except JSONRPCError as exc:
        return ORJSONResponse(status_code=403, content=exc.to_dict()["error"])
    except Exception as exc:
        try:
            db.rollback()
        except Exception:
            try:
                db.invalidate()
            except Exception:
                pass  # nosec B110 - Best effort cleanup on connection failure
        return ORJSONResponse(status_code=500, content={"code": -32000, "message": "Internal error", "data": str(exc)})
    finally:
        db.close()
@utility_router.post("/_internal/mcp/resources/templates/list/")
@utility_router.post("/_internal/mcp/resources/templates/list")
async def handle_internal_mcp_resource_templates_list(request: Request):
    """Handle trusted resources/templates/list requests forwarded from the Rust runtime.

    Args:
        request: Trusted internal MCP resources/templates/list request.

    Returns:
        MCP resources/templates/list response payload.

    Raises:
        Exception: Propagated after best-effort rollback when unexpected failures occur.
    """
    db = SessionLocal()
    req_id = None
    try:
        user = _build_internal_mcp_forwarded_user(request)
        try:
            body = orjson.loads(await request.body())
        except orjson.JSONDecodeError:
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32700, "message": "Parse error"},
                    "id": None,
                },
            )

        req_id = body.get("id") if isinstance(body, dict) else None
        if not isinstance(body, dict) or body.get("method") != "resources/templates/list":
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32600, "message": "Invalid Request"},
                    "id": req_id,
                },
            )

        params = body.get("params", {})
        if not isinstance(params, dict):
            params = {}

        server_id = request.headers.get("x-contextforge-server-id") if request.headers.get("x-contextforge-mcp-runtime") == "rust" else None
        if server_id:
            _enforce_internal_mcp_server_scope(request, server_id)
        else:
            server_id = params.get("server_id")

        await _authorize_internal_mcp_request(
            request,
            db,
            permission="resources.read",
            method="resources/templates/list",
            server_id=server_id,
        )

        user_email, token_teams, is_admin = _get_rpc_filter_context(request, user)
        # NOTE(review): unlike the other list handlers, the admin branch here does not
        # clear user_email (and "token_teams = None" is a no-op) — confirm intentional.
        if is_admin and token_teams is None:
            token_teams = None
        elif token_teams is None:
            token_teams = []

        resource_templates = await resource_service.list_resource_templates(
            db,
            user_email=user_email,
            token_teams=token_teams,
            server_id=server_id,
        )
        payload = {"resourceTemplates": [rt.model_dump(by_alias=True, exclude_none=True) for rt in resource_templates]}

        if db.is_active and db.in_transaction() is not None:
            db.commit()
        return ORJSONResponse(content=payload)
    except JSONRPCError as exc:
        return ORJSONResponse(status_code=403, content=exc.to_dict()["error"])
    except Exception:
        try:
            db.rollback()
        except Exception:
            try:
                db.invalidate()
            except Exception:
                pass  # nosec B110 - Best effort cleanup on connection failure
        raise
    finally:
        db.close()


@utility_router.post("/_internal/mcp/roots/list/")
@utility_router.post("/_internal/mcp/roots/list")
async def handle_internal_mcp_roots_list(request: Request):
    """Handle trusted roots/list requests forwarded from the Rust runtime.

    Args:
        request: Trusted internal MCP roots/list request.

    Returns:
        MCP roots/list response payload.

    Raises:
        Exception: Propagated after best-effort rollback when unexpected failures occur.
    """
    db = SessionLocal()
    req_id = None
    try:
        _build_internal_mcp_forwarded_user(request)
        try:
            body = orjson.loads(await request.body())
        except orjson.JSONDecodeError:
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32700, "message": "Parse error"},
                    "id": None,
                },
            )

        req_id = body.get("id") if isinstance(body, dict) else None
        if not isinstance(body, dict) or body.get("method") != "roots/list":
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32600, "message": "Invalid Request"},
                    "id": req_id,
                },
            )

        # roots/list is gated on the admin system-config permission, not a server scope.
        await _authorize_internal_mcp_request(
            request,
            db,
            permission="admin.system_config",
            method="roots/list",
            server_id=None,
        )
        roots = await root_service.list_roots()
        payload = {"roots": [r.model_dump(by_alias=True, exclude_none=True) for r in roots]}
        if db.is_active and db.in_transaction() is not None:
            db.commit()
        return ORJSONResponse(content=payload)
    except JSONRPCError as exc:
        return ORJSONResponse(status_code=403, content=exc.to_dict()["error"])
    except Exception:
        try:
            db.rollback()
        except Exception:
            try:
                db.invalidate()
            except Exception:
                pass  # nosec B110 - Best effort cleanup on connection failure
        raise
    finally:
        db.close()


@utility_router.post("/_internal/mcp/completion/complete/")
@utility_router.post("/_internal/mcp/completion/complete")
async def handle_internal_mcp_completion_complete(request: Request):
    """Handle trusted completion/complete requests forwarded from the Rust runtime.

    Args:
        request: Trusted internal MCP completion/complete request.

    Returns:
        MCP completion response payload.
    """
    db = SessionLocal()
    req_id = None
    try:
        user = _build_internal_mcp_forwarded_user(request)
        try:
            body = orjson.loads(await request.body())
        except orjson.JSONDecodeError:
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32700, "message": "Parse error"},
                    "id": None,
                },
            )

        req_id = body.get("id") if isinstance(body, dict) else None
        if not isinstance(body, dict) or body.get("method") != "completion/complete":
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32600, "message": "Invalid Request"},
                    "id": req_id,
                },
            )

        params = body.get("params", {})
        if not isinstance(params, dict):
            params = {}

        server_id = request.headers.get("x-contextforge-server-id") if request.headers.get("x-contextforge-mcp-runtime") == "rust" else None
        if server_id:
            _enforce_internal_mcp_server_scope(request, server_id)
        else:
            server_id = params.get("server_id")

        await _authorize_internal_mcp_request(
            request,
            db,
            permission="tools.read",
            method="completion/complete",
            server_id=server_id,
        )

        user_email, token_teams, is_admin = _get_rpc_filter_context(request, user)
        if is_admin and token_teams is None:
            user_email = None
            token_teams = None
        elif token_teams is None:
            token_teams = []

        payload = await completion_service.handle_completion(
            db,
            params,
            user_email=user_email,
            token_teams=token_teams,
        )
        if db.is_active and db.in_transaction() is not None:
            db.commit()
        return ORJSONResponse(content=payload)
    except JSONRPCError as exc:
        return ORJSONResponse(status_code=403, content=exc.to_dict()["error"])
    except Exception as exc:
        try:
            db.rollback()
        except Exception:
            try:
                db.invalidate()
            except Exception:
                pass  # nosec B110 - Best effort cleanup on connection failure
        return ORJSONResponse(status_code=500, content={"code": -32000, "message": "Internal error", "data": str(exc)})
    finally:
        db.close()
@utility_router.post("/_internal/mcp/sampling/createMessage/")
@utility_router.post("/_internal/mcp/sampling/createMessage")
async def handle_internal_mcp_sampling_create_message(request: Request):
    """Handle trusted sampling/createMessage requests forwarded from the Rust runtime.

    Args:
        request: Trusted internal MCP sampling/createMessage request.

    Returns:
        MCP sampling/createMessage response payload.
    """
    db = SessionLocal()
    req_id = None
    try:
        _build_internal_mcp_forwarded_user(request)
        try:
            body = orjson.loads(await request.body())
        except orjson.JSONDecodeError:
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32700, "message": "Parse error"},
                    "id": None,
                },
            )

        req_id = body.get("id") if isinstance(body, dict) else None
        if not isinstance(body, dict) or body.get("method") != "sampling/createMessage":
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32600, "message": "Invalid Request"},
                    "id": req_id,
                },
            )

        # NOTE(review): unlike sibling handlers there is no _authorize_internal_mcp_request
        # call here — confirm sampling authorization happens inside sampling_handler or upstream.
        if request.headers.get("x-contextforge-mcp-runtime") == "rust":
            server_id = request.headers.get("x-contextforge-server-id")
            if server_id:
                _enforce_internal_mcp_server_scope(request, server_id)

        params = body.get("params", {})
        if not isinstance(params, dict):
            params = {}

        payload = await sampling_handler.create_message(db, params)
        if db.is_active and db.in_transaction() is not None:
            db.commit()
        return ORJSONResponse(content=payload)
    except JSONRPCError as exc:
        return ORJSONResponse(status_code=403, content=exc.to_dict()["error"])
    except Exception as exc:
        try:
            db.rollback()
        except Exception:
            try:
                db.invalidate()
            except Exception:
                pass  # nosec B110 - Best effort cleanup on connection failure
        return ORJSONResponse(status_code=500, content={"code": -32000, "message": "Internal error", "data": str(exc)})
    finally:
        db.close()
@utility_router.post("/_internal/mcp/logging/setLevel/")
@utility_router.post("/_internal/mcp/logging/setLevel")
async def handle_internal_mcp_logging_set_level(request: Request):
    """Handle trusted logging/setLevel requests forwarded from the Rust runtime.

    Args:
        request: Trusted internal MCP logging/setLevel request.

    Returns:
        Empty JSON response confirming the new log level.
    """
    db = SessionLocal()
    req_id = None
    try:
        _build_internal_mcp_forwarded_user(request)
        try:
            body = orjson.loads(await request.body())
        except orjson.JSONDecodeError:
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32700, "message": "Parse error"},
                    "id": None,
                },
            )

        req_id = body.get("id") if isinstance(body, dict) else None
        if not isinstance(body, dict) or body.get("method") != "logging/setLevel":
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32600, "message": "Invalid Request"},
                    "id": req_id,
                },
            )

        await _authorize_internal_mcp_request(
            request,
            db,
            permission="admin.system_config",
            method="logging/setLevel",
            server_id=None,
        )

        params = body.get("params", {})
        if not isinstance(params, dict):
            params = {}

        # An invalid/missing level raises from the LogLevel constructor and is
        # reported via the generic 500 handler below.
        level = LogLevel(params.get("level"))
        await logging_service.set_level(level)
        if db.is_active and db.in_transaction() is not None:
            db.commit()
        return ORJSONResponse(content={})
    except JSONRPCError as exc:
        return ORJSONResponse(status_code=403, content=exc.to_dict()["error"])
    except Exception as exc:
        try:
            db.rollback()
        except Exception:
            try:
                db.invalidate()
            except Exception:
                pass  # nosec B110 - Best effort cleanup on connection failure
        return ORJSONResponse(status_code=500, content={"code": -32000, "message": "Internal error", "data": str(exc)})
    finally:
        db.close()


@utility_router.post("/_internal/mcp/prompts/list/")
@utility_router.post("/_internal/mcp/prompts/list")
async def handle_internal_mcp_prompts_list(request: Request):
    """Handle trusted prompts/list requests forwarded from the Rust runtime.

    Args:
        request: Trusted internal MCP prompts/list request.

    Returns:
        MCP prompts/list response payload.

    Raises:
        Exception: Propagated after best-effort rollback when unexpected failures occur.
    """
    db = SessionLocal()
    req_id = None
    try:
        user = _build_internal_mcp_forwarded_user(request)
        try:
            body = orjson.loads(await request.body())
        except orjson.JSONDecodeError:
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32700, "message": "Parse error"},
                    "id": None,
                },
            )

        req_id = body.get("id") if isinstance(body, dict) else None
        if not isinstance(body, dict) or body.get("method") != "prompts/list":
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32600, "message": "Invalid Request"},
                    "id": req_id,
                },
            )

        params = body.get("params", {})
        if not isinstance(params, dict):
            params = {}

        server_id = request.headers.get("x-contextforge-server-id") if request.headers.get("x-contextforge-mcp-runtime") == "rust" else None
        if server_id:
            _enforce_internal_mcp_server_scope(request, server_id)
        else:
            server_id = params.get("server_id")
        cursor = params.get("cursor")

        await _authorize_internal_mcp_request(
            request,
            db,
            permission="prompts.read",
            method="prompts/list",
            server_id=server_id,
        )

        user_email, token_teams, is_admin = _get_rpc_filter_context(request, user)
        if is_admin and token_teams is None:
            user_email = None
            token_teams = None
        elif token_teams is None:
            token_teams = []

        if server_id:
            prompts = await prompt_service.list_server_prompts(
                db,
                server_id,
                cursor=cursor,
                user_email=user_email,
                token_teams=token_teams,
            )
            payload = {"prompts": [p.model_dump(by_alias=True, exclude_none=True) for p in prompts]}
        else:
            prompts, next_cursor = await prompt_service.list_prompts(
                db,
                cursor=cursor,
                limit=0,
                user_email=user_email,
                token_teams=token_teams,
            )
            payload = {"prompts": [p.model_dump(by_alias=True, exclude_none=True) for p in prompts]}
            if next_cursor:
                payload["nextCursor"] = next_cursor

        if db.is_active and db.in_transaction() is not None:
            db.commit()
        return ORJSONResponse(content=payload)
    except JSONRPCError as exc:
        return ORJSONResponse(status_code=403, content=exc.to_dict()["error"])
    except Exception:
        try:
            db.rollback()
        except Exception:
            try:
                db.invalidate()
            except Exception:
                pass  # nosec B110 - Best effort cleanup on connection failure
        raise
    finally:
        db.close()


@utility_router.post("/_internal/mcp/prompts/get/")
@utility_router.post("/_internal/mcp/prompts/get")
async def handle_internal_mcp_prompts_get(request: Request):
    """Handle trusted prompts/get requests forwarded from the Rust runtime.

    Args:
        request: Trusted internal MCP prompts/get request.

    Returns:
        MCP prompts/get response payload.

    Raises:
        Exception: Propagated after best-effort rollback when unexpected failures occur.
    """
    db = SessionLocal()
    req_id = None
    name = None
    try:
        user = _build_internal_mcp_forwarded_user(request)
        try:
            body = orjson.loads(await request.body())
        except orjson.JSONDecodeError:
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32700, "message": "Parse error"},
                    "id": None,
                },
            )

        req_id = body.get("id") if isinstance(body, dict) else None
        if not isinstance(body, dict) or body.get("method") != "prompts/get":
            return ORJSONResponse(
                status_code=400,
                content={
                    "jsonrpc": "2.0",
                    "error": {"code": -32600, "message": "Invalid Request"},
                    "id": req_id,
                },
            )

        params = body.get("params", {})
        if not isinstance(params, dict):
            params = {}

        server_id = request.headers.get("x-contextforge-server-id") if request.headers.get("x-contextforge-mcp-runtime") == "rust" else None
        if server_id:
            _enforce_internal_mcp_server_scope(request, server_id)
        else:
            server_id = params.get("server_id")

        await _authorize_internal_mcp_request(
            request,
            db,
            permission="prompts.read",
            method="prompts/get",
            server_id=server_id,
        )

        name = params.get("name")
        arguments = params.get("arguments", {})
        meta_data = params.get("_meta")
        if not name:
            return ORJSONResponse(
                status_code=400,
                content={
                    "code": -32602,
                    "message": "Missing prompt name in parameters",
                    "data": params,
                },
            )

        auth_user_email, auth_token_teams, auth_is_admin = _get_rpc_filter_context(request, user)
        if auth_is_admin and auth_token_teams is None:
            auth_user_email = None
        elif auth_token_teams is None:
            auth_token_teams = []

        plugin_context_table = getattr(request.state, "plugin_context_table", None)
        plugin_global_context = getattr(request.state, "plugin_global_context", None)
        result = await prompt_service.get_prompt(
            db,
            name,
            arguments,
            user=auth_user_email,
            server_id=server_id,
            token_teams=auth_token_teams,
            plugin_context_table=plugin_context_table,
            plugin_global_context=plugin_global_context,
            _meta_data=meta_data,
        )
        payload = result.model_dump(by_alias=True, exclude_none=True) if hasattr(result, "model_dump") else result

        if db.is_active and db.in_transaction() is not None:
            db.commit()
        return ORJSONResponse(content=payload)
    except PromptNotFoundError as exc:
        return ORJSONResponse(
            status_code=404,
            content={
                "code": -32002,
                "message": str(exc),
                "data": {"name": name} if name else None,
            },
        )
    except PromptError as exc:
        try:
            if db.is_active and db.in_transaction() is not None:
                db.rollback()
        except Exception:
            try:
                db.invalidate()
            except Exception:
                pass  # nosec B110 - Best effort cleanup on connection failure
        return ORJSONResponse(
            status_code=422,
            content={
                "code": -32000,
                "message": str(exc),
                "data": {"name": name} if name else None,
            },
        )
    except JSONRPCError as exc:
        # -32003 is the access-denied code; everything else maps to a client error.
        status_code = 403 if exc.code == -32003 else 400
        return ORJSONResponse(status_code=status_code, content=exc.to_dict()["error"])
    except Exception:
        try:
            db.rollback()
        except Exception:
            try:
                db.invalidate()
            except Exception:
                pass  # nosec B110 - Best effort cleanup on connection failure
        raise
    finally:
        db.close()


@utility_router.post("/_internal/mcp/tools/list/authz/")
@utility_router.post("/_internal/mcp/tools/list/authz")
async def handle_internal_mcp_tools_list_authz(request: Request):
    """Authorize trusted server-scoped tools/list requests for the Rust direct-DB path.

    Args:
        request: Trusted internal MCP authz request.

    Returns:
        Empty success response when the request is authorized.
    """
    return await _authorize_internal_mcp_server_scoped_method(
        request,
        permission="tools.read",
        method="tools/list",
    )


async def _authorize_internal_mcp_server_scoped_method(
    request: Request,
    *,
    permission: str,
    method: str,
) -> Response:
    """Authorize a trusted server-scoped MCP method for Rust direct-path execution.

    Args:
        request: Trusted internal MCP authz request.
        permission: Permission required for the target method.
        method: MCP method name being authorized.

    Returns:
        Empty success response when the method is authorized, otherwise a JSON error response.

    Raises:
        HTTPException: If the trusted server scope header is missing.
        Exception: Propagated after best-effort rollback when unexpected failures occur.
    """
    server_id = request.headers.get("x-contextforge-server-id")
    if not server_id:
        raise HTTPException(status_code=400, detail="Missing trusted MCP server scope")

    db = SessionLocal()
    try:
        await _authorize_internal_mcp_request(
            request,
            db,
            permission=permission,
            method=method,
            server_id=server_id,
        )
        if db.is_active and db.in_transaction() is not None:
            db.commit()
        return Response(status_code=status.HTTP_204_NO_CONTENT)
    except JSONRPCError as exc:
        return ORJSONResponse(status_code=403, content={"code": exc.code, "message": exc.message, "data": exc.data})
    except Exception:
        try:
            db.rollback()
        except Exception:
            try:
                db.invalidate()
            except Exception:
                pass  # nosec B110 - Best effort cleanup on connection failure
        raise
    finally:
        db.close()


@utility_router.post("/_internal/mcp/resources/list/authz/")
@utility_router.post("/_internal/mcp/resources/list/authz")
async def handle_internal_mcp_resources_list_authz(request: Request):
    """Authorize trusted server-scoped resources/list requests for Rust direct-path execution.

    Args:
        request: Trusted internal MCP authz request.

    Returns:
        Empty success response when the request is authorized.
    """
    return await _authorize_internal_mcp_server_scoped_method(
        request,
        permission="resources.read",
        method="resources/list",
    )


@utility_router.post("/_internal/mcp/resources/read/authz/")
@utility_router.post("/_internal/mcp/resources/read/authz")
async def handle_internal_mcp_resources_read_authz(request: Request):
    """Authorize trusted server-scoped resources/read requests for Rust direct-path execution.

    Args:
        request: Trusted internal MCP authz request.

    Returns:
        Empty success response when the request is authorized.
    """
    return await _authorize_internal_mcp_server_scoped_method(
        request,
        permission="resources.read",
        method="resources/read",
    )


@utility_router.post("/_internal/mcp/resources/templates/list/authz/")
@utility_router.post("/_internal/mcp/resources/templates/list/authz")
async def handle_internal_mcp_resource_templates_list_authz(request: Request):
    """Authorize trusted server-scoped resources/templates/list requests for Rust direct-path execution.

    Args:
        request: Trusted internal MCP authz request.

    Returns:
        Empty success response when the request is authorized.
    """
    return await _authorize_internal_mcp_server_scoped_method(
        request,
        permission="resources.read",
        method="resources/templates/list",
    )


@utility_router.post("/_internal/mcp/prompts/list/authz/")
@utility_router.post("/_internal/mcp/prompts/list/authz")
async def handle_internal_mcp_prompts_list_authz(request: Request):
    """Authorize trusted server-scoped prompts/list requests for Rust direct-path execution.

    Args:
        request: Trusted internal MCP authz request.

    Returns:
        Empty success response when the request is authorized.
+ """ + return await _authorize_internal_mcp_server_scoped_method( + request, + permission="prompts.read", + method="prompts/list", + ) + + +@utility_router.post("/_internal/mcp/prompts/get/authz/") +@utility_router.post("/_internal/mcp/prompts/get/authz") +async def handle_internal_mcp_prompts_get_authz(request: Request): + """Authorize trusted server-scoped prompts/get requests for Rust direct-path execution. + + Args: + request: Trusted internal MCP authz request. + + Returns: + Empty success response when the request is authorized. + """ + return await _authorize_internal_mcp_server_scoped_method( + request, + permission="prompts.read", + method="prompts/get", + ) + + +async def _maybe_forward_affinitized_rpc_request( + request: Request, + *, + method: str, + params: Dict[str, Any], + req_id: Any, + lowered_request_headers: Dict[str, str], +) -> Optional[Dict[str, Any]]: + """Forward an MCP request to the owning worker when session affinity requires it. + + Args: + request: Incoming RPC request. + method: MCP method name being executed. + params: Parsed JSON-RPC params payload. + req_id: JSON-RPC request identifier. + lowered_request_headers: Lower-cased request headers used for forwarding. + + Returns: + Forwarded JSON-RPC response payload when affinity forwarding handled the + request, otherwise ``None`` so local execution can continue. 
+ """ + request_headers = request.headers + rpc_client_host = getattr(getattr(request, "client", None), "host", None) + rpc_from_loopback = rpc_client_host in ("127.0.0.1", "::1") if rpc_client_host else False + mcp_session_id = request_headers.get("mcp-session-id") or request_headers.get("x-mcp-session-id") + is_internally_forwarded = rpc_from_loopback and request_headers.get("x-forwarded-internally") == "true" + + if settings.mcpgateway_session_affinity_enabled and mcp_session_id and method != "initialize" and not is_internally_forwarded: + # First-Party + from mcpgateway.services.mcp_session_pool import MCPSessionPool, WORKER_ID # pylint: disable=import-outside-toplevel + + if not MCPSessionPool.is_valid_mcp_session_id(mcp_session_id): + logger.debug("Invalid MCP session id for affinity forwarding, executing locally") + return None + + session_short = mcp_session_id[:8] if len(mcp_session_id) >= 8 else mcp_session_id + logger.debug("[AFFINITY] Worker %s | Session %s... | Method: %s | RPC request received, checking affinity", WORKER_ID, session_short, method) + try: + # First-Party + from mcpgateway.services.mcp_session_pool import get_mcp_session_pool # pylint: disable=import-outside-toplevel + + pool = get_mcp_session_pool() + forwarded_response = await pool.forward_request_to_owner( + mcp_session_id, + {"method": method, "params": params, "headers": lowered_request_headers, "req_id": req_id}, + ) + if forwarded_response is not None: + logger.info("[AFFINITY] Worker %s | Session %s... | Method: %s | Forwarded response received", WORKER_ID, session_short, method) + if "error" in forwarded_response: + return {"jsonrpc": "2.0", "error": forwarded_response["error"], "id": req_id} + return {"jsonrpc": "2.0", "result": forwarded_response.get("result", {}), "id": req_id} + except RuntimeError: + logger.debug("[AFFINITY] Worker %s | Session %s... 
| Method: %s | Pool not initialized, executing locally", WORKER_ID, session_short, method) + return None + + if is_internally_forwarded and mcp_session_id: + # First-Party + from mcpgateway.services.mcp_session_pool import WORKER_ID # pylint: disable=import-outside-toplevel + + session_short = mcp_session_id[:8] if len(mcp_session_id) >= 8 else mcp_session_id + logger.debug("[AFFINITY] Worker %s | Session %s... | Method: %s | Internally forwarded request, executing locally", WORKER_ID, session_short, method) + + return None + + +async def _execute_rpc_initialize( + request: Request, + user, + *, + params: Dict[str, Any], + server_id: Optional[str], + mcp_session_id: Optional[str], +): + """Execute the MCP initialize handshake while preserving session ownership semantics. + + Args: + request: Incoming RPC request. + user: Authenticated user payload. + params: Initialize params payload. + server_id: Optional virtual server identifier. + mcp_session_id: Session id from the transport headers, when present. + + Returns: + Serialized initialize result payload. + + Raises: + JSONRPCError: If session ownership cannot be claimed or validated. 
+ """ + init_session_id = params.get("session_id") or params.get("sessionId") or request.query_params.get("session_id") + requester_email, requester_is_admin = _get_request_identity(request, user) + + if init_session_id: + effective_owner = await session_registry.claim_session_owner(init_session_id, requester_email) + if effective_owner is None: + raise JSONRPCError(-32003, _ACCESS_DENIED_MSG, {"method": "initialize"}) + + if effective_owner and not requester_is_admin and requester_email != effective_owner: + raise JSONRPCError(-32003, _ACCESS_DENIED_MSG, {"method": "initialize"}) + + result = await session_registry.handle_initialize_logic(params, session_id=init_session_id, server_id=server_id) + if hasattr(result, "model_dump"): + result = result.model_dump(by_alias=True, exclude_none=True) + + if settings.mcpgateway_session_affinity_enabled and mcp_session_id and mcp_session_id != "not-provided": + try: + # First-Party + from mcpgateway.services.mcp_session_pool import get_mcp_session_pool, WORKER_ID # pylint: disable=import-outside-toplevel + + pool = get_mcp_session_pool() + await pool.register_pool_session_owner(mcp_session_id) + logger.debug("[AFFINITY_INIT] Worker %s | Session %s... | Registered ownership after initialize", WORKER_ID, mcp_session_id[:8]) + except Exception as e: + logger.warning("[AFFINITY_INIT] Failed to register session ownership: %s", e) + + return result + + +async def _execute_rpc_tools_call( + request: Request, + db: Session, + user, + *, + req_id: Any, + params: Dict[str, Any], + lowered_request_headers: Dict[str, str], + server_id: Optional[str], +): + """Execute the hot-path ``tools/call`` branch without the generic RPC method switch. + + Args: + request: Incoming RPC request. + db: Active database session. + user: Authenticated user payload. + req_id: JSON-RPC request identifier. + params: Parsed tools/call params payload. + lowered_request_headers: Lower-cased request headers used for passthrough. 
+ server_id: Optional virtual server identifier. + + Returns: + Serialized MCP tools/call result payload. + + Raises: + JSONRPCError: If the tool name is missing, execution is cancelled, or the + downstream tool branch reports a JSON-RPC-visible failure. + """ + name = params.get("name") + arguments = params.get("arguments", {}) + meta_data = params.get("_meta", None) + if not name: + raise JSONRPCError(-32602, "Missing tool name in parameters", params) + + auth_user_email, auth_token_teams, auth_is_admin = _get_rpc_filter_context(request, user) + run_owner_email = auth_user_email + run_owner_team_ids = [] if auth_token_teams is None else list(auth_token_teams) + if auth_is_admin and auth_token_teams is None: + auth_user_email = None + elif auth_token_teams is None: + auth_token_teams = [] + + oauth_user_email = get_user_email(user) + plugin_context_table = getattr(request.state, "plugin_context_table", None) + plugin_global_context = getattr(request.state, "plugin_global_context", None) + + run_id = str(req_id) if req_id is not None else None + tool_task: Optional[asyncio.Task] = None + + async def cancel_tool_task(reason: Optional[str] = None): + """Cancel the active tool execution task when cancellation is requested. + + Args: + reason: Optional human-readable cancellation reason. 
+ """ + if tool_task and not tool_task.done(): + logger.info("Cancelling tool task for run_id=%s, reason=%s", run_id, reason) + tool_task.cancel() + + if settings.mcpgateway_tool_cancellation_enabled and run_id: + await cancellation_service.register_run( + run_id, + name=f"tool:{name}", + cancel_callback=cancel_tool_task, + owner_email=run_owner_email, + owner_team_ids=run_owner_team_ids, + ) + + try: + if settings.mcpgateway_tool_cancellation_enabled and run_id: + run_status = await cancellation_service.get_status(run_id) + if run_status and run_status.get("cancelled"): + raise JSONRPCError(-32800, f"Tool execution cancelled: {name}", {"requestId": run_id}) + + async def execute_tool(): + """Execute the tool invocation using the existing Python service layer. + + Returns: + Result returned by the Python tool service. + + Raises: + JSONRPCError: If the requested tool cannot be found. + """ + try: + return await tool_service.invoke_tool( + db=db, + name=name, + arguments=arguments, + request_headers=lowered_request_headers, + app_user_email=oauth_user_email, + user_email=auth_user_email, + token_teams=auth_token_teams, + server_id=server_id, + plugin_context_table=plugin_context_table, + plugin_global_context=plugin_global_context, + meta_data=meta_data, + ) + except (ToolNotFoundError, ValueError): + logger.error("Tool not found: %s", name) + raise JSONRPCError(-32601, f"Tool not found: {name}", None) + + tool_task = asyncio.create_task(execute_tool()) + + if settings.mcpgateway_tool_cancellation_enabled and run_id: + run_status = await cancellation_service.get_status(run_id) + if run_status and run_status.get("cancelled"): + tool_task.cancel() + + try: + result = await tool_task + if hasattr(result, "model_dump"): + result = result.model_dump(by_alias=True, exclude_none=True) + return result + except asyncio.CancelledError as exc: + logger.info("Tool execution cancelled for run_id=%s, tool=%s", run_id, name) + raise JSONRPCError(-32800, f"Tool execution cancelled: 
{name}", {"requestId": run_id, "partial": False}) from exc + finally: + if settings.mcpgateway_tool_cancellation_enabled and run_id: + await cancellation_service.unregister_run(run_id) + + +@utility_router.post("/_internal/mcp/tools/call/") +@utility_router.post("/_internal/mcp/tools/call") +async def handle_internal_mcp_tools_call(request: Request): + """Handle trusted tools/call requests forwarded from the local Rust runtime. + + Args: + request: Trusted internal MCP tools/call request. + + Returns: + JSON-RPC response payload for the tools/call request. + + Raises: + PluginError: Re-raised so plugin middleware can preserve existing behavior. + PluginViolationError: Re-raised so plugin middleware can preserve existing behavior. + Exception: Propagated after best-effort rollback when unexpected failures occur. + """ + req_id = None + db = SessionLocal() + try: + user = _build_internal_mcp_forwarded_user(request) + try: + body = orjson.loads(await request.body()) + except orjson.JSONDecodeError: + return ORJSONResponse( + status_code=400, + content={ + "jsonrpc": "2.0", + "error": {"code": -32700, "message": "Parse error"}, + "id": None, + }, + ) + + if not isinstance(body, dict) or body.get("method") != "tools/call": + return ORJSONResponse( + status_code=400, + content={ + "jsonrpc": "2.0", + "error": {"code": -32600, "message": "Invalid Request"}, + "id": body.get("id") if isinstance(body, dict) else None, + }, + ) + + req_id = body.get("id") + if req_id is None: + req_id = str(uuid.uuid4()) + params = body.get("params", {}) + if not isinstance(params, dict): + params = {} + + server_id = request.headers.get("x-contextforge-server-id") or params.get("server_id") + if server_id: + _enforce_internal_mcp_server_scope(request, server_id) + + lowered_request_headers = {k.lower(): v for k, v in request.headers.items()} + forwarded_response = await _maybe_forward_affinitized_rpc_request( + request, + method="tools/call", + params=params, + req_id=req_id, + 
lowered_request_headers=lowered_request_headers, + ) + if forwarded_response is not None: + return forwarded_response + + if (_get_internal_mcp_auth_context(request) or {}).get("is_authenticated", True) is True: + await _ensure_rpc_permission(user, db, "tools.execute", "tools/call", request=request) + + try: + result = await _execute_rpc_tools_call( + request, + db, + user, + req_id=req_id, + params=params, + lowered_request_headers=lowered_request_headers, + server_id=server_id, + ) + finally: + if db.is_active and db.in_transaction() is not None: + db.commit() + db.close() + + return {"jsonrpc": "2.0", "result": result, "id": req_id} + except (PluginError, PluginViolationError): + raise + except JSONRPCError as e: + error = e.to_dict() + return {"jsonrpc": "2.0", "error": error["error"], "id": req_id} + except Exception: + try: + db.rollback() + except Exception: + try: + db.invalidate() + except Exception: + pass # nosec B110 - Best effort cleanup on connection failure + raise + finally: + try: + db.close() + except Exception: + pass # nosec B110 - Best effort cleanup on connection failure + + +@utility_router.post("/_internal/mcp/tools/call/resolve/") +@utility_router.post("/_internal/mcp/tools/call/resolve") +async def handle_internal_mcp_tools_call_resolve(request: Request): + """Resolve a Rust-direct MCP tools/call execution plan without executing the tool. + + Args: + request: Trusted internal MCP tools/call resolve request. + + Returns: + JSON response containing either an execution plan or a JSON-RPC-visible error. + + Raises: + PluginError: Re-raised so plugin middleware can preserve existing behavior. + PluginViolationError: Re-raised so plugin middleware can preserve existing behavior. + Exception: Propagated after best-effort rollback when unexpected failures occur. 
+ """ + db = SessionLocal() + try: + user = _build_internal_mcp_forwarded_user(request) + try: + body = orjson.loads(await request.body()) + except orjson.JSONDecodeError: + return ORJSONResponse( + status_code=400, + content={ + "jsonrpc": "2.0", + "error": {"code": -32700, "message": "Parse error"}, + "id": None, + }, + ) + + if not isinstance(body, dict) or body.get("method") != "tools/call": + return ORJSONResponse( + status_code=400, + content={ + "jsonrpc": "2.0", + "error": {"code": -32600, "message": "Invalid Request"}, + "id": body.get("id") if isinstance(body, dict) else None, + }, + ) + + params = body.get("params", {}) + if not isinstance(params, dict): + params = {} + + name = params.get("name") + if not name: + return ORJSONResponse( + status_code=400, + content={ + "jsonrpc": "2.0", + "error": {"code": -32602, "message": "Missing tool name in parameters"}, + "id": body.get("id"), + }, + ) + + server_id = request.headers.get("x-contextforge-server-id") or params.get("server_id") + if server_id: + _enforce_internal_mcp_server_scope(request, server_id) + + if (_get_internal_mcp_auth_context(request) or {}).get("is_authenticated", True) is True: + await _ensure_rpc_permission(user, db, "tools.execute", "tools/call", request=request) + + auth_user_email, auth_token_teams, auth_is_admin = _get_rpc_filter_context(request, user) + if auth_is_admin and auth_token_teams is None: + auth_user_email = None + elif auth_token_teams is None: + auth_token_teams = [] + + plan = await tool_service.prepare_rust_mcp_tool_execution( + db=db, + name=name, + request_headers={k.lower(): v for k, v in request.headers.items()}, + app_user_email=get_user_email(user), + user_email=auth_user_email, + token_teams=auth_token_teams, + server_id=server_id, + ) + + if db.is_active and db.in_transaction() is not None: + db.commit() + return ORJSONResponse(content=plan) + except ToolNotFoundError as exc: + request_id = body.get("id") if isinstance(body, dict) else None + return 
ORJSONResponse( + status_code=404, + content={ + "jsonrpc": "2.0", + "error": {"code": -32601, "message": str(exc)}, + "id": request_id, + }, + ) + except ToolError as exc: + request_id = body.get("id") if isinstance(body, dict) else None + return ORJSONResponse( + status_code=400, + content={ + "jsonrpc": "2.0", + "error": {"code": -32000, "message": str(exc)}, + "id": request_id, + }, + ) + except (PluginError, PluginViolationError): + raise + except JSONRPCError as exc: + return ORJSONResponse(status_code=403, content=exc.to_dict()["error"]) + except Exception: + try: + db.rollback() + except Exception: + try: + db.invalidate() + except Exception: + pass # nosec B110 - Best effort cleanup on connection failure + raise + finally: + try: + db.close() + except Exception: + pass # nosec B110 - Best effort cleanup on connection failure + + +@utility_router.post("/_internal/mcp/tools/call/metric/") +@utility_router.post("/_internal/mcp/tools/call/metric") +async def handle_internal_mcp_tools_call_metric(request: Request): + """Record buffered tool/server metrics for a Rust-direct `tools/call`. + + Args: + request: Trusted internal metrics writeback request. + + Returns: + ORJSONResponse acknowledging the buffered metric writeback. 
+ """ + _build_internal_mcp_forwarded_user(request) + try: + body = orjson.loads(await request.body()) + except orjson.JSONDecodeError: + return ORJSONResponse(status_code=400, content={"detail": "Invalid JSON body"}) + + if not isinstance(body, dict): + return ORJSONResponse(status_code=400, content={"detail": "Invalid metrics payload"}) + + tool_id = body.get("toolId") + duration_ms = body.get("durationMs") + success = body.get("success") + server_id = body.get("serverId") + error_message = body.get("errorMessage") + + if not isinstance(tool_id, str) or not tool_id.strip(): + return ORJSONResponse(status_code=400, content={"detail": "Missing toolId"}) + if not isinstance(duration_ms, (int, float)) or duration_ms < 0: + return ORJSONResponse(status_code=400, content={"detail": "Invalid durationMs"}) + if not isinstance(success, bool): + return ORJSONResponse(status_code=400, content={"detail": "Invalid success flag"}) + if server_id is not None and (not isinstance(server_id, str) or not server_id.strip()): + return ORJSONResponse(status_code=400, content={"detail": "Invalid serverId"}) + if error_message is not None and not isinstance(error_message, str): + return ORJSONResponse(status_code=400, content={"detail": "Invalid errorMessage"}) + + request_server_id = request.headers.get("x-contextforge-server-id") + if request_server_id: + _enforce_internal_mcp_server_scope(request, request_server_id) + if server_id and server_id != request_server_id: + return ORJSONResponse(status_code=400, content={"detail": "serverId does not match forwarded server scope"}) + server_id = request_server_id + + # First-Party + from mcpgateway.services.metrics_buffer_service import get_metrics_buffer_service # pylint: disable=import-outside-toplevel + + metrics_buffer = get_metrics_buffer_service() + response_time = float(duration_ms) / 1000.0 + metrics_buffer.record_tool_metric_with_duration( + tool_id=tool_id, + response_time=response_time, + success=success, + 
error_message=error_message, + ) + if server_id: + metrics_buffer.record_server_metric_with_duration( + server_id=server_id, + response_time=response_time, + success=success, + error_message=error_message, + ) + + return ORJSONResponse(content={"status": "ok"}) + + +async def _handle_rpc_authenticated(request: Request, db: Session, user): + """Handle RPC requests. + + Args: + request (Request): The incoming FastAPI request. + db (Session): Database session. + user: The authenticated user (dict with RBAC context). + + Returns: + Response with the RPC result or error. + + Raises: PluginError: If encounters issue with plugin PluginViolationError: If plugin violated the request. Example - In case of OPA plugin, if the request is denied by policy. """ @@ -6064,13 +8891,35 @@ async def handle_rpc(request: Request, db: Session = Depends(get_db), user=Depen "id": None, }, ) + request_headers = request.headers + lowered_headers: Optional[Dict[str, str]] = None + + def _lowered_request_headers() -> Dict[str, str]: + """Return a cached lower-cased copy of the incoming request headers. + + Returns: + Dict[str, str]: Lower-cased request headers cached for repeated access. 
+ """ + nonlocal lowered_headers + if lowered_headers is None: + lowered_headers = {k.lower(): v for k, v in request_headers.items()} + return lowered_headers + + _trusted_internal_mcp_dispatch = _get_internal_mcp_auth_context(request) is not None + _internal_runtime_server_id = request_headers.get("x-contextforge-server-id") if request_headers.get("x-contextforge-mcp-runtime") == "rust" else None + method = body["method"] req_id = body.get("id") if req_id is None: req_id = str(uuid.uuid4()) params = body.get("params", {}) + if not isinstance(params, dict): + params = {} + if _internal_runtime_server_id: + params["server_id"] = _internal_runtime_server_id server_id = params.get("server_id", None) cursor = params.get("cursor") # Extract cursor parameter + mcp_session_id = request_headers.get("mcp-session-id") or request_headers.get("x-mcp-session-id") # RBAC: Enforce server_id scoping for server-scoped tokens. # Extract token scopes once, then: @@ -6081,6 +8930,11 @@ async def handle_rpc(request: Request, db: Session = Depends(get_db), user=Depen _cached = getattr(request.state, "_jwt_verified_payload", None) _jwt_payload = _cached[1] if (isinstance(_cached, tuple) and len(_cached) == 2 and isinstance(_cached[1], dict)) else None _token_scopes = _jwt_payload.get("scopes", {}) if _jwt_payload else {} + _internal_auth_context = _get_internal_mcp_auth_context(request) + if (not _token_scopes) and isinstance(_internal_auth_context, dict): + _scoped_server_id = _internal_auth_context.get("scoped_server_id") + if isinstance(_scoped_server_id, str) and _scoped_server_id: + _token_scopes = {"server_id": _scoped_server_id} _token_server_id = _token_scopes.get("server_id") if _token_scopes else None if server_id: @@ -6096,91 +8950,27 @@ async def handle_rpc(request: Request, db: Session = Depends(get_db), user=Depen elif _token_server_id is not None: server_id = _token_server_id - RPCRequest(jsonrpc="2.0", method=method, params=params) # Validate the request body against the 
RPCRequest model - - # Multi-worker session affinity: check if we should forward to another worker - # This applies to ALL methods (except initialize which creates new sessions) - # The x-forwarded-internally header marks requests that have already been forwarded - # to prevent infinite forwarding loops - headers = {k.lower(): v for k, v in request.headers.items()} - # Session ID can come from two sources: - # 1. MCP-Session-Id (mcp-session-id) - MCP protocol header from Streamable HTTP clients - # 2. x-mcp-session-id - our internal header from SSE session_registry calls - mcp_session_id = headers.get("mcp-session-id") or headers.get("x-mcp-session-id") - # Only trust x-forwarded-internally from loopback to prevent external spoofing - _rpc_client_host = request.client.host if request.client else None - _rpc_from_loopback = _rpc_client_host in ("127.0.0.1", "::1") if _rpc_client_host else False - is_internally_forwarded = _rpc_from_loopback and headers.get("x-forwarded-internally") == "true" - - if settings.mcpgateway_session_affinity_enabled and mcp_session_id and method != "initialize" and not is_internally_forwarded: - # First-Party - from mcpgateway.services.mcp_session_pool import MCPSessionPool, WORKER_ID # pylint: disable=import-outside-toplevel - - if not MCPSessionPool.is_valid_mcp_session_id(mcp_session_id): - logger.debug("Invalid MCP session id for affinity forwarding, executing locally") - else: - session_short = mcp_session_id[:8] if len(mcp_session_id) >= 8 else mcp_session_id - logger.debug(f"[AFFINITY] Worker {WORKER_ID} | Session {session_short}... 
| Method: {method} | RPC request received, checking affinity") - try: - # First-Party - from mcpgateway.services.mcp_session_pool import get_mcp_session_pool # pylint: disable=import-outside-toplevel - - pool = get_mcp_session_pool() - forwarded_response = await pool.forward_request_to_owner( - mcp_session_id, - {"method": method, "params": params, "headers": dict(headers), "req_id": req_id}, - ) - if forwarded_response is not None: - # Request was handled by another worker - logger.info(f"[AFFINITY] Worker {WORKER_ID} | Session {session_short}... | Method: {method} | Forwarded response received") - if "error" in forwarded_response: - raise JSONRPCError( - forwarded_response["error"].get("code", -32603), - forwarded_response["error"].get("message", "Forwarded request failed"), - ) - result = forwarded_response.get("result", {}) - return {"jsonrpc": "2.0", "result": result, "id": req_id} - except RuntimeError: - # Pool not initialized - execute locally - logger.debug(f"[AFFINITY] Worker {WORKER_ID} | Session {session_short}... | Method: {method} | Pool not initialized, executing locally") - elif is_internally_forwarded and mcp_session_id: - # First-Party - from mcpgateway.services.mcp_session_pool import WORKER_ID # pylint: disable=import-outside-toplevel + if not _trusted_internal_mcp_dispatch: + RPCRequest(jsonrpc="2.0", method=method, params=params) # Validate the request body against the RPCRequest model - session_short = mcp_session_id[:8] if len(mcp_session_id) >= 8 else mcp_session_id - logger.debug(f"[AFFINITY] Worker {WORKER_ID} | Session {session_short}... 
| Method: {method} | Internally forwarded request, executing locally") + forwarded_response = await _maybe_forward_affinitized_rpc_request( + request, + method=method, + params=params, + req_id=req_id, + lowered_request_headers=_lowered_request_headers(), + ) + if forwarded_response is not None: + return forwarded_response if method == "initialize": - # Extract session_id from params or query string (for capability tracking) - init_session_id = params.get("session_id") or params.get("sessionId") or request.query_params.get("session_id") - requester_email, requester_is_admin = _get_request_identity(request, user) - - if init_session_id: - effective_owner = await session_registry.claim_session_owner(init_session_id, requester_email) - if effective_owner is None: - raise JSONRPCError(-32003, _ACCESS_DENIED_MSG, {"method": method}) - - if effective_owner and not requester_is_admin and requester_email != effective_owner: - raise JSONRPCError(-32003, _ACCESS_DENIED_MSG, {"method": method}) - - # Pass server_id to advertise OAuth capability if configured per RFC 9728 - result = await session_registry.handle_initialize_logic(body.get("params", {}), session_id=init_session_id, server_id=server_id) - if hasattr(result, "model_dump"): - result = result.model_dump(by_alias=True, exclude_none=True) - - # Register session ownership in Redis for multi-worker affinity - # This must happen AFTER initialize succeeds so subsequent requests route to this worker - if settings.mcpgateway_session_affinity_enabled and mcp_session_id and mcp_session_id != "not-provided": - try: - # First-Party - from mcpgateway.services.mcp_session_pool import get_mcp_session_pool, WORKER_ID # pylint: disable=import-outside-toplevel - - pool = get_mcp_session_pool() - # Claim-or-refresh ownership for this session (does not steal). - await pool.register_pool_session_owner(mcp_session_id) - logger.debug(f"[AFFINITY_INIT] Worker {WORKER_ID} | Session {mcp_session_id[:8]}... 
| Registered ownership after initialize") - except Exception as e: - logger.warning(f"[AFFINITY_INIT] Failed to register session ownership: {e}") + result = await _execute_rpc_initialize( + request, + user, + params=params, + server_id=server_id, + mcp_session_id=mcp_session_id, + ) elif method == "tools/list": await _ensure_rpc_permission(user, db, "tools.read", method, request=request) user_email, token_teams, is_admin = _get_rpc_filter_context(request, user) @@ -6206,7 +8996,7 @@ async def handle_rpc(request: Request, db: Session = Depends(get_db), user=Depen # Release DB connection early to prevent idle-in-transaction under load db.commit() db.close() - result = {"tools": [t.model_dump(by_alias=True, exclude_none=True) for t in tools]} + result = {"tools": _serialize_mcp_tool_definitions(tools)} else: tools, next_cursor = await tool_service.list_tools( db, @@ -6221,7 +9011,7 @@ async def handle_rpc(request: Request, db: Session = Depends(get_db), user=Depen # Release DB connection early to prevent idle-in-transaction under load db.commit() db.close() - result = {"tools": [t.model_dump(by_alias=True, exclude_none=True) for t in tools]} + result = {"tools": _serialize_mcp_tool_definitions(tools)} if next_cursor: result["nextCursor"] = next_cursor elif method == "list_tools": # Legacy endpoint @@ -6249,7 +9039,7 @@ async def handle_rpc(request: Request, db: Session = Depends(get_db), user=Depen ) db.commit() db.close() - result = {"tools": [t.model_dump(by_alias=True, exclude_none=True) for t in tools]} + result = {"tools": _serialize_legacy_tool_payloads(tools)} else: tools, next_cursor = await tool_service.list_tools( db, @@ -6263,7 +9053,7 @@ async def handle_rpc(request: Request, db: Session = Depends(get_db), user=Depen ) db.commit() db.close() - result = {"tools": [t.model_dump(by_alias=True, exclude_none=True) for t in tools]} + result = {"tools": _serialize_legacy_tool_payloads(tools)} if next_cursor: result["nextCursor"] = next_cursor elif method == 
"list_gateways": @@ -6345,7 +9135,7 @@ async def handle_rpc(request: Request, db: Session = Depends(get_db), user=Depen result = {"contents": [result]} except (ValueError, ResourceNotFoundError): # Resource not found in the gateway - logger.error(f"Resource not found: {uri}") + logger.error("Resource not found: %s", uri) raise JSONRPCError(-32002, f"Resource not found: {uri}", {"uri": uri}) # Release transaction after resources/read completes db.commit() @@ -6443,109 +9233,17 @@ async def handle_rpc(request: Request, db: Session = Depends(get_db), user=Depen await _ensure_rpc_permission(user, db, "tools.execute", method, request=request) # Note: Multi-worker session affinity forwarding is handled earlier # (before method routing) to apply to ALL methods, not just tools/call - name = params.get("name") - arguments = params.get("arguments", {}) - meta_data = params.get("_meta", None) - if not name: - raise JSONRPCError(-32602, "Missing tool name in parameters", params) - - # Get authorization context (same as tools/list) - auth_user_email, auth_token_teams, auth_is_admin = _get_rpc_filter_context(request, user) - run_owner_email = auth_user_email - run_owner_team_ids = [] if auth_token_teams is None else list(auth_token_teams) - if auth_is_admin and auth_token_teams is None: - auth_user_email = None - # auth_token_teams stays None (unrestricted) - elif auth_token_teams is None: - auth_token_teams = [] # Non-admin without teams = public-only - - # Get user email for OAuth token selection - oauth_user_email = get_user_email(user) - # Get plugin contexts from request.state for cross-hook sharing - plugin_context_table = getattr(request.state, "plugin_context_table", None) - plugin_global_context = getattr(request.state, "plugin_global_context", None) - - # Register the tool execution for cancellation tracking with task reference (if enabled) - # Note: req_id can be 0 which is falsy but valid per JSON-RPC spec, so use 'is not None' - run_id = str(req_id) if req_id is not 
None else None - tool_task: Optional[asyncio.Task] = None - - async def cancel_tool_task(reason: Optional[str] = None): - """Cancel callback that actually cancels the asyncio task. - - Args: - reason: Optional reason for cancellation. - """ - if tool_task and not tool_task.done(): - logger.info(f"Cancelling tool task for run_id={run_id}, reason={reason}") - tool_task.cancel() - - if settings.mcpgateway_tool_cancellation_enabled and run_id: - await cancellation_service.register_run( - run_id, - name=f"tool:{name}", - cancel_callback=cancel_tool_task, - owner_email=run_owner_email, - owner_team_ids=run_owner_team_ids, - ) - try: - # Check if cancelled before execution (only if feature enabled) - if settings.mcpgateway_tool_cancellation_enabled and run_id: - run_status = await cancellation_service.get_status(run_id) - if run_status and run_status.get("cancelled"): - raise JSONRPCError(-32800, f"Tool execution cancelled: {name}", {"requestId": run_id}) - - # Create task for tool execution to enable real cancellation - async def execute_tool(): - """Execute tool invocation with fallback to gateway forwarding. - - Returns: - The tool invocation result or gateway forwarding result. - - Raises: - JSONRPCError: If the tool is not found. 
- """ - try: - return await tool_service.invoke_tool( - db=db, - name=name, - arguments=arguments, - request_headers=headers, - app_user_email=oauth_user_email, - user_email=auth_user_email, - token_teams=auth_token_teams, - server_id=server_id, - plugin_context_table=plugin_context_table, - plugin_global_context=plugin_global_context, - meta_data=meta_data, - ) - except ValueError: - # Tool not found log error and raise JSONRPCError - logger.error(f"Tool not found: {name}") - raise JSONRPCError(-32601, f"Tool not found: {name}", None) - - tool_task = asyncio.create_task(execute_tool()) - - # Re-check cancellation after task creation to handle race condition - # where cancel arrived between pre-check and task creation (callback saw tool_task=None) - if settings.mcpgateway_tool_cancellation_enabled and run_id: - run_status = await cancellation_service.get_status(run_id) - if run_status and run_status.get("cancelled"): - tool_task.cancel() - - try: - result = await tool_task - if hasattr(result, "model_dump"): - result = result.model_dump(by_alias=True, exclude_none=True) - except asyncio.CancelledError: - # Task was cancelled - return partial result or error - logger.info(f"Tool execution cancelled for run_id={run_id}, tool={name}") - raise JSONRPCError(-32800, f"Tool execution cancelled: {name}", {"requestId": run_id, "partial": False}) + result = await _execute_rpc_tools_call( + request, + db, + user, + req_id=req_id, + params=params, + lowered_request_headers=_lowered_request_headers(), + server_id=server_id, + ) finally: - # Unregister the run when done (only if feature enabled) - if settings.mcpgateway_tool_cancellation_enabled and run_id: - await cancellation_service.unregister_run(run_id) # Release transaction after tools/call completes db.commit() db.close() @@ -6590,7 +9288,7 @@ async def execute_tool(): raw_request_id = params.get("requestId") request_id = str(raw_request_id) if raw_request_id is not None else None reason = params.get("reason") - 
logger.info(f"Request cancelled: {request_id}, reason: {reason}") + logger.info("Request cancelled: %s, reason: %s", request_id, reason) # Attempt local cancellation per MCP spec if request_id is not None: await _authorize_run_cancellation(request, user, request_id, as_jsonrpc_error=True) @@ -6640,7 +9338,7 @@ async def execute_tool(): if not capable_sessions: raise JSONRPCError(-32000, "No elicitation-capable clients available", {"message": elicit_params.message}) target_session_id = capable_sessions[0] - logger.debug(f"Selected session {target_session_id} for elicitation") + logger.debug("Selected session %s for elicitation", target_session_id) # Verify session has elicitation capability if not await session_registry.has_elicitation_capability(target_session_id): @@ -6683,7 +9381,7 @@ async def execute_tool(): } await session_registry.broadcast(target_session_id, elicitation_request) - logger.debug(f"Sent elicitation request {pending.request_id} to session {target_session_id}") + logger.debug("Sent elicitation request %s to session %s", pending.request_id, target_session_id) # Wait for response elicit_result = await elicitation_task @@ -6711,9 +9409,7 @@ async def execute_tool(): # Catch-all for other completion/* methods (currently unsupported) result = {} elif method == "logging/setLevel": - # MCP logging/setLevel is a standard MCP capability invoked by clients during - # initialization; servers.use (not admin.system_config) keeps the handshake working. 
- await _ensure_rpc_permission(user, db, "servers.use", method, request=request) + await _ensure_rpc_permission(user, db, "admin.system_config", method, request=request) level = LogLevel(params.get("level")) await logging_service.set_level(level) result = {} @@ -6724,8 +9420,6 @@ async def execute_tool(): # Backward compatibility: Try to invoke as a tool directly # This allows both old format (method=tool_name) and new format (method=tools/call) await _ensure_rpc_permission(user, db, "tools.execute", method, request=request) - # Standard - headers = {k.lower(): v for k, v in request.headers.items()} # Get authorization context (same as tools/call) auth_user_email, auth_token_teams, auth_is_admin = _get_rpc_filter_context(request, user) @@ -6750,7 +9444,7 @@ async def execute_tool(): db=db, name=method, arguments=params, - request_headers=headers, + request_headers=_lowered_request_headers(), app_user_email=oauth_user_email, user_email=auth_user_email, token_teams=auth_token_teams, @@ -6765,7 +9459,7 @@ async def execute_tool(): raise except Exception: # Log error and return invalid method - logger.error(f"Method not found: {method}") + logger.error("Method not found: %s", method) raise JSONRPCError(-32000, "Invalid method", params) return {"jsonrpc": "2.0", "result": result, "id": req_id} @@ -7189,7 +9883,7 @@ async def reset_metrics(entity: Optional[str] = None, entity_id: Optional[int] = # Healthcheck # #################### @app.get("/health") -def healthcheck(): +def healthcheck(response: Response = None): """ Perform a basic health check to verify database connectivity. @@ -7197,6 +9891,9 @@ def healthcheck(): Uses a dedicated session to avoid cross-thread issues and double-commit from get_db dependency. All DB operations happen in the same thread. + Args: + response: Optional response object used to attach runtime-mode headers. + Returns: A dictionary with the health status and optional error message. 
""" @@ -7205,7 +9902,9 @@ def healthcheck(): db.execute(text("SELECT 1")) # Explicitly commit to release PgBouncer backend connection in transaction mode. db.commit() - return {"status": "healthy"} + if response is not None: + _apply_runtime_mode_headers(response) + return {"status": "healthy", "mcp_runtime": _mcp_runtime_status_payload()} except Exception as e: # Rollback, then invalidate if rollback fails (mirrors get_db cleanup). try: @@ -7217,7 +9916,9 @@ def healthcheck(): pass # nosec B110 - Best effort cleanup on connection failure error_message = f"Database connection error: {str(e)}" logger.error(error_message) - return {"status": "unhealthy", "error": error_message} + if response is not None: + _apply_runtime_mode_headers(response) + return {"status": "unhealthy", "error": error_message, "mcp_runtime": _mcp_runtime_status_payload()} finally: db.close() @@ -7266,8 +9967,12 @@ def _check_db() -> str | None: if error: error_message = f"Readiness check failed: {error}" logger.error(error_message) - return ORJSONResponse(content={"status": "not ready", "error": error_message}, status_code=503) - return ORJSONResponse(content={"status": "ready"}, status_code=200) + response = ORJSONResponse(content={"status": "not ready", "error": error_message, "mcp_runtime": _mcp_runtime_status_payload()}, status_code=503) + _apply_runtime_mode_headers(response) + return response + response = ORJSONResponse(content={"status": "ready", "mcp_runtime": _mcp_runtime_status_payload()}, status_code=200) + _apply_runtime_mode_headers(response) + return response @app.get("/health/security", tags=["health"]) @@ -7921,8 +10626,163 @@ async def cleanup_import_statuses(max_age_hours: int = 24, user=Depends(get_curr else: logger.warning("Admin API routes not mounted - Admin API disabled via MCPGATEWAY_ADMIN_API_ENABLED=False") + +class MCPRuntimeHeaderTransportWrapper: + """Annotate Python-owned MCP transport responses with the active runtime marker.""" + + def __init__(self, 
transport_app, *, runtime_name: str) -> None: + """Wrap an MCP transport app and stamp a runtime header on responses. + + Args: + transport_app: Underlying MCP transport app. + runtime_name: Runtime label to expose via response headers. + """ + self.transport_app = transport_app + self.runtime_name = runtime_name.encode("ascii") + + async def handle_streamable_http(self, scope, receive, send): + """Forward an MCP request while ensuring the runtime marker header is present. + + Args: + scope: Incoming ASGI scope. + receive: ASGI receive callable. + send: ASGI send callable. + """ + + async def _send_with_runtime_header(message): + """Attach MCP runtime mode headers before sending the ASGI event downstream. + + Args: + message: Outgoing ASGI message emitted by the wrapped application. + """ + if message.get("type") == "http.response.start": + headers = list(message.get("headers") or []) + if not any(isinstance(item, (tuple, list)) and len(item) == 2 and isinstance(item[0], (bytes, bytearray)) and item[0].lower() == b"x-contextforge-mcp-runtime" for item in headers): + headers.append((b"x-contextforge-mcp-runtime", self.runtime_name)) + if not any( + isinstance(item, (tuple, list)) and len(item) == 2 and isinstance(item[0], (bytes, bytearray)) and item[0].lower() == b"x-contextforge-mcp-session-core" for item in headers + ): + headers.append((b"x-contextforge-mcp-session-core", _current_mcp_session_core_mode().encode("ascii"))) + if not any(isinstance(item, (tuple, list)) and len(item) == 2 and isinstance(item[0], (bytes, bytearray)) and item[0].lower() == b"x-contextforge-mcp-resume-core" for item in headers): + headers.append((b"x-contextforge-mcp-resume-core", _current_mcp_resume_core_mode().encode("ascii"))) + if not any( + isinstance(item, (tuple, list)) and len(item) == 2 and isinstance(item[0], (bytes, bytearray)) and item[0].lower() == b"x-contextforge-mcp-live-stream-core" for item in headers + ): + headers.append((b"x-contextforge-mcp-live-stream-core", 
_current_mcp_live_stream_core_mode().encode("ascii"))) + if not any( + isinstance(item, (tuple, list)) and len(item) == 2 and isinstance(item[0], (bytes, bytearray)) and item[0].lower() == b"x-contextforge-mcp-affinity-core" for item in headers + ): + headers.append((b"x-contextforge-mcp-affinity-core", _current_mcp_affinity_core_mode().encode("ascii"))) + if not any( + isinstance(item, (tuple, list)) and len(item) == 2 and isinstance(item[0], (bytes, bytearray)) and item[0].lower() == b"x-contextforge-mcp-session-auth-reuse" for item in headers + ): + headers.append((b"x-contextforge-mcp-session-auth-reuse", _current_mcp_session_auth_reuse_mode().encode("ascii"))) + message = dict(message) + message["headers"] = headers + await send(message) + + await self.transport_app.handle_streamable_http(scope, receive, _send_with_runtime_header) + + +def _build_mcp_transport_app(): + """Choose the MCP transport app for the mounted /mcp path. + + Returns: + Transport app object that should be mounted at the public ``/mcp`` path. + """ + if _should_mount_public_rust_transport(): + logger.warning( + "MCP runtime mode: %s. GET/POST/DELETE /mcp requests will be proxied to %s. MCP session core mode: %s. MCP replay/resume core mode: %s. MCP live stream core mode: %s. MCP affinity core mode: %s. MCP session auth reuse mode: %s.", + _current_mcp_runtime_mode(), + settings.experimental_rust_mcp_runtime_uds or settings.experimental_rust_mcp_runtime_url, + _current_mcp_session_core_mode(), + _current_mcp_resume_core_mode(), + _current_mcp_live_stream_core_mode(), + _current_mcp_affinity_core_mode(), + _current_mcp_session_auth_reuse_mode(), + ) + return RustMCPRuntimeProxy(streamable_http_session.handle_streamable_http) + + if settings.experimental_rust_mcp_runtime_enabled: + logger.warning( + "MCP runtime mode: %s. Rust sidecar remains enabled, but public /mcp stays on the Python transport because MCP session auth reuse is disabled. MCP session core mode: %s. 
MCP replay/resume core mode: %s. MCP live stream core mode: %s. MCP affinity core mode: %s. MCP session auth reuse mode: %s.", + _current_mcp_runtime_mode(), + _current_mcp_session_core_mode(), + _current_mcp_resume_core_mode(), + _current_mcp_live_stream_core_mode(), + _current_mcp_affinity_core_mode(), + _current_mcp_session_auth_reuse_mode(), + ) + return MCPRuntimeHeaderTransportWrapper(streamable_http_session, runtime_name="python") + + if _rust_build_included(): + logger.warning( + "MCP runtime mode: %s. Rust MCP artifacts are present in this image, but EXPERIMENTAL_RUST_MCP_RUNTIME_ENABLED=false so /mcp remains on the Python transport. Set RUST_MCP_MODE=edge or RUST_MCP_MODE=full to activate the Rust runtime with the simple env flow.", + _current_mcp_runtime_mode(), + ) + else: + logger.info("MCP runtime mode: %s. /mcp is mounted on the Python transport.", _current_mcp_runtime_mode()) + + return MCPRuntimeHeaderTransportWrapper(streamable_http_session, runtime_name="python") + + +class InternalTrustedMCPTransportBridge: + """Trusted internal bridge from Rust MCP transport requests to the Python session manager.""" + + def __init__(self, transport_app) -> None: + """Store the underlying Python transport app used for trusted forwarding. + + Args: + transport_app: Python transport app that ultimately owns session handling. + """ + self.transport_app = transport_app + + async def handle_streamable_http(self, scope, receive, send): + """Translate trusted Rust transport requests into Python session-manager calls. + + Args: + scope: Incoming ASGI scope. + receive: ASGI receive callable. + send: ASGI send callable. 
+ """ + if scope.get("type") != "http": + response = ORJSONResponse(status_code=404, content={"detail": "Not found"}) + await response(scope, receive, send) + return + + method = str(scope.get("method", "GET")).upper() + if method not in {"GET", "POST", "DELETE"}: + response = ORJSONResponse(status_code=405, content={"detail": "Method not allowed"}) + await response(scope, receive, send) + return + + request = Request(scope, receive=receive) + try: + _build_internal_mcp_forwarded_user(request) + except HTTPException as exc: + response = ORJSONResponse(status_code=exc.status_code, content={"detail": exc.detail}) + await response(scope, receive, send) + return + + auth_context = _get_internal_mcp_auth_context(request) or {} + server_id = request.headers.get("x-contextforge-server-id") + forwarded_scope = dict(scope) + forwarded_scope["path"] = "/mcp/" + forwarded_scope["modified_path"] = f"/servers/{server_id}/mcp" if server_id else "/mcp/" + + token = user_context_var.set(auth_context) + try: + await self.transport_app.handle_streamable_http(forwarded_scope, receive, send) + finally: + user_context_var.reset(token) + + +mcp_transport_app = _build_mcp_transport_app() +internal_trusted_mcp_transport = InternalTrustedMCPTransportBridge(streamable_http_session) + # Streamable http Mount -app.mount("/mcp", app=streamable_http_session.handle_streamable_http) +app.mount("/mcp", app=mcp_transport_app.handle_streamable_http) +app.mount("/_internal/mcp/transport", app=internal_trusted_mcp_transport.handle_streamable_http) # Conditional static files mounting and root redirect if UI_ENABLED: diff --git a/mcpgateway/middleware/token_scoping.py b/mcpgateway/middleware/token_scoping.py index 6f57f9c9b7..f8835997d8 100644 --- a/mcpgateway/middleware/token_scoping.py +++ b/mcpgateway/middleware/token_scoping.py @@ -13,6 +13,8 @@ # Standard from datetime import datetime, timedelta, timezone from functools import lru_cache +import hashlib +import hmac import ipaddress import re from 
typing import List, Optional, Pattern, Tuple @@ -59,6 +61,11 @@ (re.compile(r"/gateways/?([a-f0-9\-]+)"), "gateway"), ] _AUTH_COOKIE_NAMES = ("jwt_token", "access_token") +_INTERNAL_MCP_PATH_PREFIX = "/_internal/mcp" +_INTERNAL_MCP_RUNTIME_HEADER = "x-contextforge-mcp-runtime" +_INTERNAL_MCP_AUTH_CONTEXT_HEADER = "x-contextforge-auth-context" +_INTERNAL_MCP_RUNTIME_AUTH_HEADER = "x-contextforge-mcp-runtime-auth" +_INTERNAL_MCP_RUNTIME_AUTH_CONTEXT = "contextforge-internal-mcp-runtime-v1" # Permission map with precompiled patterns # Maps (HTTP method, path pattern) to required permission @@ -1210,6 +1217,13 @@ async def __call__(self, request: Request, call_next): if normalized_path == "/": return await call_next(request) + # Trusted internal Rust -> Python MCP dispatch already carries a + # normalized auth context and is re-authorized by the internal MCP + # handlers. Re-applying token-scoping path checks here would reject + # the private /_internal/mcp/* hop for scoped tokens. + if self._is_trusted_internal_mcp_runtime_request(request, normalized_path): + return await call_next(request) + if any(normalized_path.startswith(path) for path in skip_paths): return await call_next(request) @@ -1328,6 +1342,74 @@ async def __call__(self, request: Request, call_next): content={"detail": exc.detail}, ) + def _is_trusted_internal_mcp_runtime_request(self, request: Request, normalized_path: str) -> bool: + """Return whether the request is a trusted loopback Rust MCP sidecar hop. + + Args: + request: Incoming HTTP request. + normalized_path: Canonicalized request path used for route matching. + + Returns: + ``True`` when the request originated from the local Rust MCP runtime and + includes the expected trusted headers. 
+ """ + if normalized_path != _INTERNAL_MCP_PATH_PREFIX and not normalized_path.startswith(f"{_INTERNAL_MCP_PATH_PREFIX}/"): + return False + + if request.headers.get(_INTERNAL_MCP_RUNTIME_HEADER) != "rust": + return False + + provided_auth = request.headers.get(_INTERNAL_MCP_RUNTIME_AUTH_HEADER) + if not provided_auth: + return False + + expected_auth = self._expected_internal_mcp_runtime_auth_header() + if not hmac.compare_digest(provided_auth, expected_auth): + return False + + if not request.headers.get(_INTERNAL_MCP_AUTH_CONTEXT_HEADER): + return False + + client_host = getattr(getattr(request, "client", None), "host", None) + return client_host in ("127.0.0.1", "::1") + + @staticmethod + def _auth_encryption_secret_value() -> str: + """Return the configured auth-encryption secret as a plain string. + + Returns: + The auth-encryption secret, normalized to a regular string. + """ + secret = settings.auth_encryption_secret + if hasattr(secret, "get_secret_value"): + return secret.get_secret_value() + return str(secret) + + @staticmethod + @lru_cache(maxsize=8) + def _expected_internal_mcp_runtime_auth_header_for_secret(secret: str) -> str: + """Return the expected shared internal-auth header for a specific secret. + + Args: + secret: Auth-encryption secret to derive the trust header from. + + Returns: + Hex-encoded SHA-256 digest derived from the provided auth secret. + """ + material = f"{secret}:{_INTERNAL_MCP_RUNTIME_AUTH_CONTEXT}".encode("utf-8") + return hashlib.sha256(material).hexdigest() + + @staticmethod + def _expected_internal_mcp_runtime_auth_header() -> str: + """Return the expected shared internal-auth header for Rust MCP hops. + + Returns: + Shared secret-derived digest expected on trusted internal Rust MCP calls. 
+ """ + return TokenScopingMiddleware._expected_internal_mcp_runtime_auth_header_for_secret( + TokenScopingMiddleware._auth_encryption_secret_value() + ) + # Create middleware instance token_scoping_middleware = TokenScopingMiddleware() diff --git a/mcpgateway/schemas.py b/mcpgateway/schemas.py index 1c80475ae8..d752dfdd6a 100644 --- a/mcpgateway/schemas.py +++ b/mcpgateway/schemas.py @@ -295,6 +295,14 @@ class MetricsResponse(BaseModelWithConfigDict): @model_serializer(mode="wrap") def _exclude_none_a2a(self, handler): + """Omit the A2A metrics field when that feature is disabled. + + Args: + handler: Pydantic serializer callback for the wrapped model. + + Returns: + Dict[str, Any]: Serialized metrics payload without empty A2A fields. + """ result = handler(self) if self.a2a_agents is None: result.pop("a2aAgents", None) diff --git a/mcpgateway/services/email_auth_service.py b/mcpgateway/services/email_auth_service.py index 00bcb36672..4f2e5579e5 100644 --- a/mcpgateway/services/email_auth_service.py +++ b/mcpgateway/services/email_auth_service.py @@ -70,9 +70,6 @@ logging_service = LoggingService() logger = logging_service.get_logger(__name__) -# Strong references to background tasks to prevent GC before completion -_background_tasks: set[asyncio.Task] = set() - _GET_ALL_USERS_LIMIT = 10000 _DUMMY_ARGON2_HASH = "$argon2id$v=19$m=65536,t=3,p=1$9x/nTs9D0R97+BI7BWP2Tg$V/40qCuaGh4i+94HpGpxJESEVs3IDpLzUqtNqRPuty4" @@ -422,6 +419,33 @@ async def _invalidate_user_auth_cache(self, email: str) -> None: except Exception as cache_error: # nosec B110 logger.debug("Failed to invalidate auth cache for %s: %s", email, cache_error) + async def _invalidate_deleted_user_auth_caches(self, email: str) -> None: + """Invalidate all auth-cache entries affected by permanent user deletion. + + Args: + email: User email for cache invalidation. 
+ """ + try: + # First-Party + from mcpgateway.cache.auth_cache import auth_cache # pylint: disable=import-outside-toplevel + + results = await asyncio.wait_for( + asyncio.gather( + auth_cache.invalidate_user(email), + auth_cache.invalidate_user_teams(email), + auth_cache.invalidate_team_membership(email), + return_exceptions=True, + ), + timeout=5.0, + ) + for result in results: + if isinstance(result, Exception): + logger.debug("Failed to invalidate delete-user auth cache for %s: %s", email, result) + except asyncio.TimeoutError: + logger.warning("Delete-user auth cache invalidation timed out for %s - continuing", email) + except Exception as cache_error: # nosec B110 + logger.debug("Failed to invalidate delete-user auth cache for %s: %s", email, cache_error) + def _log_auth_event( self, event_type: str, @@ -556,14 +580,15 @@ async def create_user( if not skip_password_validation: self.validate_password(password) + # Hash before the first DB read so PgBouncer transaction pooling does not + # hold an idle transaction open across the async hashing call. 
+ password_hash = await self.password_service.hash_password_async(password) + # Check if user already exists existing_user = await self.get_user_by_email(email) if existing_user: raise UserExistsError(f"User with email {email} already exists") - # Hash the password - password_hash = await self.password_service.hash_password_async(password) - # Create new user (record password change timestamp) user = EmailUser( email=email, @@ -1804,17 +1829,7 @@ async def delete_user(self, email: str) -> bool: self.db.delete(user) self.db.commit() - # Invalidate all auth caches for deleted user - try: - # First-Party - from mcpgateway.cache.auth_cache import auth_cache # pylint: disable=import-outside-toplevel - - for coro in [auth_cache.invalidate_user(email), auth_cache.invalidate_user_teams(email), auth_cache.invalidate_team_membership(email)]: - task = asyncio.create_task(coro) - _background_tasks.add(task) - task.add_done_callback(_background_tasks.discard) - except Exception as cache_error: - logger.debug(f"Failed to invalidate cache on user delete: {cache_error}") + await self._invalidate_deleted_user_auth_caches(email) logger.info(f"User {SecurityValidator.sanitize_log_message(email)} deleted permanently") return True diff --git a/mcpgateway/services/mcp_session_pool.py b/mcpgateway/services/mcp_session_pool.py index 7874267d37..a8d1291f59 100644 --- a/mcpgateway/services/mcp_session_pool.py +++ b/mcpgateway/services/mcp_session_pool.py @@ -1266,6 +1266,17 @@ async def _cleanup_pool_session_owner(self, mcp_session_id: str) -> None: # Cleanup failure is non-fatal logger.debug(f"Failed to cleanup pool session owner in Redis: {e}") + async def cleanup_streamable_http_session_owner(self, mcp_session_id: str) -> None: + """Public wrapper for cleaning up Streamable HTTP session ownership. + + This is used by trusted internal MCP session teardown paths that need to + remove affinity ownership without reaching into private helpers. 
+ """ + if not self.is_valid_mcp_session_id(mcp_session_id): + logger.debug("Invalid mcp_session_id for owner cleanup, skipping") + return + await self._cleanup_pool_session_owner(mcp_session_id) + async def close_all(self) -> None: """ Gracefully close all pooled and active sessions. diff --git a/mcpgateway/services/metrics.py b/mcpgateway/services/metrics.py index 931a5e8081..8918218a70 100644 --- a/mcpgateway/services/metrics.py +++ b/mcpgateway/services/metrics.py @@ -98,6 +98,12 @@ def _get_registry_collector(metric_name: str): ["outcome"], ) +mcp_auth_cache_events_counter = Counter( + "mcp_auth_cache_events_total", + "Total number of MCP auth cache events by outcome", + ["outcome"], +) + def setup_metrics(app): """ diff --git a/mcpgateway/services/metrics_buffer_service.py b/mcpgateway/services/metrics_buffer_service.py index 2054d95f45..8364e03197 100644 --- a/mcpgateway/services/metrics_buffer_service.py +++ b/mcpgateway/services/metrics_buffer_service.py @@ -144,11 +144,57 @@ async def start(self) -> None: logger.info("MetricsBufferService disabled, skipping start") return - if self._flush_task is None or self._flush_task.done(): + current_loop = asyncio.get_running_loop() + if not self._flush_task_is_active_for_loop(current_loop): self._shutdown_event.clear() self._flush_task = asyncio.create_task(self._flush_loop()) logger.info("MetricsBufferService flush task started") + def _flush_task_is_active_for_loop(self, loop: asyncio.AbstractEventLoop) -> bool: + """Return whether the current flush task is usable for the active loop. + + Args: + loop: The currently running event loop for this worker process. + + Returns: + True when the cached flush task belongs to the current live loop. 
+ """ + task = self._flush_task + if task is None or task.done() or task.cancelled(): + return False + + try: + task_loop = task.get_loop() + except (AttributeError, RuntimeError): + return False + + return task_loop is loop and task_loop.is_running() + + def _ensure_flush_task_started(self) -> None: + """Best-effort lazy-start for the background flush task. + + In preloaded multi-worker deployments, the singleton may exist in a + worker before the worker's startup hook has started the flush loop. A + first buffered metric should recover from that state instead of sitting + in memory forever. + """ + if not self.recording_enabled or not self.enabled: + return + + try: + loop = asyncio.get_running_loop() + except RuntimeError: + return + + if self._flush_task_is_active_for_loop(loop): + return + + if self._shutdown_event.is_set(): + self._shutdown_event = asyncio.Event() + + self._flush_task = loop.create_task(self._flush_loop()) + logger.info("MetricsBufferService flush task started lazily") + async def shutdown(self) -> None: """Shutdown service with final flush.""" logger.info("MetricsBufferService shutting down...") @@ -199,6 +245,41 @@ def record_tool_metric( error_message=error_message, ) + self._ensure_flush_task_started() + with self._lock: + self._tool_metrics.append(metric) + self._total_buffered += 1 + + def record_tool_metric_with_duration( + self, + tool_id: str, + response_time: float, + success: bool, + error_message: Optional[str] = None, + ) -> None: + """Buffer a tool metric with pre-calculated response time. + + Args: + tool_id: UUID of the tool. + response_time: Pre-calculated response time in seconds. + success: Whether the operation succeeded. + error_message: Optional error message if failed. 
+ """ + if not self.recording_enabled: + return # Execution metrics recording disabled + if not self.enabled: + self._write_tool_metric_with_duration_immediately(tool_id, response_time, success, error_message) + return + + metric = BufferedToolMetric( + tool_id=tool_id, + timestamp=datetime.now(timezone.utc), + response_time=response_time, + is_success=success, + error_message=error_message, + ) + + self._ensure_flush_task_started() with self._lock: self._tool_metrics.append(metric) self._total_buffered += 1 @@ -232,6 +313,7 @@ def record_resource_metric( error_message=error_message, ) + self._ensure_flush_task_started() with self._lock: self._resource_metrics.append(metric) self._total_buffered += 1 @@ -265,6 +347,7 @@ def record_prompt_metric( error_message=error_message, ) + self._ensure_flush_task_started() with self._lock: self._prompt_metrics.append(metric) self._total_buffered += 1 @@ -298,6 +381,41 @@ def record_server_metric( error_message=error_message, ) + self._ensure_flush_task_started() + with self._lock: + self._server_metrics.append(metric) + self._total_buffered += 1 + + def record_server_metric_with_duration( + self, + server_id: str, + response_time: float, + success: bool, + error_message: Optional[str] = None, + ) -> None: + """Buffer a server metric with pre-calculated response time. + + Args: + server_id: UUID of the server. + response_time: Pre-calculated response time in seconds. + success: Whether the operation succeeded. + error_message: Optional error message if failed. 
+ """ + if not self.recording_enabled: + return # Execution metrics recording disabled + if not self.enabled: + self._write_server_metric_with_duration_immediately(server_id, response_time, success, error_message) + return + + metric = BufferedServerMetric( + server_id=server_id, + timestamp=datetime.now(timezone.utc), + response_time=response_time, + is_success=success, + error_message=error_message, + ) + + self._ensure_flush_task_started() with self._lock: self._server_metrics.append(metric) self._total_buffered += 1 @@ -334,6 +452,7 @@ def record_a2a_agent_metric( error_message=error_message, ) + self._ensure_flush_task_started() with self._lock: self._a2a_agent_metrics.append(metric) self._total_buffered += 1 @@ -370,6 +489,7 @@ def record_a2a_agent_metric_with_duration( error_message=error_message, ) + self._ensure_flush_task_started() with self._lock: self._a2a_agent_metrics.append(metric) self._total_buffered += 1 @@ -594,6 +714,35 @@ def _write_tool_metric_immediately( except Exception as e: logger.error(f"Failed to write tool metric: {e}") + def _write_tool_metric_with_duration_immediately( + self, + tool_id: str, + response_time: float, + success: bool, + error_message: Optional[str], + ) -> None: + """Write a single tool metric with pre-calculated duration immediately. + + Args: + tool_id: UUID of the tool. + response_time: Pre-calculated response time in seconds. + success: Whether the operation succeeded. + error_message: Optional error message if failed. 
+ """ + try: + with fresh_db_session() as db: + metric = ToolMetric( + tool_id=tool_id, + timestamp=datetime.now(timezone.utc), + response_time=response_time, + is_success=success, + error_message=error_message, + ) + db.add(metric) + db.commit() + except Exception as e: + logger.error(f"Failed to write tool metric: {e}") + def _write_resource_metric_immediately( self, resource_id: str, @@ -681,6 +830,35 @@ def _write_server_metric_immediately( except Exception as e: logger.error(f"Failed to write server metric: {e}") + def _write_server_metric_with_duration_immediately( + self, + server_id: str, + response_time: float, + success: bool, + error_message: Optional[str], + ) -> None: + """Write a single server metric with pre-calculated duration immediately. + + Args: + server_id: UUID of the server. + response_time: Pre-calculated response time in seconds. + success: Whether the operation succeeded. + error_message: Optional error message if failed. + """ + try: + with fresh_db_session() as db: + metric = ServerMetric( + server_id=server_id, + timestamp=datetime.now(timezone.utc), + response_time=response_time, + is_success=success, + error_message=error_message, + ) + db.add(metric) + db.commit() + except Exception as e: + logger.error(f"Failed to write server metric: {e}") + def _write_a2a_agent_metric_immediately( self, a2a_agent_id: str, diff --git a/mcpgateway/services/prompt_service.py b/mcpgateway/services/prompt_service.py index 49a25f38f8..ec336ea46e 100644 --- a/mcpgateway/services/prompt_service.py +++ b/mcpgateway/services/prompt_service.py @@ -25,6 +25,9 @@ # Third-Party from jinja2 import Environment, meta, select_autoescape, Template +from mcp import ClientSession +from mcp.client.sse import sse_client +from mcp.client.streamable_http import streamablehttp_client import orjson from pydantic import ValidationError from sqlalchemy import and_, delete, desc, not_, or_, select @@ -46,15 +49,19 @@ from mcpgateway.services.base_service import BaseService 
from mcpgateway.services.event_service import EventService from mcpgateway.services.logging_service import LoggingService +from mcpgateway.services.mcp_session_pool import get_mcp_session_pool, TransportType from mcpgateway.services.metrics_buffer_service import get_metrics_buffer_service from mcpgateway.services.metrics_cleanup_service import delete_metrics_in_batches, pause_rollup_during_purge from mcpgateway.services.observability_service import current_trace_id, ObservabilityService from mcpgateway.services.structured_logger import get_structured_logger from mcpgateway.services.team_management_service import TeamManagementService from mcpgateway.utils.create_slug import slugify +from mcpgateway.utils.gateway_access import build_gateway_auth_headers from mcpgateway.utils.metrics_common import build_top_performers from mcpgateway.utils.pagination import unified_paginate +from mcpgateway.utils.services_auth import decode_auth from mcpgateway.utils.sqlalchemy_modifier import json_contains_tag_expr +from mcpgateway.utils.url_auth import apply_query_param_auth, sanitize_exception_message # Cache import (lazy to avoid circular dependencies) _REGISTRY_CACHE = None @@ -244,6 +251,108 @@ def __init__(self) -> None: self._jinja_env = _get_jinja_env() self._plugin_manager: PluginManager | None = get_plugin_manager() + @staticmethod + def _should_fetch_gateway_prompt(prompt: DbPrompt) -> bool: + """Return whether a prompt must be executed against its source gateway. + + Federated prompts are synced into the catalog as metadata via + ``list_prompts()``. Those records often have ``template=""``, which + means the gateway must call the upstream MCP ``prompts/get`` endpoint + instead of trying to render a local template. + + Args: + prompt: Prompt ORM object resolved from the catalog. + + Returns: + ``True`` when the prompt is gateway-backed and has no local template. 
+ """ + return bool(getattr(prompt, "gateway_id", None)) and not bool(getattr(prompt, "template", "")) + + async def _fetch_gateway_prompt_result(self, prompt: DbPrompt, arguments: Optional[Dict[str, str]], user_identity: Optional[str]) -> PromptResult: + """Fetch a rendered prompt from the upstream MCP gateway. + + Args: + prompt: Gateway-backed prompt record from the catalog. + arguments: Optional prompt-rendering arguments. + user_identity: Effective requester email for session-pool isolation. + + Returns: + Prompt result normalized into ContextForge models. + + Raises: + PromptError: If the gateway prompt cannot be fetched. + """ + gateway = getattr(prompt, "gateway", None) + if gateway is None: + raise PromptError(f"Prompt '{prompt.name}' is gateway-backed but missing gateway metadata") + + gateway_url = str(gateway.url) + headers = build_gateway_auth_headers(gateway) + auth_query_params_decrypted: Optional[Dict[str, str]] = None + + if getattr(gateway, "auth_type", None) == "query_param" and getattr(gateway, "auth_query_params", None): + auth_query_params_decrypted = {} + for param_key, encrypted_value in (gateway.auth_query_params or {}).items(): + try: + decoded = decode_auth(encrypted_value) + auth_query_params_decrypted[param_key] = decoded.get(param_key, "") + except Exception as exc: + raise PromptError(f"Failed to decode query-parameter auth for prompt gateway '{gateway.id}'") from exc + if auth_query_params_decrypted: + gateway_url = apply_query_param_auth(gateway_url, auth_query_params_decrypted) + + remote_name = getattr(prompt, "original_name", None) or prompt.name + pool_user_identity = (user_identity or "anonymous").strip() or "anonymous" + gateway_id = str(getattr(gateway, "id", "")) + transport = str(getattr(gateway, "transport", "streamable_http") or "streamable_http").lower() + pool_transport_type = TransportType.SSE if transport == "sse" else TransportType.STREAMABLE_HTTP + prompt_arguments = arguments or None + + try: + if 
settings.mcp_session_pool_enabled: + try: + pool = get_mcp_session_pool() + except RuntimeError: + pool = None + if pool is not None: + async with pool.session( + url=gateway_url, + headers=headers, + transport_type=pool_transport_type, + user_identity=pool_user_identity, + gateway_id=gateway_id, + ) as pooled: + remote_result = await pooled.session.get_prompt(remote_name, arguments=prompt_arguments) + return PromptResult( + messages=[ + Message.model_validate(message.model_dump(by_alias=True, exclude_none=True) if hasattr(message, "model_dump") else message) + for message in getattr(remote_result, "messages", []) or [] + ], + description=getattr(remote_result, "description", None) or prompt.description, + ) + + if transport == "sse": + async with sse_client(url=gateway_url, headers=headers, timeout=settings.health_check_timeout) as streams: + async with ClientSession(*streams) as session: + await session.initialize() + remote_result = await session.get_prompt(remote_name, arguments=prompt_arguments) + else: + async with streamablehttp_client(url=gateway_url, headers=headers, timeout=settings.health_check_timeout) as (read_stream, write_stream, _get_session_id): + async with ClientSession(read_stream, write_stream) as session: + await session.initialize() + remote_result = await session.get_prompt(remote_name, arguments=prompt_arguments) + + return PromptResult( + messages=[ + Message.model_validate(message.model_dump(by_alias=True, exclude_none=True) if hasattr(message, "model_dump") else message) + for message in getattr(remote_result, "messages", []) or [] + ], + description=getattr(remote_result, "description", None) or prompt.description, + ) + except Exception as exc: + sanitized_error = sanitize_exception_message(str(exc), auth_query_params_decrypted) + raise PromptError(f"Failed to fetch prompt '{remote_name}' from gateway: {sanitized_error}") from exc + @staticmethod def validate_arguments_json(args_value: Any, context: str = "") -> List[Dict[str, Any]]: 
"""Validate and parse prompt arguments JSON. @@ -1642,15 +1751,15 @@ async def get_prompt( # Find prompt by ID first, then by name (active prompts only) search_key = str(prompt_id) - prompt = db.execute(select(DbPrompt).where(DbPrompt.id == prompt_id).where(DbPrompt.enabled)).scalar_one_or_none() + prompt = db.execute(select(DbPrompt).options(joinedload(DbPrompt.gateway)).where(DbPrompt.id == prompt_id).where(DbPrompt.enabled)).scalar_one_or_none() if not prompt: - prompt = db.execute(select(DbPrompt).where(DbPrompt.name == prompt_id).where(DbPrompt.enabled)).scalar_one_or_none() + prompt = db.execute(select(DbPrompt).options(joinedload(DbPrompt.gateway)).where(DbPrompt.name == prompt_id).where(DbPrompt.enabled)).scalar_one_or_none() if not prompt: # Check if an inactive prompt exists - inactive_prompt = db.execute(select(DbPrompt).where(DbPrompt.id == prompt_id).where(not_(DbPrompt.enabled))).scalar_one_or_none() + inactive_prompt = db.execute(select(DbPrompt).options(joinedload(DbPrompt.gateway)).where(DbPrompt.id == prompt_id).where(not_(DbPrompt.enabled))).scalar_one_or_none() if not inactive_prompt: - inactive_prompt = db.execute(select(DbPrompt).where(DbPrompt.name == prompt_id).where(not_(DbPrompt.enabled))).scalar_one_or_none() + inactive_prompt = db.execute(select(DbPrompt).options(joinedload(DbPrompt.gateway)).where(DbPrompt.name == prompt_id).where(not_(DbPrompt.enabled))).scalar_one_or_none() if inactive_prompt: raise PromptNotFoundError(f"Prompt '{search_key}' exists but is inactive") @@ -1679,7 +1788,11 @@ async def get_prompt( raise PromptNotFoundError(f"Prompt not found: {search_key}") server_scoped = True - if not arguments: + if self._should_fetch_gateway_prompt(prompt): + # Release the read transaction before any remote network I/O. 
+ db.commit() + result = await self._fetch_gateway_prompt_result(prompt, arguments, user) + elif not arguments: result = PromptResult( messages=[ Message( @@ -1704,7 +1817,7 @@ async def get_prompt( if has_post_fetch: post_result, _ = await self._plugin_manager.invoke_hook( PromptHookType.PROMPT_POST_FETCH, - payload=PromptPosthookPayload(prompt_id=str(prompt.id), result=result), + payload=PromptPosthookPayload(prompt_id=prompt.name, result=result), global_context=global_context, local_contexts=context_table, violations_as_exceptions=True, diff --git a/mcpgateway/services/resource_service.py b/mcpgateway/services/resource_service.py index ec47de6496..b683029cf6 100644 --- a/mcpgateway/services/resource_service.py +++ b/mcpgateway/services/resource_service.py @@ -41,7 +41,7 @@ import parse from pydantic import ValidationError from sqlalchemy import and_, delete, desc, not_, or_, select -from sqlalchemy.exc import IntegrityError, OperationalError +from sqlalchemy.exc import IntegrityError, MultipleResultsFound, OperationalError from sqlalchemy.orm import joinedload, Session # First-Party @@ -2121,10 +2121,27 @@ async def read_resource( # Matches uri (modified value from pluggins if applicable) # with uri from resource DB # if uri is of type resource template then resource is retreived from DB - query = select(DbResource).where(DbResource.uri == str(uri)).where(DbResource.enabled) + query = select(DbResource) + if server_id: + query = query.join( + server_resource_association, + server_resource_association.c.resource_id == DbResource.id, + ).where(server_resource_association.c.server_id == server_id) + query = query.where(DbResource.uri == str(uri)).where(DbResource.enabled) if include_inactive: - query = select(DbResource).where(DbResource.uri == str(uri)) - resource_db = db.execute(query).scalar_one_or_none() + query = select(DbResource) + if server_id: + query = query.join( + server_resource_association, + server_resource_association.c.resource_id == DbResource.id, 
+ ).where(server_resource_association.c.server_id == server_id) + query = query.where(DbResource.uri == str(uri)) + try: + resource_db = db.execute(query).scalar_one_or_none() + except MultipleResultsFound as exc: + if server_id: + raise ResourceError(f"Multiple resources matched URI '{uri}' for server '{server_id}'.") from exc + raise ResourceError(f"Resource URI '{uri}' is ambiguous across multiple servers; use /servers/{{id}}/mcp.") from exc # Check for direct_proxy mode if resource_db and resource_db.gateway and getattr(resource_db.gateway, "gateway_mode", "cache") == "direct_proxy" and settings.mcpgateway_direct_proxy_enabled: @@ -2180,8 +2197,23 @@ async def read_resource( # Normal cache mode - resource found in DB content = resource_db.content else: - # Check the inactivity first - check_inactivity = db.execute(select(DbResource).where(DbResource.uri == str(resource_uri)).where(not_(DbResource.enabled))).scalar_one_or_none() + # Check the inactivity first using the same server scope that + # governed the active lookup. Without this, duplicate URIs + # across different virtual servers/gateways can produce + # ambiguous results even though the current request is + # already scoped to a single server. 
+ inactive_query = select(DbResource) + if server_id: + inactive_query = inactive_query.join( + server_resource_association, + server_resource_association.c.resource_id == DbResource.id, + ).where(server_resource_association.c.server_id == server_id) + try: + check_inactivity = db.execute(inactive_query.where(DbResource.uri == str(resource_uri)).where(not_(DbResource.enabled))).scalar_one_or_none() + except MultipleResultsFound as exc: + if server_id: + raise ResourceError(f"Multiple inactive resources matched URI '{resource_uri}' for server '{server_id}'.") from exc + raise ResourceError(f"Resource URI '{resource_uri}' is ambiguous across multiple servers; use /servers/{{id}}/mcp.") from exc if check_inactivity: raise ResourceNotFoundError(f"Resource '{resource_uri}' exists but is inactive") diff --git a/mcpgateway/services/tool_service.py b/mcpgateway/services/tool_service.py index 89e1f3f9fc..2b07efb3a6 100644 --- a/mcpgateway/services/tool_service.py +++ b/mcpgateway/services/tool_service.py @@ -2152,6 +2152,79 @@ async def list_server_tools( return result + async def list_server_mcp_tool_definitions( + self, + db: Session, + server_id: str, + *, + include_inactive: bool = False, + user_email: Optional[str] = None, + token_teams: Optional[List[str]] = None, + ) -> List[Dict[str, Any]]: + """Return server-scoped MCP tool definitions without building full ToolRead models. + + This is a hot-path helper for the internal Rust -> Python seam. It keeps + auth and visibility semantics aligned with ``list_server_tools`` while + avoiding the heavier ``ToolRead`` conversion that is only needed for the + admin/API surfaces. + + Args: + db: Active database session. + server_id: Virtual server identifier used to scope the tool listing. + include_inactive: Whether disabled tools should be included. + user_email: Requester email for owner-scoped visibility checks. + token_teams: Normalized team scope from the caller token. 
+ + Returns: + A list of MCP-compatible tool definition dictionaries. + """ + name_column = DbTool.__table__.c.name + query = ( + select( + name_column.label("name"), + DbTool.description.label("description"), + DbTool.input_schema.label("input_schema"), + DbTool.output_schema.label("output_schema"), + DbTool.annotations.label("annotations"), + DbTool.owner_email.label("owner_email"), + DbTool.team_id.label("team_id"), + DbTool.visibility.label("visibility"), + ) + .join(server_tool_association, DbTool.id == server_tool_association.c.tool_id) + .where(server_tool_association.c.server_id == server_id) + ) + + if not include_inactive: + query = query.where(DbTool.enabled) + + if user_email is not None or token_teams is not None: + team_ids = token_teams if token_teams is not None else [] + is_public_only_token = token_teams is not None and len(token_teams) == 0 + + access_conditions = [DbTool.visibility == "public"] + if not is_public_only_token and user_email: + access_conditions.append(DbTool.owner_email == user_email) + if team_ids: + access_conditions.append(and_(DbTool.team_id.in_(team_ids), DbTool.visibility.in_(["team", "public"]))) + query = query.where(or_(*access_conditions)) + + rows = db.execute(query).mappings().all() + db.commit() + + result: List[Dict[str, Any]] = [] + for row in rows: + payload: Dict[str, Any] = { + "name": row["name"], + "description": row["description"], + "inputSchema": row["input_schema"] or {"type": "object", "properties": {}}, + "annotations": row["annotations"] or {}, + } + if row["output_schema"] is not None: + payload["outputSchema"] = row["output_schema"] + result.append(payload) + + return result + async def list_tools_for_user( self, db: Session, @@ -2755,6 +2828,319 @@ async def invoke_tool_direct( logger.exception(f"Direct proxy tool invocation failed for {name}: {e}") raise ToolInvocationError(f"Direct proxy tool invocation failed: {str(e)}") + async def prepare_rust_mcp_tool_execution( + self, + db: Session, + name: 
str, + request_headers: Optional[Dict[str, str]] = None, + app_user_email: Optional[str] = None, + user_email: Optional[str] = None, + token_teams: Optional[List[str]] = None, + server_id: Optional[str] = None, + ) -> Dict[str, Any]: + """Build a narrow MCP execution plan for the Rust runtime hot path. + + This reuses Python's existing auth, scoping, and secret-handling logic, + but stops before the actual upstream MCP call. The Rust runtime can then + execute the call directly for the simple streamable HTTP MCP cases that + dominate load tests, while Python remains the authority for policy. + + Args: + db: Active database session. + name: Tool name requested by the caller. + request_headers: Incoming request headers used for passthrough/auth decisions. + app_user_email: OAuth application user email, when present. + user_email: Effective requester email after auth normalization. + token_teams: Normalized team scope from the caller token. + server_id: Optional virtual server identifier restricting tool access. + + Returns: + A Rust execution plan dictionary, or a fallback descriptor when direct + Rust execution is not eligible. + + Raises: + ToolNotFoundError: If the requested tool is not visible or invocable. + ToolInvocationError: If gateway auth preparation fails or the tool name is ambiguous. 
+ """ + if self._plugin_manager and (self._plugin_manager.has_hooks_for(ToolHookType.TOOL_PRE_INVOKE) or self._plugin_manager.has_hooks_for(ToolHookType.TOOL_POST_INVOKE)): + return {"eligible": False, "fallbackReason": "plugin-hooks-configured"} + + if current_trace_id.get(): + return {"eligible": False, "fallbackReason": "observability-trace-active"} + + gateway_id_from_header = extract_gateway_id_from_headers(request_headers) + is_direct_proxy = False + tool = None + gateway = None + tool_selected_from_server_scope = False + tool_payload: Dict[str, Any] = {} + gateway_payload: Optional[Dict[str, Any]] = None + if gateway_id_from_header: + gateway = db.execute(select(DbGateway).where(DbGateway.id == gateway_id_from_header)).scalar_one_or_none() + if gateway and gateway.gateway_mode == "direct_proxy" and settings.mcpgateway_direct_proxy_enabled: + if not await check_gateway_access(db, gateway, user_email, token_teams): + raise ToolNotFoundError(f"Tool not found: {name}") + is_direct_proxy = True + gateway_payload = { + "id": str(gateway.id), + "name": gateway.name, + "url": gateway.url, + "auth_type": gateway.auth_type, + "auth_value": encode_auth(gateway.auth_value) if isinstance(gateway.auth_value, dict) else gateway.auth_value, + "auth_query_params": gateway.auth_query_params, + "oauth_config": gateway.oauth_config, + "ca_certificate": gateway.ca_certificate, + "ca_certificate_sig": gateway.ca_certificate_sig, + "passthrough_headers": gateway.passthrough_headers, + "gateway_mode": gateway.gateway_mode, + } + tool_payload = { + "id": None, + "name": name, + "original_name": name, + "enabled": True, + "reachable": True, + "integration_type": "MCP", + "request_type": "streamablehttp", + "gateway_id": str(gateway.id), + } + + if not is_direct_proxy: + tool_lookup_cache = _get_tool_lookup_cache() + cached_payload = await tool_lookup_cache.get(name) if tool_lookup_cache.enabled else None + + if cached_payload: + status = cached_payload.get("status", "active") + if 
status == "missing": + raise ToolNotFoundError(f"Tool not found: {name}") + if status == "inactive": + raise ToolNotFoundError(f"Tool '{name}' exists but is inactive") + if status == "offline": + raise ToolNotFoundError(f"Tool '{name}' exists but is currently offline. Please verify if it is running.") + tool_payload = cached_payload.get("tool") or {} + gateway_payload = cached_payload.get("gateway") + + if not tool_payload: + tools = self._load_invocable_tools(db, name, server_id=server_id) + tool_selected_from_server_scope = bool(server_id) + + if not tools: + raise ToolNotFoundError(f"Tool not found: {name}") + + multiple_found = len(tools) > 1 + if not multiple_found: + tool = tools[0] + else: + visibility_priority = {"team": 0, "private": 1, "public": 2} + accessible_tools: list[tuple[int, Any]] = [] + for candidate in tools: + tool_dict = {"visibility": candidate.visibility, "team_id": candidate.team_id, "owner_email": candidate.owner_email} + if await self._check_tool_access(db, tool_dict, user_email, token_teams): + priority = visibility_priority.get(candidate.visibility, 99) + accessible_tools.append((priority, candidate)) + + if not accessible_tools: + raise ToolNotFoundError(f"Tool not found: {name}") + + accessible_tools.sort(key=lambda item: item[0]) + best_priority = accessible_tools[0][0] + best_tools = [candidate for priority, candidate in accessible_tools if priority == best_priority] + if len(best_tools) > 1: + raise ToolInvocationError(f"Multiple tools found with name '{name}' at same priority level. Tool name is ambiguous.") + tool = best_tools[0] + + if not tool.enabled: + raise ToolNotFoundError(f"Tool '{name}' exists but is inactive") + + if not tool.reachable: + await tool_lookup_cache.set_negative(name, "offline") + raise ToolNotFoundError(f"Tool '{name}' exists but is currently offline. 
Please verify if it is running.") + + gateway = tool.gateway + cache_payload = self._build_tool_cache_payload(tool, gateway) + tool_payload = cache_payload.get("tool") or {} + gateway_payload = cache_payload.get("gateway") + if not multiple_found: + await tool_lookup_cache.set(name, cache_payload, gateway_id=tool_payload.get("gateway_id")) + + if tool_payload.get("enabled") is False: + raise ToolNotFoundError(f"Tool '{name}' exists but is inactive") + if tool_payload.get("reachable") is False: + raise ToolNotFoundError(f"Tool '{name}' exists but is currently offline. Please verify if it is running.") + + if is_direct_proxy: + return {"eligible": False, "fallbackReason": "direct-proxy"} + + if not await self._check_tool_access(db, tool_payload, user_email, token_teams): + raise ToolNotFoundError(f"Tool not found: {name}") + + if server_id and not tool_selected_from_server_scope: + tool_id_for_check = tool_payload.get("id") + if not tool_id_for_check: + raise ToolNotFoundError(f"Tool not found: {name}") + server_match = db.execute( + select(server_tool_association.c.tool_id).where( + server_tool_association.c.server_id == server_id, + server_tool_association.c.tool_id == tool_id_for_check, + ) + ).first() + if not server_match: + raise ToolNotFoundError(f"Tool not found: {name}") + + tool_integration_type = tool_payload.get("integration_type") + if tool_integration_type != "MCP": + return {"eligible": False, "fallbackReason": f"unsupported-integration:{tool_integration_type or 'unknown'}"} + + tool_request_type = tool_payload.get("request_type") + transport = tool_request_type.lower() if tool_request_type else "sse" + if transport != "streamablehttp": + return {"eligible": False, "fallbackReason": f"unsupported-transport:{transport}"} + + tool_jsonpath_filter = tool_payload.get("jsonpath_filter") + if tool_jsonpath_filter: + return {"eligible": False, "fallbackReason": "jsonpath-filter-configured"} + + passthrough_allowed = 
global_config_cache.get_passthrough_headers(db, settings.default_passthrough_headers) + + if tool is not None: + gateway = tool.gateway + + tool_name_original = tool_payload.get("original_name") or tool_payload.get("name") or name + tool_id = tool_payload.get("id") + tool_gateway_id = tool_payload.get("gateway_id") + tool_timeout_ms = tool_payload.get("timeout_ms") + effective_timeout = (tool_timeout_ms / 1000) if tool_timeout_ms else settings.tool_timeout + + has_gateway = gateway_payload is not None + gateway_url = gateway_payload.get("url") if has_gateway else None + gateway_name = gateway_payload.get("name") if has_gateway else None + gateway_auth_type = gateway_payload.get("auth_type") if has_gateway else None + gateway_auth_value = gateway_payload.get("auth_value") if has_gateway and isinstance(gateway_payload.get("auth_value"), str) else None + gateway_auth_query_params = gateway_payload.get("auth_query_params") if has_gateway and isinstance(gateway_payload.get("auth_query_params"), dict) else None + gateway_oauth_config = gateway_payload.get("oauth_config") if has_gateway and isinstance(gateway_payload.get("oauth_config"), dict) else None + if has_gateway and gateway is not None: + runtime_gateway_auth_value = getattr(gateway, "auth_value", None) + if isinstance(runtime_gateway_auth_value, dict): + gateway_auth_value = encode_auth(runtime_gateway_auth_value) + elif isinstance(runtime_gateway_auth_value, str): + gateway_auth_value = runtime_gateway_auth_value + runtime_gateway_query_params = getattr(gateway, "auth_query_params", None) + if isinstance(runtime_gateway_query_params, dict): + gateway_auth_query_params = runtime_gateway_query_params + runtime_gateway_oauth_config = getattr(gateway, "oauth_config", None) + if isinstance(runtime_gateway_oauth_config, dict): + gateway_oauth_config = runtime_gateway_oauth_config + gateway_ca_cert = gateway_payload.get("ca_certificate") if has_gateway else None + gateway_id_str = gateway_payload.get("id") if 
has_gateway else None + + if tool is None and has_gateway: + requires_gateway_auth_hydration = gateway_auth_type in {"basic", "bearer", "authheaders", "oauth", "query_param"} + if requires_gateway_auth_hydration: + tool_id_for_hydration = tool_payload.get("id") + if tool_id_for_hydration: + tool_auth_row = db.execute(select(DbTool).options(joinedload(DbTool.gateway)).where(DbTool.id == tool_id_for_hydration)).scalar_one_or_none() + if tool_auth_row and tool_auth_row.gateway: + hydrated_gateway_auth_value = getattr(tool_auth_row.gateway, "auth_value", None) + if isinstance(hydrated_gateway_auth_value, dict): + gateway_auth_value = encode_auth(hydrated_gateway_auth_value) + elif isinstance(hydrated_gateway_auth_value, str): + gateway_auth_value = hydrated_gateway_auth_value + hydrated_gateway_query_params = getattr(tool_auth_row.gateway, "auth_query_params", None) + if isinstance(hydrated_gateway_query_params, dict): + gateway_auth_query_params = hydrated_gateway_query_params + hydrated_gateway_oauth_config = getattr(tool_auth_row.gateway, "oauth_config", None) + if isinstance(hydrated_gateway_oauth_config, dict): + gateway_oauth_config = hydrated_gateway_oauth_config + + gateway_auth_query_params_decrypted: Optional[Dict[str, str]] = None + if gateway_auth_type == "query_param" and gateway_auth_query_params: + gateway_auth_query_params_decrypted = {} + for param_key, encrypted_value in gateway_auth_query_params.items(): + if encrypted_value: + try: + decrypted = decode_auth(encrypted_value) + gateway_auth_query_params_decrypted[param_key] = decrypted.get(param_key, "") + except Exception: # noqa: S110 + logger.debug(f"Failed to decrypt query param '{param_key}' for Rust MCP tool execution plan") + if gateway_auth_query_params_decrypted and gateway_url: + gateway_url = apply_query_param_auth(gateway_url, gateway_auth_query_params_decrypted) + + if gateway_ca_cert: + return {"eligible": False, "fallbackReason": "custom-ca-certificate"} + + if not gateway_url: + return 
{"eligible": False, "fallbackReason": "missing-gateway-url"} + + if has_gateway and gateway_auth_type == "oauth" and isinstance(gateway_oauth_config, dict) and gateway_oauth_config: + grant_type = gateway_oauth_config.get("grant_type", "client_credentials") + if grant_type == "authorization_code": + try: + # First-Party + from mcpgateway.services.token_storage_service import TokenStorageService # pylint: disable=import-outside-toplevel + + with fresh_db_session() as token_db: + token_storage = TokenStorageService(token_db) + if not app_user_email: + raise ToolInvocationError(f"User authentication required for OAuth-protected gateway '{gateway_name}'. Please ensure you are authenticated.") + access_token = await token_storage.get_user_token(gateway_id_str, app_user_email) + + if access_token: + headers = {"Authorization": f"Bearer {access_token}"} + else: + raise ToolInvocationError(f"Please authorize {gateway_name} first. Visit /oauth/authorize/{gateway_id_str} to complete OAuth flow.") + except Exception as e: + logger.error(f"Failed to obtain stored OAuth token for gateway {gateway_name}: {e}") + raise ToolInvocationError(f"OAuth token retrieval failed for gateway: {str(e)}") + else: + try: + access_token = await self.oauth_manager.get_access_token(gateway_oauth_config) + headers = {"Authorization": f"Bearer {access_token}"} + except Exception as e: + logger.error(f"Failed to obtain OAuth access token for gateway {gateway_name}: {e}") + raise ToolInvocationError(f"OAuth authentication failed for gateway: {str(e)}") + else: + headers = decode_auth(gateway_auth_value) if gateway_auth_value else {} + + if request_headers: + headers = compute_passthrough_headers_cached( + request_headers, + headers, + passthrough_allowed, + gateway_auth_type=gateway_auth_type, + gateway_passthrough_headers=gateway_payload.get("passthrough_headers") if has_gateway else None, + ) + + runtime_headers = {str(header_name): str(header_value) for header_name, header_value in headers.items() 
if header_name and header_value} + + return { + "eligible": True, + "transport": transport, + "serverUrl": gateway_url, + "remoteToolName": tool_name_original, + "headers": runtime_headers, + "timeoutMs": int(effective_timeout * 1000), + "gatewayId": tool_gateway_id, + "toolName": name, + "toolId": tool_id or None, + "serverId": server_id, + } + + def _load_invocable_tools(self, db: Session, name: str, server_id: Optional[str] = None) -> List[DbTool]: + """Load candidate tools for invocation, narrowing to a virtual server when possible. + + Args: + db: Active database session. + name: Tool name to resolve. + server_id: Optional virtual server identifier used to constrain results. + + Returns: + A list of candidate tool ORM rows matching the request. + """ + query = select(DbTool).options(joinedload(DbTool.gateway)).where(DbTool.name == name) + if server_id: + query = query.join(server_tool_association, DbTool.id == server_tool_association.c.tool_id).where(server_tool_association.c.server_id == server_id) + return db.execute(query).scalars().all() + async def invoke_tool( self, db: Session, @@ -2885,7 +3271,7 @@ async def invoke_tool( # Use a single query to avoid a race between separate enabled/inactive lookups. # Use scalars().all() instead of scalar_one_or_none() to handle duplicate # tool names across teams without crashing on MultipleResultsFound. - tools = db.execute(select(DbTool).options(joinedload(DbTool.gateway)).where(DbTool.name == name)).scalars().all() + tools = self._load_invocable_tools(db, name, server_id=server_id) if not tools: raise ToolNotFoundError(f"Tool not found: {name}") diff --git a/mcpgateway/templates/overview_partial.html b/mcpgateway/templates/overview_partial.html index cf486601a7..4390ef5c5b 100644 --- a/mcpgateway/templates/overview_partial.html +++ b/mcpgateway/templates/overview_partial.html @@ -19,6 +19,103 @@

+
+
+
+ MCP Runtime +
+
+ + {% if mcp_runtime.mounted == 'rust' %}🦀 Rust MCP Core{% else %}🐍 + Python MCP Core{% endif %} + + + {{ mcp_runtime.mode | replace('-', ' ') }} + + {% if mcp_runtime.mode == 'rust-managed' and mcp_runtime.mounted == 'python' %} + + Shadow / safe fallback + + {% elif mcp_runtime.mode == 'python-rust-built-disabled' %} + + Rust build available + + {% endif %} +
+
+ +
+
+
+ Public MCP +
+
+ {% if mcp_runtime.mounted == 'rust' %}🦀 Rust{% else %}🐍 Python{% endif %} +
+
+
+
+ Session +
+
+ {% if mcp_runtime.session_core_mode == 'rust' %}🦀 Rust{% else %}🐍 Python{% endif %} +
+
+
+
+ Replay +
+
+ {% if mcp_runtime.resume_core_mode == 'rust' %}🦀 Rust{% else %}🐍 Python{% endif %} +
+
+
+
+ Live Stream +
+
+ {% if mcp_runtime.live_stream_core_mode == 'rust' %}🦀 Rust{% else %}🐍 Python{% endif %} +
+
+
+
+ Affinity +
+
+ {% if mcp_runtime.affinity_core_mode == 'rust' %}🦀 Rust{% else %}🐍 Python{% endif %} +
+
+
+
+ Auth Reuse +
+
+ {% if mcp_runtime.session_auth_reuse_mode == 'rust' %}🦀 Rust{% else %}🐍 Python{% endif %} +
+
+
+
+ +
diff --git a/mcpgateway/templates/version_info_partial.html b/mcpgateway/templates/version_info_partial.html index 02609502e9..04593d14f0 100644 --- a/mcpgateway/templates/version_info_partial.html +++ b/mcpgateway/templates/version_info_partial.html @@ -2,6 +2,7 @@

@@ -57,7 +58,7 @@

-
+

+ +
+
+
+

+ + MCP Runtime +

+

+ Active MCP transport and core ownership for this gateway instance. +

+
+
+ + {% if payload.mcp_runtime.mounted == 'rust' %}🦀 Rust MCP Core{% else %}🐍 Python MCP Core{% endif %} + + + {{ payload.mcp_runtime.mode | replace('-', ' ') }} + +
+
+ +
+
+
+ Public Transport +
+
+ {% if payload.mcp_runtime.mounted == 'rust' %}🦀 Rust{% else %}🐍 Python{% endif %} +
+
+
+
+ Session Core +
+
+ {% if payload.mcp_runtime.session_core_mode == 'rust' %}🦀 Rust{% else %}🐍 Python{% endif %} +
+
+
+
+ Event Store +
+
+ {% if payload.mcp_runtime.event_store_mode == 'rust' %}🦀 Rust{% else %}🐍 Python{% endif %} +
+
+
+
+ Replay / Resume +
+
+ {% if payload.mcp_runtime.resume_core_mode == 'rust' %}🦀 Rust{% else %}🐍 Python{% endif %} +
+
+
+
+ Live Stream +
+
+ {% if payload.mcp_runtime.live_stream_core_mode == 'rust' %}🦀 Rust{% else %}🐍 Python{% endif %} +
+
+
+
+ Affinity +
+
+ {% if payload.mcp_runtime.affinity_core_mode == 'rust' %}🦀 Rust{% else %}🐍 Python{% endif %} +
+
+
+
+ Session Auth Reuse +
+
+ {% if payload.mcp_runtime.session_auth_reuse_mode == 'rust' %}🦀 Rust{% else %}🐍 Python{% endif %} +
+
+
+
+ Rust Build Included +
+
+ {% if payload.mcp_runtime.rust_build_included %}Yes{% else %}No{% endif %} +
+
+
+ + {% if payload.mcp_runtime.mode == 'rust-managed' and payload.mcp_runtime.mounted == 'python' %} +
+ Public MCP traffic is still on the Python path. The Rust sidecar is present internally for managed rollout or safe fallback operation. +
+ {% elif payload.mcp_runtime.mode == 'python-rust-built-disabled' %} +
+ This image includes Rust MCP artifacts, but the runtime is currently disabled and the public MCP path remains Python-only. +
+ {% endif %} +
+ -
+

-
+

-
+

diff --git a/mcpgateway/transports/rust_mcp_runtime_proxy.py b/mcpgateway/transports/rust_mcp_runtime_proxy.py new file mode 100644 index 0000000000..1da38e8e9c --- /dev/null +++ b/mcpgateway/transports/rust_mcp_runtime_proxy.py @@ -0,0 +1,268 @@ +# -*- coding: utf-8 -*- +"""Location: ./mcpgateway/transports/rust_mcp_runtime_proxy.py +Copyright 2025 +SPDX-License-Identifier: Apache-2.0 + +Experimental MCP transport proxy for the Rust runtime edge. + +This module keeps Python auth/path-rewrite middleware in front of MCP traffic +while proxying MCP transport requests to the optional Rust runtime sidecar. +""" + +# Future +from __future__ import annotations + +# Standard +import asyncio +import base64 +import logging +import re +from urllib.parse import urlsplit, urlunsplit + +# Third-Party +import httpx +import orjson +from starlette.types import Receive, Scope, Send + +# First-Party +from mcpgateway.config import settings +from mcpgateway.services.http_client_service import get_http_client, get_http_limits +from mcpgateway.transports.streamablehttp_transport import get_streamable_http_auth_context +from mcpgateway.utils.orjson_response import ORJSONResponse + +logger = logging.getLogger(__name__) + +_SERVER_ID_RE = re.compile(r"/servers/(?P[a-fA-F0-9\-]+)/mcp/?$") +_CONTEXTFORGE_SERVER_ID_HEADER = "x-contextforge-server-id" +_CONTEXTFORGE_AUTH_CONTEXT_HEADER = "x-contextforge-auth-context" +_CONTEXTFORGE_AFFINITY_FORWARDED_HEADER = "x-contextforge-affinity-forwarded" +_CLIENT_ERROR_DETAIL = "See server logs" +_REQUEST_HOP_BY_HOP_HEADERS = frozenset({"host", "content-length", "connection", "transfer-encoding", "keep-alive"}) +_FORWARDED_CHAIN_HEADERS = frozenset({"forwarded", "x-forwarded-for", "x-forwarded-host", "x-forwarded-port", "x-forwarded-proto"}) +_INTERNAL_ONLY_REQUEST_HEADERS = frozenset( + { + "x-forwarded-internally", + "x-original-worker", + "x-mcp-session-id", + "x-contextforge-mcp-runtime", + _CONTEXTFORGE_SERVER_ID_HEADER, + 
_CONTEXTFORGE_AUTH_CONTEXT_HEADER, + _CONTEXTFORGE_AFFINITY_FORWARDED_HEADER, + } +) +_RESPONSE_HOP_BY_HOP_HEADERS = frozenset({"connection", "transfer-encoding", "keep-alive"}) + + +class RustMCPRuntimeProxy: + """Proxy MCP transport traffic to the experimental Rust runtime.""" + + def __init__(self, python_fallback_app) -> None: + """Initialize the proxy with the existing Python MCP transport fallback. + + Args: + python_fallback_app: Python MCP transport app used when Rust cannot handle + the request. + """ + self.python_fallback_app = python_fallback_app + self._uds_client: httpx.AsyncClient | None = None + self._uds_client_lock = asyncio.Lock() + + async def handle_streamable_http(self, scope: Scope, receive: Receive, send: Send) -> None: + """Route MCP transport requests to the Rust runtime and preserve Python fallback for others. + + Args: + scope: Incoming ASGI scope. + receive: ASGI receive callable. + send: ASGI send callable. + """ + if scope.get("type") != "http": + await self.python_fallback_app(scope, receive, send) + return + + method = str(scope.get("method", "GET")).upper() + if method not in {"GET", "POST", "DELETE"}: + await self.python_fallback_app(scope, receive, send) + return + + target_url = _build_runtime_mcp_url(scope) + headers = _build_forward_headers(scope) + timeout = httpx.Timeout(settings.experimental_rust_mcp_runtime_timeout_seconds) + + try: + client = await self._get_runtime_client() + async with client.stream( + method, + target_url, + content=_stream_request_body(receive) if method == "POST" else b"", + headers=headers, + timeout=timeout, + follow_redirects=False, + ) as response: + await send( + { + "type": "http.response.start", + "status": response.status_code, + "headers": [(name, value) for name, value in response.headers.raw if name.decode("latin-1").lower() not in _RESPONSE_HOP_BY_HOP_HEADERS], + } + ) + async for chunk in response.aiter_bytes(): + if chunk: + await send({"type": "http.response.body", "body": chunk, 
"more_body": True}) + await send({"type": "http.response.body", "body": b"", "more_body": False}) + except httpx.HTTPError as exc: + logger.error("Experimental Rust MCP runtime request failed: %s", exc) + error_response = ORJSONResponse( + status_code=502, + content={ + "jsonrpc": "2.0", + "id": None, + "error": { + "code": -32000, + "message": "Experimental Rust MCP runtime unavailable", + "data": _CLIENT_ERROR_DETAIL, + }, + }, + ) + await error_response(scope, receive, send) + return + + async def _get_runtime_client(self) -> httpx.AsyncClient: + """Return the client used for Python -> Rust runtime proxying. + + Returns: + An async HTTP client configured for either UDS or loopback HTTP. + """ + uds_path = settings.experimental_rust_mcp_runtime_uds + if not uds_path: + return await get_http_client() + + if self._uds_client is not None: + return self._uds_client + + async with self._uds_client_lock: + if self._uds_client is None: + self._uds_client = httpx.AsyncClient( + transport=httpx.AsyncHTTPTransport(uds=uds_path), + limits=get_http_limits(), + timeout=httpx.Timeout(settings.experimental_rust_mcp_runtime_timeout_seconds), + follow_redirects=False, + ) + return self._uds_client + + +async def _stream_request_body(receive: Receive): + """Yield ASGI request body chunks without buffering the full request. + + Args: + receive: ASGI receive callable for the current request. + + Yields: + Raw request body chunks as they arrive from the client. + """ + while True: + message = await receive() + if message["type"] == "http.disconnect": + return + if message["type"] != "http.request": + continue + body = message.get("body", b"") + if body: + yield body + if not message.get("more_body", False): + return + + +def _extract_server_id_from_scope(scope: Scope) -> str | None: + """Extract server_id when the mounted MCP path came from /servers//mcp. + + Args: + scope: Incoming ASGI scope. + + Returns: + The matched server id, or ``None`` when the request is not server-scoped. 
+ """ + modified_path = str(scope.get("modified_path") or scope.get("path") or "") + match = _SERVER_ID_RE.search(modified_path) + return match.group("server_id") if match else None + + +def _build_runtime_mcp_url(scope: Scope) -> str: + """Build the target Rust runtime /mcp URL, preserving the query string. + + Args: + scope: Incoming ASGI scope. + + Returns: + Absolute URL for the Rust sidecar MCP endpoint. + """ + base = urlsplit(settings.experimental_rust_mcp_runtime_url) + query_string = scope.get("query_string", b"") + query = query_string.decode("latin-1") if isinstance(query_string, (bytes, bytearray)) else str(query_string or "") + base_path = base.path.rstrip("/") + if not base_path: + target_path = "/mcp/" + elif base_path.endswith("/mcp"): + target_path = f"{base_path}/" + else: + target_path = f"{base_path}/mcp/" + merged_query = "&".join(part for part in (base.query, query) if part) + return urlunsplit((base.scheme, base.netloc, target_path, merged_query, "")) + + +def _build_forward_headers(scope: Scope) -> list[tuple[str, str]]: + """Forward request headers needed by the Rust runtime while stripping internal-only headers. + + Args: + scope: Incoming ASGI scope. + + Returns: + Header tuples safe to forward to the Rust sidecar. 
+ """ + headers: list[tuple[str, str]] = [] + for item in scope.get("headers") or []: + if not isinstance(item, (tuple, list)) or len(item) != 2: + continue + name, value = item + if not isinstance(name, (bytes, bytearray)) or not isinstance(value, (bytes, bytearray)): + continue + header_name = name.decode("latin-1").lower() + if header_name in _REQUEST_HOP_BY_HOP_HEADERS or header_name in _FORWARDED_CHAIN_HEADERS or header_name in _INTERNAL_ONLY_REQUEST_HEADERS: + continue + headers.append((header_name, value.decode("latin-1"))) + + server_id = _extract_server_id_from_scope(scope) + if server_id: + headers.append((_CONTEXTFORGE_SERVER_ID_HEADER, server_id)) + + auth_context = _build_forwarded_auth_context_header() + if auth_context is not None: + headers.append((_CONTEXTFORGE_AUTH_CONTEXT_HEADER, auth_context)) + + client = scope.get("client") + client_host = client[0] if isinstance(client, (tuple, list)) and client else None + from_loopback = client_host in ("127.0.0.1", "::1") + incoming_headers = { + name.decode("latin-1").lower(): value.decode("latin-1") + for item in scope.get("headers") or [] + if isinstance(item, (tuple, list)) and len(item) == 2 + for name, value in [item] + if isinstance(name, (bytes, bytearray)) and isinstance(value, (bytes, bytearray)) + } + if from_loopback and incoming_headers.get("x-forwarded-internally") == "true": + headers.append((_CONTEXTFORGE_AFFINITY_FORWARDED_HEADER, "rust")) + + return headers + + +def _build_forwarded_auth_context_header() -> str | None: + """Serialize the authenticated MCP context for the trusted internal Python dispatcher. + + Returns: + Base64url-encoded auth context for trusted internal forwarding, or ``None`` + when no MCP auth context is available. 
+ """ + auth_context = get_streamable_http_auth_context() + if not auth_context: + return None + encoded = base64.urlsafe_b64encode(orjson.dumps(auth_context)).decode("ascii") + return encoded.rstrip("=") diff --git a/mcpgateway/transports/streamablehttp_transport.py b/mcpgateway/transports/streamablehttp_transport.py index 0c6a6b3856..d38d4c3a54 100644 --- a/mcpgateway/transports/streamablehttp_transport.py +++ b/mcpgateway/transports/streamablehttp_transport.py @@ -38,6 +38,7 @@ from dataclasses import dataclass import re from typing import Any, AsyncGenerator, Dict, List, Optional, Pattern, Tuple, Union +from urllib.parse import urlsplit, urlunsplit from uuid import uuid4 # Third-Party @@ -64,7 +65,9 @@ from mcpgateway.db import SessionLocal from mcpgateway.middleware.rbac import _ACCESS_DENIED_MSG from mcpgateway.services.completion_service import CompletionService +from mcpgateway.services.http_client_service import get_http_client, get_http_limits from mcpgateway.services.logging_service import LoggingService +from mcpgateway.services.metrics import mcp_auth_cache_events_counter from mcpgateway.services.oauth_manager import OAuthEnforcementUnavailableError, OAuthRequiredError from mcpgateway.services.permission_service import PermissionService from mcpgateway.services.prompt_service import PromptService @@ -79,6 +82,19 @@ logging_service = LoggingService() logger = logging_service.get_logger(__name__) + +def _record_mcp_auth_cache_event(outcome: str) -> None: + """Best-effort Prometheus counter update for MCP auth cache flow. + + Args: + outcome: Cache-flow outcome label to emit. 
+ """ + try: + mcp_auth_cache_events_counter.labels(outcome=outcome).inc() + except Exception: + pass # nosec B110 - Metrics must not break auth flow + + # Precompiled regex for server ID extraction from path _SERVER_ID_RE: Pattern[str] = re.compile(r"/servers/(?P[a-fA-F0-9\-]+)/mcp") @@ -98,6 +114,9 @@ user_context_var: contextvars.ContextVar[dict[str, Any]] = contextvars.ContextVar("user_context", default={}) _oauth_checked_var: contextvars.ContextVar[bool] = contextvars.ContextVar("_oauth_checked", default=False) _shared_session_registry: Optional[Any] = None +_rust_event_store_client: Optional[httpx.AsyncClient] = None +_rust_event_store_client_lock = asyncio.Lock() +_RUST_EVENT_STORE_DEFAULT_KEY_PREFIX = "mcpgw:eventstore" # ------------------------------ Event store ------------------------------ @@ -396,6 +415,128 @@ async def replay_events_after( return last_event.stream_id +class RustEventStore(EventStore): + """Rust-backed event store that delegates resumable stream state to the sidecar.""" + + def __init__(self, max_events_per_stream: int = 100, ttl: int = 3600, key_prefix: str = _RUST_EVENT_STORE_DEFAULT_KEY_PREFIX): + """Initialize the Rust-backed event store wrapper. + + Args: + max_events_per_stream: Maximum number of events retained per stream. + ttl: Event retention time in seconds. + key_prefix: Redis key prefix shared with the Rust sidecar. + """ + self.max_events_per_stream = max_events_per_stream + self.ttl = ttl + self.key_prefix = key_prefix.rstrip(":") + + async def store_event(self, stream_id: StreamId, message: JSONRPCMessage | None) -> EventId: + """Store an event in the Rust-backed resumable event store. + + Args: + stream_id: Stream that owns the event. + message: JSON-RPC payload to persist for replay. + + Returns: + The generated event identifier returned by the Rust sidecar. + + Raises: + RuntimeError: If the Rust sidecar event store is unavailable or returns invalid data. 
+ """ + client = await _get_rust_event_store_client() + message_dict = None if message is None else (message.model_dump() if hasattr(message, "model_dump") else dict(message)) + response = await client.post( + _build_rust_runtime_internal_url("/_internal/event-store/store"), + json={ + "streamId": stream_id, + "message": message_dict, + "keyPrefix": self.key_prefix, + "maxEventsPerStream": self.max_events_per_stream, + "ttlSeconds": self.ttl, + }, + timeout=httpx.Timeout(settings.experimental_rust_mcp_runtime_timeout_seconds), + follow_redirects=False, + ) + response.raise_for_status() + payload = response.json() + event_id = payload.get("eventId") + if not isinstance(event_id, str) or not event_id: + raise RuntimeError("Rust event store returned an invalid eventId") + return event_id + + async def replay_events_after(self, last_event_id: EventId, send_callback: EventCallback) -> Union[StreamId, None]: + """Replay events newer than ``last_event_id`` through the provided callback. + + Args: + last_event_id: Last event acknowledged by the reconnecting client. + send_callback: Callback invoked for each replayed event payload. + + Returns: + The associated stream identifier when replay succeeds, else ``None``. 
+ """ + client = await _get_rust_event_store_client() + response = await client.post( + _build_rust_runtime_internal_url("/_internal/event-store/replay"), + json={ + "lastEventId": last_event_id, + "keyPrefix": self.key_prefix, + }, + timeout=httpx.Timeout(settings.experimental_rust_mcp_runtime_timeout_seconds), + follow_redirects=False, + ) + response.raise_for_status() + payload = response.json() + stream_id = payload.get("streamId") + if not isinstance(stream_id, str) or not stream_id: + return None + for event in payload.get("events", []): + if not isinstance(event, dict): + continue + await send_callback(event.get("message")) + return stream_id + + +async def _get_rust_event_store_client() -> httpx.AsyncClient: + """Return the HTTP client used for Python -> Rust event-store calls. + + Returns: + An async HTTP client configured for Rust event-store access. + """ + global _rust_event_store_client # pylint: disable=global-statement + + uds_path = settings.experimental_rust_mcp_runtime_uds + if not uds_path: + return await get_http_client() + + if _rust_event_store_client is not None: + return _rust_event_store_client + + async with _rust_event_store_client_lock: + if _rust_event_store_client is None: + _rust_event_store_client = httpx.AsyncClient( + transport=httpx.AsyncHTTPTransport(uds=uds_path), + limits=get_http_limits(), + timeout=httpx.Timeout(settings.experimental_rust_mcp_runtime_timeout_seconds), + follow_redirects=False, + ) + return _rust_event_store_client + + +def _build_rust_runtime_internal_url(path: str) -> str: + """Build a Rust sidecar internal URL for UDS or loopback HTTP transport. + + Args: + path: Internal Rust runtime path to append to the configured base URL. + + Returns: + Absolute URL targeting the Rust sidecar over HTTP or UDS-backed transport. 
+ """ + base = urlsplit(settings.experimental_rust_mcp_runtime_url) + base_path = base.path.rstrip("/") + target_path = f"{base_path}{path}" if base_path else path + return urlunsplit((base.scheme, base.netloc, target_path, "", "")) + + # ------------------------------ Streamable HTTP Transport ------------------------------ @@ -655,6 +796,29 @@ def _check_scoped_permission(user_context: dict[str, Any], permission: str) -> b return allowed +def _check_any_team_for_server_scoped_rbac(user_context: dict[str, Any] | None, server_id: str | None) -> bool: + """Return whether Streamable HTTP RBAC should check across team-scoped roles. + + Server-scoped MCP routes (``/servers//mcp``) should authorize team-bound + callers against the specific virtual server context. Session tokens already do + this via ``check_any_team=True`` because they have no single explicit team_id. + Team-scoped API tokens need the same treatment on server-scoped routes; otherwise + they are evaluated only in global scope and incorrectly denied. + + Args: + user_context: Current authenticated MCP user context, if any. + server_id: Effective virtual server identifier for the MCP request. + + Returns: + ``True`` when RBAC should search across the caller's token teams. + """ + if not user_context: + return False + if user_context.get("token_use") == "session": + return True + return bool(server_id) and bool(user_context.get("teams")) + + def set_shared_session_registry(session_registry: Any) -> None: """Set the process-wide session registry used by Streamable HTTP helpers. @@ -723,6 +887,9 @@ async def _validate_streamable_session_access( if not _should_enforce_streamable_rbac(user_context): return True, 200, "" + if isinstance(user_context, dict) and user_context.get("_rust_session_validated") is True: + return True, 200, "" + # Initialize establishes a new session and is authorized separately. 
if (rpc_method or "").strip() == "initialize": return True, 200, "" @@ -996,11 +1163,10 @@ async def call_tool(name: str, arguments: dict) -> Union[ # Layer 2: RBAC check # Session tokens have no explicit team_id; check across all team-scoped roles. # Mirrors the @require_permission decorator's check_any_team fallback (rbac.py:562-576). - _is_session_token = user_context.get("token_use") == "session" has_execute_permission = await _check_streamable_permission( user_context=user_context, permission="tools.execute", - check_any_team=_is_session_token, + check_any_team=_check_any_team_for_server_scoped_rbac(user_context, server_id), ) if not has_execute_permission: raise PermissionError(_ACCESS_DENIED_MSG) @@ -2057,14 +2223,13 @@ async def set_logging_level(level: types.LoggingLevel) -> types.EmptyResult: if _should_enforce_streamable_rbac(user_context): # Layer 1: Token scope cap - # MCP logging/setLevel is a standard MCP capability invoked by clients during - # initialization; servers.use (not admin.system_config) keeps the handshake working. 
- if not _check_scoped_permission(user_context, "servers.use"): + if not _check_scoped_permission(user_context, "admin.system_config"): raise PermissionError(_ACCESS_DENIED_MSG) # Layer 2: RBAC check has_permission = await _check_streamable_permission( user_context=user_context, - permission="servers.use", + permission="admin.system_config", + check_any_team=_check_any_team_for_server_scoped_rbac(user_context, server_id), ) if not has_permission: raise PermissionError(_ACCESS_DENIED_MSG) @@ -2229,8 +2394,14 @@ def __init__(self) -> None: """ if settings.use_stateful_sessions: + if settings.experimental_rust_mcp_runtime_enabled and settings.experimental_rust_mcp_session_auth_reuse_enabled and settings.experimental_rust_mcp_event_store_enabled: + event_store = RustEventStore( + max_events_per_stream=settings.streamable_http_max_events_per_stream, + ttl=settings.streamable_http_event_ttl, + ) + logger.debug("Using RustEventStore for stateful sessions") # Use Redis event store for single-worker stateful deployments - if settings.cache_type == "redis" and settings.redis_url: + elif settings.cache_type == "redis" and settings.redis_url: event_store = RedisEventStore(max_events_per_stream=settings.streamable_http_max_events_per_stream, ttl=settings.streamable_http_event_ttl) logger.debug("Using RedisEventStore for stateful sessions (single-worker)") else: @@ -2361,11 +2532,11 @@ async def handle_streamable_http(self, scope: Scope, receive: Receive, send: Sen # This mirrors /servers/{id}/sse and /servers/{id}/message guards. 
user_context = user_context_var.get() if match and _should_enforce_streamable_rbac(user_context): - _is_session = user_context.get("token_use") == "session" if user_context else False + _server_id = match.group("server_id") has_server_access = await _check_streamable_permission( user_context=user_context, permission="servers.use", - check_any_team=_is_session, + check_any_team=_check_any_team_for_server_scoped_rbac(user_context, _server_id), ) if not has_server_access: response = ORJSONResponse( @@ -2766,10 +2937,47 @@ def _set_proxy_user_context(proxy_user: str) -> None: "teams": [], "is_authenticated": True, "is_admin": False, + "permission_is_admin": False, } ) +def get_streamable_http_auth_context() -> dict[str, Any]: + """Return the current StreamableHTTP auth context for trusted internal forwarding. + + The Rust MCP proxy uses this to carry already-authenticated MCP request context + across the Python -> Rust -> Python seam so the internal dispatcher does not + need to repeat JWT verification and team normalization on the hot path. + + Returns: + A shallow copy of the trusted auth context fields that may be forwarded + across the internal MCP seam. + """ + user_context = user_context_var.get() + if not isinstance(user_context, dict): + return {} + + forwarded: dict[str, Any] = {} + for key in ( + "email", + "teams", + "is_authenticated", + "is_admin", + "token_use", + "permission_is_admin", + "scoped_permissions", + "scoped_server_id", + ): + if key not in user_context: + continue + value = user_context[key] + if isinstance(value, list): + forwarded[key] = list(value) + else: + forwarded[key] = value + return forwarded + + class _StreamableHttpAuthHandler: """Per-request handler that authenticates MCP StreamableHTTP requests. 
@@ -2901,6 +3109,7 @@ async def _auth_no_token(self, *, path: str, bearer_header_supplied: bool) -> bo "teams": [], # Empty list = public-only access "is_authenticated": False, "is_admin": False, + "permission_is_admin": False, } ) return True # Allow request to proceed with public-only access @@ -2920,23 +3129,109 @@ async def _auth_jwt(self, *, token: str) -> bool: if not isinstance(user_payload, dict): return True - jti = user_payload.get("jti") - if jti: - # First-Party - from mcpgateway.auth import _check_token_revoked_sync # pylint: disable=import-outside-toplevel + # First-Party + from mcpgateway.auth import _get_auth_context_batched_sync # pylint: disable=import-outside-toplevel + from mcpgateway.cache.auth_cache import CachedAuthContext, get_auth_cache # pylint: disable=import-outside-toplevel + jti = user_payload.get("jti") + user_email = user_payload.get("sub") or user_payload.get("email") + nested_user = user_payload.get("user", {}) + nested_is_admin = nested_user.get("is_admin", False) if isinstance(nested_user, dict) else False + is_admin = user_payload.get("is_admin", False) or nested_is_admin + token_use = user_payload.get("token_use") + db_user_is_admin = False + user_record = None + auth_cache = get_auth_cache() if settings.auth_cache_enabled else None + cached_ctx: CachedAuthContext | None = None + batched_auth_ctx: dict[str, Any] | None = None + cached_team_ids: list[str] | None = None + platform_admin_email = getattr(settings, "platform_admin_email", "admin@example.com") + + if user_email and auth_cache is not None: try: - is_revoked = await asyncio.to_thread(_check_token_revoked_sync, jti) - except Exception as exc: - logger.warning("MCP token revocation check failed for jti=%s; allowing request (fail-open): %s", jti, exc) - is_revoked = False - if is_revoked: - return await self._send_error(detail="Token has been revoked", headers={"WWW-Authenticate": "Bearer"}) + cached_ctx = await auth_cache.get_auth_context(user_email, jti) + if cached_ctx 
is not None: + _record_mcp_auth_cache_event("auth_context_hit") + if cached_ctx.is_token_revoked: + _record_mcp_auth_cache_event("auth_context_hit_revoked") + return await self._send_error(detail="Token has been revoked", headers={"WWW-Authenticate": "Bearer"}) + + cached_user = cached_ctx.user + if cached_user and not cached_user.get("is_active", True): + _record_mcp_auth_cache_event("auth_context_hit_inactive") + return await self._send_error(detail="Account disabled", headers={"WWW-Authenticate": "Bearer"}) + + if cached_user: + db_user_is_admin = bool(cached_user.get("is_admin", False)) + elif settings.require_user_in_db and user_email != platform_admin_email: + return await self._send_error(detail="User not found in database", headers={"WWW-Authenticate": "Bearer"}) + + if token_use == "session" and not is_admin: # nosec B105 - token_use is a JWT claim type, not a password + cached_team_ids = await auth_cache.get_user_teams(f"{user_email}:True") + if cached_team_ids is not None: + _record_mcp_auth_cache_event("teams_cache_hit") + else: + _record_mcp_auth_cache_event("auth_context_miss") + except HTTPException: + raise + except Exception as cache_error: + _record_mcp_auth_cache_event("auth_context_cache_error") + logger.debug("MCP auth cache lookup failed for %s: %s", user_email, cache_error) + cached_ctx = None + + if user_email and cached_ctx is None and settings.auth_cache_batch_queries: + try: + batched_auth_ctx = await asyncio.to_thread(_get_auth_context_batched_sync, user_email, jti) + _record_mcp_auth_cache_event("auth_context_batch_hit") + if batched_auth_ctx.get("is_token_revoked", False): + _record_mcp_auth_cache_event("auth_context_batch_revoked") + return await self._send_error(detail="Token has been revoked", headers={"WWW-Authenticate": "Bearer"}) + + cached_user = batched_auth_ctx.get("user") + if cached_user and not cached_user.get("is_active", True): + _record_mcp_auth_cache_event("auth_context_batch_inactive") + return await 
self._send_error(detail="Account disabled", headers={"WWW-Authenticate": "Bearer"}) - user_email = user_payload.get("sub") or user_payload.get("email") - if user_email: + if cached_user: + db_user_is_admin = bool(cached_user.get("is_admin", False)) + elif settings.require_user_in_db and user_email != platform_admin_email: + return await self._send_error(detail="User not found in database", headers={"WWW-Authenticate": "Bearer"}) + + if auth_cache is not None: + await auth_cache.set_auth_context( + user_email, + jti, + CachedAuthContext( + user=cached_user, + personal_team_id=batched_auth_ctx.get("personal_team_id"), + is_token_revoked=bool(batched_auth_ctx.get("is_token_revoked", False)), + ), + ) + if token_use == "session" and not is_admin: # nosec B105 - token_use is a JWT claim type, not a password + cached_team_ids = list(batched_auth_ctx.get("team_ids") or []) + await auth_cache.set_user_teams(f"{user_email}:True", cached_team_ids) + _record_mcp_auth_cache_event("teams_batch_hit") + except HTTPException: + raise + except Exception as batch_error: + _record_mcp_auth_cache_event("auth_context_batch_error") + logger.warning("Batched MCP auth lookup failed for user=%s; falling back to individual checks: %s", user_email, batch_error) + batched_auth_ctx = None + + if user_email and cached_ctx is None and batched_auth_ctx is None: + _record_mcp_auth_cache_event("auth_context_fallback") # First-Party - from mcpgateway.auth import _get_user_by_email_sync # pylint: disable=import-outside-toplevel + from mcpgateway.auth import _check_token_revoked_sync, _get_user_by_email_sync # pylint: disable=import-outside-toplevel + + is_revoked = False + if jti: + try: + is_revoked = await asyncio.to_thread(_check_token_revoked_sync, jti) + except Exception as exc: + logger.warning("MCP token revocation check failed for jti=%s; allowing request (fail-open): %s", jti, exc) + is_revoked = False + if is_revoked: + return await self._send_error(detail="Token has been revoked", 
headers={"WWW-Authenticate": "Bearer"}) user_lookup_succeeded = True try: @@ -2949,23 +3244,61 @@ async def _auth_jwt(self, *, token: str) -> bool: if user_lookup_succeeded: if user_record and not getattr(user_record, "is_active", True): return await self._send_error(detail="Account disabled", headers={"WWW-Authenticate": "Bearer"}) - if user_record is None and settings.require_user_in_db and user_email != getattr(settings, "platform_admin_email", "admin@example.com"): + if user_record: + db_user_is_admin = bool(getattr(user_record, "is_admin", False)) + if user_record is None and settings.require_user_in_db and user_email != platform_admin_email: return await self._send_error(detail="User not found in database", headers={"WWW-Authenticate": "Bearer"}) - # Resolve teams based on token_use claim - token_use = user_payload.get("token_use") + if auth_cache is not None: + try: + await auth_cache.set_auth_context( + user_email, + jti, + CachedAuthContext( + user=( + { + "email": user_record.email, + "password_hash": user_record.password_hash, + "full_name": user_record.full_name, + "is_admin": bool(user_record.is_admin), + "is_active": bool(user_record.is_active), + "auth_provider": user_record.auth_provider, + "password_change_required": bool(user_record.password_change_required), + "email_verified_at": user_record.email_verified_at, + "created_at": user_record.created_at, + "updated_at": user_record.updated_at, + } + if user_record is not None + else None + ), + personal_team_id=None, + is_token_revoked=is_revoked, + ), + ) + except Exception as cache_set_error: + logger.debug("Failed to cache MCP auth context for %s: %s", user_email, cache_set_error) + if token_use == "session": # nosec B105 - Not a password; token_use is a JWT claim type # Session token: resolve teams from DB/cache - user_email_for_teams = user_payload.get("sub") or user_payload.get("email") - is_admin_flag = user_payload.get("is_admin", False) or user_payload.get("user", {}).get("is_admin", False) 
- if is_admin_flag: + if is_admin: final_teams = None # Admin bypass - elif user_email_for_teams: - # Resolve teams synchronously with L1 cache (StreamableHTTP uses sync context) - # First-Party - from mcpgateway.auth import _resolve_teams_from_db_sync # pylint: disable=import-outside-toplevel - - final_teams = _resolve_teams_from_db_sync(user_email_for_teams, is_admin=False) + elif user_email: + if cached_team_ids is not None: + final_teams = cached_team_ids + elif batched_auth_ctx is not None: + final_teams = list(batched_auth_ctx.get("team_ids") or []) + else: + _record_mcp_auth_cache_event("teams_db_resolve") + # Resolve teams synchronously with L1 cache (StreamableHTTP uses sync context) + # First-Party + from mcpgateway.auth import _resolve_teams_from_db_sync # pylint: disable=import-outside-toplevel + + final_teams = _resolve_teams_from_db_sync(user_email, is_admin=False) + if auth_cache is not None and final_teams is not None: + try: + await auth_cache.set_user_teams(f"{user_email}:True", final_teams) + except Exception as cache_set_error: + logger.debug("Failed to cache MCP teams list for %s: %s", user_email, cache_set_error) else: final_teams = [] # No email — public-only else: @@ -2979,8 +3312,6 @@ async def _auth_jwt(self, *, token: str) -> bool: # SECURITY: Validate team membership for team-scoped tokens # Users removed from a team should lose MCP access immediately, not at token expiry # ═══════════════════════════════════════════════════════════════════════════ - is_admin = user_payload.get("is_admin", False) or user_payload.get("user", {}).get("is_admin", False) - # Only validate membership for team-scoped tokens (non-empty teams list) # Skip for: public-only tokens ([]), admin unrestricted tokens (None) if final_teams and len(final_teams) > 0 and user_email: @@ -2997,10 +3328,12 @@ async def _auth_jwt(self, *, token: str) -> bool: # Check cache first (60s TTL) cached_result = auth_cache.get_team_membership_valid_sync(user_email, final_teams) if 
cached_result is False: + _record_mcp_auth_cache_event("team_membership_cache_reject") logger.warning("MCP auth rejected: User %s no longer member of teams (cached)", user_email) return await self._send_error(detail="Token invalid: User is no longer a member of the associated team", status_code=HTTP_403_FORBIDDEN) if cached_result is None: + _record_mcp_auth_cache_event("team_membership_cache_miss") # Cache miss - query database with SessionLocal() as db: memberships = ( @@ -3025,12 +3358,15 @@ async def _auth_jwt(self, *, token: str) -> bool: # Cache positive result auth_cache.set_team_membership_valid_sync(user_email, final_teams, True) + else: + _record_mcp_auth_cache_event("team_membership_cache_hit") auth_user_ctx: dict[str, Any] = { "email": user_email, "teams": final_teams, "is_authenticated": True, "is_admin": is_admin, + "permission_is_admin": db_user_is_admin or is_admin, "token_use": token_use, # propagated for downstream RBAC (check_any_team) } # Extract scoped permissions from JWT for per-method enforcement @@ -3038,6 +3374,9 @@ async def _auth_jwt(self, *, token: str) -> bool: jwt_scoped_perms = jwt_scopes.get("permissions") or [] if isinstance(jwt_scopes, dict) else [] if jwt_scoped_perms: auth_user_ctx["scoped_permissions"] = jwt_scoped_perms + scoped_server_id = jwt_scopes.get("server_id") if isinstance(jwt_scopes, dict) else None + if isinstance(scoped_server_id, str) and scoped_server_id: + auth_user_ctx["scoped_server_id"] = scoped_server_id user_context_var.set(auth_user_ctx) except HTTPException: # JWT verification failed (expired, malformed, bad signature, etc.) diff --git a/mcpgateway/version.py b/mcpgateway/version.py index dd6e670bfe..9dee323947 100644 --- a/mcpgateway/version.py +++ b/mcpgateway/version.py @@ -103,6 +103,314 @@ router = APIRouter(tags=["meta"]) +def _env_flag(name: str, default: bool = False) -> bool: + """Read a boolean environment variable using common truthy spellings. + + Args: + name: Environment variable name. 
+ default: Default value used when the variable is unset. + + Returns: + Parsed boolean value. + """ + value = os.getenv(name) + if value is None: + return default + return value.strip().lower() in {"1", "true", "yes", "on"} + + +def _rust_build_included() -> bool: + """Return whether the current image includes Rust MCP artifacts. + + Returns: + ``True`` when the current image contains the Rust MCP binaries/plugins. + """ + return _env_flag("CONTEXTFORGE_ENABLE_RUST_BUILD", default=False) + + +def _rust_runtime_managed() -> bool: + """Return whether the gateway expects to manage the Rust MCP sidecar locally. + + Returns: + ``True`` when the gateway should launch and supervise the Rust sidecar. + """ + return _env_flag("EXPERIMENTAL_RUST_MCP_RUNTIME_MANAGED", default=True) + + +def _current_mcp_transport_mount() -> str: + """Return which public ``/mcp`` transport is currently mounted. + + Returns: + Runtime label identifying the currently mounted public MCP transport. + """ + return "rust" if _should_mount_public_rust_transport() else "python" + + +def _should_mount_public_rust_transport() -> bool: + """Return whether public ``/mcp`` should be served directly by Rust. + + Returns: + ``True`` only when the Rust runtime is enabled and Rust can safely own + steady-state public MCP session traffic. + """ + return bool(settings.experimental_rust_mcp_runtime_enabled and settings.experimental_rust_mcp_session_auth_reuse_enabled) + + +def _should_use_rust_public_session_stack() -> bool: + """Return whether Rust should own the effective public MCP session stack. + + Returns: + ``True`` only when the public MCP transport and session semantics should + stay on the Rust-backed path. + """ + return _should_mount_public_rust_transport() + + +def _current_mcp_runtime_mode() -> str: + """Return the current MCP runtime mode label used for health and UI surfaces. + + Returns: + Human-readable runtime mode label for diagnostics and UI reporting. 
+ """ + if settings.experimental_rust_mcp_runtime_enabled: + return "rust-managed" if _rust_runtime_managed() else "rust-external" + if _rust_build_included(): + return "python-rust-built-disabled" + return "python" + + +def _current_mcp_session_core_mode() -> str: + """Return which runtime currently owns MCP session metadata. + + Returns: + ``"rust"`` when the Rust session core is enabled, otherwise ``"python"``. + """ + if _should_use_rust_public_session_stack() and settings.experimental_rust_mcp_session_core_enabled: + return "rust" + return "python" + + +def _current_mcp_event_store_mode() -> str: + """Return which runtime currently owns MCP resumable event-store semantics. + + Returns: + ``"rust"`` when the Rust event store is enabled, otherwise ``"python"``. + """ + if _should_use_rust_public_session_stack() and settings.experimental_rust_mcp_event_store_enabled: + return "rust" + return "python" + + +def _current_mcp_resume_core_mode() -> str: + """Return which runtime currently owns public MCP replay/resume behavior. + + Returns: + ``"rust"`` when Rust owns replay/resume, otherwise ``"python"``. + """ + if ( + _should_use_rust_public_session_stack() + and settings.experimental_rust_mcp_session_core_enabled + and settings.experimental_rust_mcp_event_store_enabled + and settings.experimental_rust_mcp_resume_core_enabled + ): + return "rust" + return "python" + + +def _current_mcp_live_stream_core_mode() -> str: + """Return which runtime currently owns non-resume public GET ``/mcp`` SSE behavior. + + Returns: + ``"rust"`` when Rust owns live GET ``/mcp`` streaming, otherwise ``"python"``. + """ + if _should_use_rust_public_session_stack() and settings.experimental_rust_mcp_live_stream_core_enabled: + return "rust" + return "python" + + +def _current_mcp_affinity_core_mode() -> str: + """Return which runtime currently owns MCP multi-worker session-affinity forwarding. + + Returns: + ``"rust"`` when Rust owns session-affinity forwarding, otherwise ``"python"``. 
+ """ + if _should_use_rust_public_session_stack() and settings.experimental_rust_mcp_affinity_core_enabled: + return "rust" + return "python" + + +def _current_mcp_session_auth_reuse_mode() -> str: + """Return which runtime currently owns MCP session-bound auth-context reuse. + + Returns: + ``"rust"`` when Rust session auth reuse is enabled, otherwise ``"python"``. + """ + if settings.experimental_rust_mcp_runtime_enabled and settings.experimental_rust_mcp_session_auth_reuse_enabled: + return "rust" + return "python" + + +def _mcp_runtime_status_payload() -> Dict[str, Any]: + """Return MCP runtime diagnostics for health, UI, and version surfaces. + + Returns: + Diagnostic payload describing the active MCP runtime configuration. + """ + payload: Dict[str, Any] = { + "mode": _current_mcp_runtime_mode(), + "mounted": _current_mcp_transport_mount(), + "rust_build_included": _rust_build_included(), + "rust_runtime_enabled": settings.experimental_rust_mcp_runtime_enabled, + "session_core_mode": _current_mcp_session_core_mode(), + "event_store_mode": _current_mcp_event_store_mode(), + "resume_core_mode": _current_mcp_resume_core_mode(), + "live_stream_core_mode": _current_mcp_live_stream_core_mode(), + "affinity_core_mode": _current_mcp_affinity_core_mode(), + "session_auth_reuse_mode": _current_mcp_session_auth_reuse_mode(), + "rust_session_core_enabled": bool(_should_use_rust_public_session_stack() and settings.experimental_rust_mcp_session_core_enabled), + "rust_event_store_enabled": bool(_should_use_rust_public_session_stack() and settings.experimental_rust_mcp_event_store_enabled), + "rust_resume_core_enabled": bool( + _should_use_rust_public_session_stack() + and settings.experimental_rust_mcp_session_core_enabled + and settings.experimental_rust_mcp_event_store_enabled + and settings.experimental_rust_mcp_resume_core_enabled + ), + "rust_live_stream_core_enabled": bool(_should_use_rust_public_session_stack() and 
settings.experimental_rust_mcp_live_stream_core_enabled), + "rust_affinity_core_enabled": bool(_should_use_rust_public_session_stack() and settings.experimental_rust_mcp_affinity_core_enabled), + "rust_session_auth_reuse_enabled": bool(settings.experimental_rust_mcp_runtime_enabled and settings.experimental_rust_mcp_session_auth_reuse_enabled), + } + + if settings.experimental_rust_mcp_runtime_enabled: + payload["rust_runtime_managed"] = _rust_runtime_managed() + if settings.experimental_rust_mcp_runtime_uds: + payload["sidecar_transport"] = "uds" + payload["sidecar_target"] = settings.experimental_rust_mcp_runtime_uds + else: + payload["sidecar_transport"] = "http" + payload["sidecar_target"] = settings.experimental_rust_mcp_runtime_url + + return payload + + +def rust_build_included() -> bool: + """Return whether the current image includes Rust MCP artifacts. + + Returns: + ``True`` when the current image contains the Rust MCP binaries/plugins. + """ + return _rust_build_included() + + +def rust_runtime_managed() -> bool: + """Return whether the gateway expects to manage the Rust MCP sidecar locally. + + Returns: + ``True`` when the gateway should launch and supervise the Rust sidecar. + """ + return _rust_runtime_managed() + + +def current_mcp_transport_mount() -> str: + """Return which public ``/mcp`` transport is currently mounted. + + Returns: + Runtime label identifying the currently mounted public MCP transport. + """ + return _current_mcp_transport_mount() + + +def should_mount_public_rust_transport() -> bool: + """Return whether public ``/mcp`` should be served directly by Rust. + + Returns: + ``True`` only when the Rust runtime is enabled and Rust can safely own + steady-state public MCP session traffic. + """ + return _should_mount_public_rust_transport() + + +def should_use_rust_public_session_stack() -> bool: + """Return whether Rust should own the effective public MCP session stack. 
+ + Returns: + ``True`` only when the public MCP transport and session semantics should + stay on the Rust-backed path. + """ + return _should_use_rust_public_session_stack() + + +def current_mcp_runtime_mode() -> str: + """Return the current MCP runtime mode label used for health and UI surfaces. + + Returns: + Human-readable runtime mode label for diagnostics and UI reporting. + """ + return _current_mcp_runtime_mode() + + +def current_mcp_session_core_mode() -> str: + """Return which runtime currently owns MCP session metadata. + + Returns: + ``"rust"`` when the Rust session core is enabled, otherwise ``"python"``. + """ + return _current_mcp_session_core_mode() + + +def current_mcp_event_store_mode() -> str: + """Return which runtime currently owns MCP resumable event-store semantics. + + Returns: + ``"rust"`` when the Rust event store is enabled, otherwise ``"python"``. + """ + return _current_mcp_event_store_mode() + + +def current_mcp_resume_core_mode() -> str: + """Return which runtime currently owns public MCP replay/resume behavior. + + Returns: + ``"rust"`` when Rust owns replay/resume, otherwise ``"python"``. + """ + return _current_mcp_resume_core_mode() + + +def current_mcp_live_stream_core_mode() -> str: + """Return which runtime currently owns non-resume public GET ``/mcp`` SSE behavior. + + Returns: + ``"rust"`` when Rust owns live GET ``/mcp`` streaming, otherwise ``"python"``. + """ + return _current_mcp_live_stream_core_mode() + + +def current_mcp_affinity_core_mode() -> str: + """Return which runtime currently owns MCP multi-worker session-affinity forwarding. + + Returns: + ``"rust"`` when Rust owns session-affinity forwarding, otherwise ``"python"``. + """ + return _current_mcp_affinity_core_mode() + + +def current_mcp_session_auth_reuse_mode() -> str: + """Return which runtime currently owns MCP session-bound auth-context reuse. + + Returns: + ``"rust"`` when Rust session auth reuse is enabled, otherwise ``"python"``. 
+ """ + return _current_mcp_session_auth_reuse_mode() + + +def mcp_runtime_status_payload() -> Dict[str, Any]: + """Return MCP runtime diagnostics for health, UI, and version surfaces. + + Returns: + Diagnostic payload describing the active MCP runtime configuration. + """ + return _mcp_runtime_status_payload() + + def _is_secret(key: str) -> bool: """Identify if an environment variable key likely represents a secret. @@ -563,6 +871,7 @@ def _build_payload( "metrics_cleanup_enabled": getattr(settings, "metrics_cleanup_enabled", True), "metrics_rollup_enabled": getattr(settings, "metrics_rollup_enabled", True), }, + "mcp_runtime": _mcp_runtime_status_payload(), "env": _public_env(), "system": _system_metrics(), } @@ -629,6 +938,7 @@ def _render_html(payload: Dict[str, Any]) -> str: ... "database": {"dialect": "sqlite", "reachable": True}, ... "redis": {"available": False}, ... "settings": {"cache_type": "memory"}, + ... "mcp_runtime": {"mode": "python", "mounted": "python"}, ... "system": {"cpu_count": 4}, ... "env": {"PATH": "/usr/bin"} ... } @@ -646,6 +956,8 @@ def _render_html(payload: Dict[str, Any]) -> str: True >>> '
<h2>Database</h2>' in html
    True
+    >>> '<h2>MCP Runtime</h2>' in html
+    True
    >>> '