From 076b3dc63061071b6d43d118a5181b19f20d698c Mon Sep 17 00:00:00 2001 From: Jonathan Springer Date: Thu, 9 Oct 2025 18:36:27 +0100 Subject: [PATCH 1/3] Fix invalid configuration --- src/databeak/server.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/databeak/server.py b/src/databeak/server.py index 3dfc49a..40aef3f 100644 --- a/src/databeak/server.py +++ b/src/databeak/server.py @@ -14,7 +14,6 @@ # This module will tweak the JSON schema validator to accept relaxed types from databeak.core.json_schema_validate import initialize_relaxed_validation -from databeak.core.settings import DatabeakSettings # Local imports from databeak.servers.column_server import column_server @@ -94,7 +93,7 @@ def data_cleaning_prompt(session_id: str) -> str: # ============================================================================ -@smithery.server(config_schema=DatabeakSettings) +@smithery.server() def create_server() -> FastMCP: """Create and return the FastMCP server instance.""" # Initialize FastMCP server From 38ba4ed5d4bc10eef4762b1af56352e78b57eafe Mon Sep 17 00:00:00 2001 From: Jonathan Springer Date: Fri, 10 Oct 2025 19:25:02 +0100 Subject: [PATCH 2/3] fix: smithery OIDC authentication --- README.md | 38 ++++++--- docs/installation.md | 83 ++++++++++--------- src/databeak/server.py | 43 +++++++++- .../servers/test_discovery_server_coverage.py | 28 +++---- 4 files changed, 130 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index 14e67bd..82b5113 100644 --- a/README.md +++ b/README.md @@ -40,11 +40,7 @@ Add this to your MCP Settings file: "mcpServers": { "databeak": { "command": "uvx", - "args": [ - "--from", - "git+https://github.com/jonpspri/databeak.git", - "databeak" - ] + "args": ["databeak"] } } } @@ -58,14 +54,18 @@ specific configuration examples. ### HTTP Mode (Advanced) -For HTTP-based AI clients or custom deployments: +For HTTP-based AI clients or custom deployments with OIDC authentication: ```bash # Run in HTTP mode -uv run databeak --transport http --host 0.0.0.0 --port 8000 - -# Access server at http://localhost:8000/mcp -# Health check at http://localhost:8000/health +uvx databeak --transport http --host 0.0.0.0 --port 8000 + +# With OIDC authentication +export DATABEAK_OIDC_CONFIG_URL="https://auth.example.com/.well-known/openid-configuration" +export DATABEAK_OIDC_CLIENT_ID="databeak-client" +export DATABEAK_OIDC_CLIENT_SECRET="your-secret" +export DATABEAK_OIDC_BASE_URL="https://databeak.example.com" +uvx databeak --transport http --host 0.0.0.0 --port 8000 ``` ### Quick Test @@ -97,6 +97,8 @@ Once configured, ask your AI assistant: Configure DataBeak behavior with environment variables (all use `DATABEAK_` prefix): +### Core Configuration + | Variable | Default | Description | | ------------------------------------- | --------- | ---------------------------------- | | `DATABEAK_SESSION_TIMEOUT` | 3600 | Session timeout (seconds) | @@ -106,6 +108,22 @@ prefix): | `DATABEAK_URL_TIMEOUT_SECONDS` | 30 | URL download timeout | | `DATABEAK_HEALTH_MEMORY_THRESHOLD_MB` | 2048 | Health monitoring memory threshold | +### Authentication (HTTP Mode Only) + +For HTTP mode deployments, DataBeak supports OpenID Connect authentication. All +four variables must be set to enable OIDC (not applicable for stdio mode): + +| Variable | Required | Description | +| ----------------------------- | -------- | -------------------------------- | +| `DATABEAK_OIDC_CONFIG_URL` | Yes | OIDC discovery configuration URL | +| `DATABEAK_OIDC_CLIENT_ID` | Yes | OAuth2 client ID | +| `DATABEAK_OIDC_CLIENT_SECRET` | Yes | OAuth2 client secret | +| `DATABEAK_OIDC_BASE_URL` | Yes | Application base URL for OAuth2 | + +**Note**: OIDC authentication is only used in HTTP transport mode. If any OIDC +variable is set but not all four, DataBeak will log an error and authentication +will not be enabled. + See [settings.py](src/databeak/core/settings.py) for complete configuration options. diff --git a/docs/installation.md b/docs/installation.md index 207b23f..c4a7e8f 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -21,8 +21,8 @@ and client configuration. The fastest way to install and run DataBeak: ```bash -# Install and run directly from GitHub -uvx --from git+https://github.com/jonpspri/databeak.git databeak +# Install and run from PyPI +uvx databeak ``` ### Using uv @@ -47,8 +47,8 @@ uv run databeak ### Using pip ```bash -# Install directly from GitHub -pip install git+https://github.com/jonpspri/databeak.git +# Install from PyPI +pip install databeak # Run the server databeak @@ -68,15 +68,10 @@ Settings): "mcpServers": { "databeak": { "command": "uvx", - "args": [ - "--from", - "git+https://github.com/jonpspri/databeak.git", - "databeak" - ], + "args": ["databeak"], "env": { - "DATABEAK_MAX_FILE_SIZE_MB": "2048", - "DATABEAK_SESSION_TIMEOUT": "7200", - "DATABEAK_CHUNK_SIZE": "20000" + "DATABEAK_MAX_DOWNLOAD_SIZE_MB": "200", + "DATABEAK_SESSION_TIMEOUT": "7200" } } } @@ -92,11 +87,7 @@ Edit `~/.continue/config.json`: "mcpServers": { "databeak": { "command": "uvx", - "args": [ - "--from", - "git+https://github.com/jonpspri/databeak.git", - "databeak" - ] + "args": ["databeak"] } } } @@ -111,11 +102,7 @@ Add to VS Code settings (`settings.json`): "cline.mcpServers": { "databeak": { "command": "uvx", - "args": [ - "--from", - "git+https://github.com/jonpspri/databeak.git", - "databeak" - ] + "args": ["databeak"] } } } @@ -130,11 +117,7 @@ Edit `~/.windsurf/mcp_servers.json`: "mcpServers": { "databeak": { "command": "uvx", - "args": [ - "--from", - "git+https://github.com/jonpspri/databeak.git", - "databeak" - ] + "args": ["databeak"] } } } @@ -149,11 +132,7 @@ Edit `~/.config/zed/settings.json`: "experimental.mcp_servers": { "databeak": { "command": "uvx", - "args": [ - "--from", - "git+https://github.com/jonpspri/databeak.git", - "databeak" - ] + "args": ["databeak"] } } } @@ -163,6 +142,8 @@ Edit `~/.config/zed/settings.json`: Configure DataBeak behavior with these environment variables: +### Core Configuration + | Variable | Default | Description | | --------------------------------------------- | ------- | ---------------------------------------- | | `DATABEAK_MAX_FILE_SIZE_MB` | 1024 | Maximum file size in MB | @@ -175,6 +156,36 @@ Configure DataBeak behavior with these environment variables: | `DATABEAK_MAX_VALIDATION_VIOLATIONS` | 1000 | Max validation violations to report | | `DATABEAK_MAX_ANOMALY_SAMPLE_SIZE` | 10000 | Max sample size for anomaly detection | +### HTTP Mode Configuration + +For HTTP transport mode (`--transport http`), additional configuration options +are available: + +#### OIDC Authentication (HTTP Mode Only) + +OpenID Connect authentication for secure HTTP deployments. All four variables +must be set to enable OIDC: + +| Variable | Required | Description | +| ----------------------------- | -------- | -------------------------------- | +| `DATABEAK_OIDC_CONFIG_URL` | Yes | OIDC discovery configuration URL | +| `DATABEAK_OIDC_CLIENT_ID` | Yes | OAuth2 client ID | +| `DATABEAK_OIDC_CLIENT_SECRET` | Yes | OAuth2 client secret | +| `DATABEAK_OIDC_BASE_URL` | Yes | Application base URL for OAuth2 | + +**Example HTTP deployment with OIDC:** + +```bash +export DATABEAK_OIDC_CONFIG_URL="https://auth.example.com/.well-known/openid-configuration" +export DATABEAK_OIDC_CLIENT_ID="databeak-client" +export DATABEAK_OIDC_CLIENT_SECRET="your-secret" +export DATABEAK_OIDC_BASE_URL="https://databeak.example.com" +uvx databeak --transport http --host 0.0.0.0 --port 8000 +``` + +**Note**: OIDC authentication is only applicable for HTTP transport mode. Stdio +mode (default for MCP clients) does not use OIDC authentication. + ## Verification ### Test the Installation @@ -194,8 +205,7 @@ DATABEAK_LOG_LEVEL=DEBUG uv run databeak npm install -g @modelcontextprotocol/inspector # Test the server -mcp-inspector uvx --from \ - git+https://github.com/jonpspri/databeak.git databeak +mcp-inspector uvx databeak ``` ### Verify in Your AI Client @@ -210,9 +220,8 @@ mcp-inspector uvx --from \ #### Server not starting -- Check Python version: `python --version` (must be 3.10+) -- Verify installation: - `uvx --from \ git+https://github.com/jonpspri/databeak.git databeak --version` +- Check Python version: `python --version` (must be 3.12+) +- Verify installation: `uvx databeak --version` - Check logs with debug level #### Client can't connect diff --git a/src/databeak/server.py b/src/databeak/server.py index 40aef3f..cf06a8b 100644 --- a/src/databeak/server.py +++ b/src/databeak/server.py @@ -8,6 +8,8 @@ from pathlib import Path from fastmcp import FastMCP +from fastmcp.server.auth.oidc_proxy import OIDCProxy +from pydantic_settings import BaseSettings, SettingsConfigDict from smithery.decorators import smithery from databeak._version import __version__ @@ -31,6 +33,45 @@ logger = logging.getLogger(__name__) +class OidcSettings(BaseSettings): + """Settings for OIDC authentication.""" + + model_config = SettingsConfigDict(env_prefix="DATABEAK_OIDC_") + + config_url: str | None = None + client_id: str | None = None + client_secret: str | None = None + base_url: str | None = None + + +oidc_settings = OidcSettings() +auth: OIDCProxy | None = None + +if ( + oidc_settings.config_url + and oidc_settings.client_id + and oidc_settings.client_secret + and oidc_settings.base_url +): + auth = OIDCProxy( + config_url=oidc_settings.config_url, + client_id=oidc_settings.client_id, + client_secret=oidc_settings.client_secret, + base_url=oidc_settings.base_url, + ) +elif any( + [ + oidc_settings.config_url, + oidc_settings.client_id, + oidc_settings.client_secret, + oidc_settings.base_url, + ] +): + logger.error( + "Incomplete OIDC configuration. All of config_url, client_id, client_secret, and base_url must be set." + ) + + # ============================================================================ # HELPER FUNCTIONS # ============================================================================ @@ -97,7 +138,7 @@ def data_cleaning_prompt(session_id: str) -> str: def create_server() -> FastMCP: """Create and return the FastMCP server instance.""" # Initialize FastMCP server - mcp = FastMCP("DataBeak", instructions=_load_instructions(), version=__version__) + mcp = FastMCP("DataBeak", auth=auth, instructions=_load_instructions(), version=__version__) # Mount specialized servers mcp.mount(system_server) mcp.mount(io_server) diff --git a/tests/unit/servers/test_discovery_server_coverage.py b/tests/unit/servers/test_discovery_server_coverage.py index dbeed7f..1314e36 100644 --- a/tests/unit/servers/test_discovery_server_coverage.py +++ b/tests/unit/servers/test_discovery_server_coverage.py @@ -1,15 +1,12 @@ """Comprehensive unit tests for discovery_server module to improve coverage.""" import uuid -from unittest.mock import Mock, patch import numpy as np import pandas as pd import pytest from fastmcp import Context -from fastmcp.exceptions import ToolError -import databeak from databeak.core.session import get_session_manager from databeak.exceptions import ColumnNotFoundError, InvalidParameterError from databeak.servers.discovery_server import ( @@ -621,23 +618,26 @@ async def test_inspect_around_with_context(self, outliers_ctx: Context) -> None: class TestErrorHandling: """Test error handling across all functions.""" - @pytest.mark.skip(reason="TODO: Update error message expectations") async def test_no_data_loaded(self) -> None: """Test all functions when no data is loaded.""" - with patch.object(databeak, "session_manager") as manager: - session = Mock() - session.has_data.return_value = False - manager.return_value.get_session.return_value = session + # Create a real session without data + from databeak.exceptions import NoDataLoadedError - ctx = create_mock_context("no-data") + no_data_session_id = str(uuid.uuid4()) + manager = get_session_manager() + # Create session but don't load data + _session = manager.get_or_create_session(no_data_session_id) + + ctx = create_mock_context(no_data_session_id) - with pytest.raises(ToolError): - await detect_outliers(ctx) + # Both functions should raise NoDataLoadedError + with pytest.raises(NoDataLoadedError): + await detect_outliers(ctx) - ctx = create_mock_context("no-data") + ctx = create_mock_context(no_data_session_id) - with pytest.raises(ToolError): - await profile_data(ctx) + with pytest.raises(NoDataLoadedError): + await profile_data(ctx) async def test_edge_cases(self, outliers_ctx: Context) -> None: """Test various edge cases.""" From a7f0f490f9c6092bdc772670e758ed6be66ba55f Mon Sep 17 00:00:00 2001 From: Jonathan Springer Date: Mon, 24 Nov 2025 07:09:31 +0000 Subject: [PATCH 3/3] WIP --- .gitignore | 1 + src/databeak/__init__.py | 6 ++++++ src/databeak/core/settings.py | 7 +++++-- src/databeak/server.py | 35 +++++++++++++++++------------------ 4 files changed, 29 insertions(+), 20 deletions(-) diff --git a/.gitignore b/.gitignore index 9275ff5..9c9b2c7 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,7 @@ ipython_config.py # Environments .env +.envrc .venv env/ venv/ diff --git a/src/databeak/__init__.py b/src/databeak/__init__.py index 548deff..f0f67c2 100644 --- a/src/databeak/__init__.py +++ b/src/databeak/__init__.py @@ -2,7 +2,13 @@ __author__ = "Jonathan Springer" +import logging + from ._version import __version__ +from .core.settings import get_settings from .server import main +logging.getLogger("databeak").setLevel(get_settings().log_level) +logging.getLogger("mcp").setLevel(get_settings().log_level) + __all__ = ["__version__", "main"] diff --git a/src/databeak/core/settings.py b/src/databeak/core/settings.py index 8afa30c..38e302f 100644 --- a/src/databeak/core/settings.py +++ b/src/databeak/core/settings.py @@ -5,7 +5,7 @@ import threading from pydantic import Field -from pydantic_settings import BaseSettings +from pydantic_settings import BaseSettings, SettingsConfigDict class DatabeakSettings(BaseSettings): @@ -32,6 +32,9 @@ class DatabeakSettings(BaseSettings): - Various thresholds for quality checks and anomaly detection """ + # Logging + log_level: str = Field(default="INFO", description="Logging level") + # Session management session_timeout: int = Field(default=3600, description="Session timeout in seconds") session_capacity_warning_threshold: float = Field( @@ -110,7 +113,7 @@ class DatabeakSettings(BaseSettings): default=100, description="Multiplier for converting ratios to percentages" ) - model_config = {"env_prefix": "DATABEAK_", "case_sensitive": False} + model_config = SettingsConfigDict(env_prefix="DATABEAK_") _settings: DatabeakSettings | None = None diff --git a/src/databeak/server.py b/src/databeak/server.py index cf06a8b..a889112 100644 --- a/src/databeak/server.py +++ b/src/databeak/server.py @@ -96,12 +96,14 @@ def _load_instructions() -> str: # All tools have been migrated to specialized servers # No direct tool registration needed - using server composition pattern +mcp = FastMCP("DataBeak", auth=auth, instructions=_load_instructions(), version=__version__) # ============================================================================ # PROMPTS # ============================================================================ +@mcp.prompt def analyze_csv_prompt(session_id: str, analysis_type: str = "summary") -> str: """Generate a prompt to analyze CSV data.""" return f"""Please analyze the CSV data in session {session_id}. @@ -116,6 +118,7 @@ def analyze_csv_prompt(session_id: str, analysis_type: str = "summary") -> str: """ +@mcp.prompt def data_cleaning_prompt(session_id: str) -> str: """Generate a prompt for data cleaning suggestions.""" return f"""Review the data in session {session_id} and suggest cleaning operations. @@ -133,25 +136,21 @@ def data_cleaning_prompt(session_id: str) -> str: # MAIN ENTRY POINT # ============================================================================ +# Mount specialized servers +mcp.mount(system_server) +mcp.mount(io_server) +mcp.mount(row_operations_server) +mcp.mount(statistics_server) +mcp.mount(discovery_server) +mcp.mount(validation_server) +mcp.mount(transformation_server) +mcp.mount(column_server) +mcp.mount(column_text_server) + @smithery.server() -def create_server() -> FastMCP: - """Create and return the FastMCP server instance.""" - # Initialize FastMCP server - mcp = FastMCP("DataBeak", auth=auth, instructions=_load_instructions(), version=__version__) - # Mount specialized servers - mcp.mount(system_server) - mcp.mount(io_server) - mcp.mount(row_operations_server) - mcp.mount(statistics_server) - mcp.mount(discovery_server) - mcp.mount(validation_server) - mcp.mount(transformation_server) - mcp.mount(column_server) - mcp.mount(column_text_server) - - mcp.prompt()(analyze_csv_prompt) - mcp.prompt()(data_cleaning_prompt) +def create_smithery_server() -> FastMCP: + """Create and return the Smithery FastMCP server instance.""" return mcp @@ -199,7 +198,7 @@ def main() -> None: run_args["port"] = args.port # Run the server - create_server().run(**run_args) + mcp.run(**run_args) # type: ignore[arg-type] if __name__ == "__main__":