Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,20 @@ services:
command: './querybook/scripts/runservice scheduler --pidfile="/opt/celerybeat.pid"'
volumes: *querybook-volumes
depends_on: *querybook-depends-on
mcp:
container_name: querybook_mcp
image: *querybook-image
tty: true
stdin_open: true
command: "./querybook/scripts/runservice mcp"
ports:
- "${MCP_PORT:-8771}:${MCP_PORT:-8771}"
expose:
- "${MCP_PORT:-8771}"
environment:
MCP_PORT: "${MCP_PORT:-8771}"
volumes: *querybook-volumes
depends_on: *querybook-depends-on
redis:
container_name: querybook_redis
image: redis:5.0.9
Expand Down
3 changes: 3 additions & 0 deletions querybook/config/querybook_default_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ GITHUB_REPO_NAME: ~
GITHUB_BRANCH: 'main'
GITHUB_CRYPTO_SECRET: ''

# --------------- MCP Server ---------------
MCP_PORT: 8771

# --------------- Cache Control ---------------
# Cache control settings for HTTP responses on static assets
# Maximum age in seconds for static assets (CSS, JS, images, fonts)
Expand Down
63 changes: 63 additions & 0 deletions querybook/migrations/versions/320ccafaa979_add_mcp_event_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""add MCP event type

Revision ID: 320ccafaa979
Revises: a1b2c3d4e5f6
Create Date: 2026-03-04 14:00:00.000000

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = '320ccafaa979'
down_revision = 'a1b2c3d4e5f6'
branch_labels = None
depends_on = None


# Define the old and new EventType enum types
# (values mirror the EventType enum in querybook/server/const/event_log.py;
# "MCP" is the value added by this revision)
old_event_type_enum = sa.Enum("API", "WEBSOCKET", "VIEW", "CLICK", name="eventtype")
new_event_type_enum = sa.Enum("API", "WEBSOCKET", "VIEW", "CLICK", "MCP", name="eventtype")


def upgrade():
    """Add MCP to EventType enum"""
    conn = op.get_bind()
    dialect = conn.dialect.name

    if dialect == "postgresql":
        # PostgreSQL: Add new enum value to existing 'eventtype' enum.
        # ALTER TYPE ... ADD VALUE is additive and preserves existing rows.
        op.execute("ALTER TYPE eventtype ADD VALUE 'MCP'")
    else:
        # Other Databases (e.g., MySQL, SQLite): Alter 'event_log.event_type' column to use the new enum
        # NOTE(review): SQLite does not support ALTER COLUMN; if SQLite must be
        # supported by this migration, this likely needs op.batch_alter_table — confirm.
        op.alter_column(
            "event_log",
            "event_type",
            existing_type=old_event_type_enum,
            type_=new_event_type_enum,
        )


def downgrade():
    """Remove MCP from EventType enum.

    Rows already logged with event_type = 'MCP' cannot be represented by the
    old enum, so they are deleted first; otherwise the PostgreSQL
    ``USING event_type::text::eventtype`` cast would fail (the value is absent
    from the recreated type), and the MySQL enum narrowing would error or
    silently corrupt those rows.
    """
    bind = op.get_bind()
    dialect = bind.dialect.name

    # Purge rows using the value that is being removed from the enum.
    op.execute("DELETE FROM event_log WHERE event_type = 'MCP'")

    if dialect == "postgresql":
        # PostgreSQL: does not support removing enum values directly.
        # We need to create a new enum without 'MCP', rename the old one,
        # and update the column.
        op.execute("ALTER TYPE eventtype RENAME TO eventtype_old")
        old_event_type_enum.create(bind, checkfirst=True)
        op.execute(
            "ALTER TABLE event_log ALTER COLUMN event_type TYPE eventtype USING event_type::text::eventtype"
        )
        op.execute("DROP TYPE eventtype_old")
    else:
        # Other Databases (e.g., MySQL, SQLite): Revert 'event_log.event_type' column to the old enum
        op.alter_column(
            "event_log",
            "event_type",
            existing_type=new_event_type_enum,
            type_=old_event_type_enum,
        )
6 changes: 6 additions & 0 deletions querybook/scripts/runservice
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ case "$SERVICE_NAME" in
"flower")
COMMAND="celery -A tasks.all_tasks flower"
;;
"mcp")
COMMAND="watchmedo auto-restart -d querybook -p '*.py' -R -- python3 querybook/server/run_mcp.py"
;;
"prod_mcp")
COMMAND="python3 querybook/server/run_mcp.py"
;;
esac

if [ -z "$COMMAND" ]; then
Expand Down
2 changes: 2 additions & 0 deletions querybook/server/const/event_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ class EventType(Enum):
VIEW = "VIEW"
# a UI element gets clicked
CLICK = "CLICK"
# MCP (Model Context Protocol) operation
MCP = "MCP"


class FrontendEvent(TypedDict):
Expand Down
3 changes: 3 additions & 0 deletions querybook/server/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,9 @@ class QuerybookSettings(object):
GITHUB_BRANCH = get_env_config("GITHUB_BRANCH")
GITHUB_CRYPTO_SECRET = get_env_config("GITHUB_CRYPTO_SECRET")

# MCP Server
MCP_PORT = int(get_env_config("MCP_PORT") or 8771)

# Cache Control
CACHE_CONTROL_MAX_AGE = int(
get_env_config("CACHE_CONTROL_MAX_AGE") or "604800"
Expand Down
29 changes: 29 additions & 0 deletions querybook/server/lib/mcp/auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import hashlib

from fastmcp.server.auth import AccessToken, TokenVerifier

from app.db import DBSession
from models.admin import APIAccessToken


class QuerybookTokenVerifier(TokenVerifier):
    """Validate Querybook API keys (SHA-512 hashed) against the database."""

    async def verify_token(self, token: str) -> AccessToken | None:
        """Return an AccessToken for a valid, enabled API key, else None.

        API keys are stored as SHA-512 hex digests, so the presented token is
        hashed the same way before the lookup.
        """
        hashed = hashlib.sha512(token.encode("utf-8")).hexdigest()
        with DBSession() as session:
            match = (
                session.query(APIAccessToken)
                .filter(
                    APIAccessToken.token == hashed,
                    APIAccessToken.enabled.is_(True),
                )
                .first()
            )
            if match is None:
                return None
            return AccessToken(
                token=token,
                client_id=str(match.creator_uid),
                scopes=[],
                expires_at=None,
                claims={"creator_uid": match.creator_uid},
            )
204 changes: 204 additions & 0 deletions querybook/server/lib/mcp/lib/comments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
"""Comment utility functions for MCP tools and resources."""

from collections import defaultdict

from logic.comment import get_comment_by_id, get_comments_by_data_cell_id
from logic.datadoc import get_data_cell_by_id
from logic.datadoc_permission import user_can_read, DocDoesNotExist
from models.comment import Comment, CommentReaction, DataCellComment
from models.datadoc import DataDocDataCell


def serialize_reaction(reaction) -> dict:
    """Convert a CommentReaction model into a plain JSON-serializable dict."""
    uid = reaction.created_by
    return {
        "id": reaction.id,
        "reaction": reaction.reaction,
        "created_by": uid,
        "created_by_resource_uri": f"querybook://user/{uid}",
    }


def serialize_comment(comment, reactions: list = None) -> dict:
"""Serialize a Comment model to dict.

Args:
comment: Comment model object
reactions: Optional list of CommentReaction objects for this comment
"""
return {
"id": comment.id,
"text": "" if comment.archived else comment.text,
"created_by": comment.created_by,
"created_by_resource_uri": f"querybook://user/{comment.created_by}",
"created_at": comment.created_at.isoformat() if comment.created_at else None,
"updated_at": comment.updated_at.isoformat() if comment.updated_at else None,
"archived": comment.archived,
"parent_comment_id": comment.parent_comment_id,
"reactions": [serialize_reaction(r) for r in reactions] if reactions else [],
"resource_uri": f"querybook://comment/{comment.id}",
}


def serialize_comments(
    comments: list[Comment],
    session,
    include_threads: bool = True,
) -> list[dict]:
    """Serialize comments with optional thread replies and reactions.

    Uses at most two batched queries (one for replies, one for reactions)
    rather than querying per comment.

    Args:
        comments: List of Comment objects to serialize
        session: Database session
        include_threads: If True, include thread replies for each comment

    Returns:
        List of serialized comment dicts with reactions and optional thread replies
    """
    if not comments:
        return []

    parent_ids = [c.id for c in comments]
    # IDs (top-level + replies) whose reactions we need to fetch.
    ids_needing_reactions = list(parent_ids)
    replies_by_parent = defaultdict(list)

    if include_threads:
        # One batched query for every reply of every top-level comment.
        replies = (
            session.query(Comment)
            .filter(Comment.parent_comment_id.in_(parent_ids))
            .order_by(Comment.created_at)
            .all()
        )
        for reply in replies:
            replies_by_parent[reply.parent_comment_id].append(reply)
            ids_needing_reactions.append(reply.id)

    # One batched query for all reactions, grouped by owning comment.
    reactions_by_comment = defaultdict(list)
    if ids_needing_reactions:
        fetched = (
            session.query(CommentReaction)
            .filter(CommentReaction.comment_id.in_(ids_needing_reactions))
            .all()
        )
        for reaction in fetched:
            reactions_by_comment[reaction.comment_id].append(reaction)

    serialized = []
    for comment in comments:
        entry = serialize_comment(comment, reactions_by_comment.get(comment.id, []))
        thread = replies_by_parent.get(comment.id)
        # Only attach "replies" when the comment actually has some.
        if include_threads and thread:
            entry["replies"] = [
                serialize_comment(reply, reactions_by_comment.get(reply.id, []))
                for reply in thread
            ]
        serialized.append(entry)

    return serialized


def _assert_can_read_comment_doc(root_comment_id: int, uid: int, session) -> None:
    """Raise ValueError if *uid* may not read the DataDoc owning the comment."""
    cell_link = (
        session.query(DataCellComment)
        .filter(DataCellComment.comment_id == root_comment_id)
        .first()
    )
    if not cell_link:
        # Comment is not attached to any DataDoc cell; nothing to check.
        return
    doc_cell = (
        session.query(DataDocDataCell)
        .filter(DataDocDataCell.data_cell_id == cell_link.data_cell_id)
        .first()
    )
    if not doc_cell:
        return
    try:
        if not user_can_read(doc_cell.data_doc_id, uid, session=session):
            raise ValueError(
                "You do not have access to this comment's DataDoc."
            )
    except DocDoesNotExist:
        raise ValueError("The DataDoc for this comment was not found.")


def get_comment_data(comment_id: int, uid: int, session) -> dict:
    """Get comment data with thread replies and reactions.

    Args:
        comment_id: Comment ID
        uid: User ID for permission checking
        session: Database session

    Returns:
        Serialized comment dict with replies and reactions

    Raises:
        ValueError: If comment not found or user lacks permission
    """
    comment = get_comment_by_id(comment_id, session=session)
    if not comment:
        raise ValueError(f"Comment {comment_id} not found.")

    # Permissions are defined by the DataDoc the *root* comment is attached
    # to, so resolve thread replies up to their parent first.
    root_comment = comment
    if root_comment.parent_comment_id:
        root_comment = get_comment_by_id(
            root_comment.parent_comment_id, session=session
        )
        if not root_comment:
            raise ValueError("Parent comment not found.")

    _assert_can_read_comment_doc(root_comment.id, uid, session)

    # Serialize with threads and reactions
    serialized = serialize_comments([comment], session, include_threads=True)
    return serialized[0] if serialized else {}


def get_datadoc_cell_comments_data(
    cell_id: int, uid: int, include_threads: bool, session
) -> list[dict]:
    """Get comments for a DataDoc cell with permission checking.

    Args:
        cell_id: DataDoc cell ID
        uid: User ID for permission checking
        include_threads: Include thread replies for each comment
        session: Database session

    Returns:
        List of serialized comment dicts

    Raises:
        ValueError: If cell not found or user lacks permission
    """
    # Guard: the cell itself must exist.
    if not get_data_cell_by_id(cell_id, session=session):
        raise ValueError(f"DataDoc cell {cell_id} not found.")

    # Guard: the cell must belong to a DataDoc (permissions live on the doc).
    link = (
        session.query(DataDocDataCell)
        .filter(DataDocDataCell.data_cell_id == cell_id)
        .first()
    )
    if link is None:
        raise ValueError(f"DataDoc cell {cell_id} is not associated with a DataDoc.")
    datadoc_id = link.data_doc_id

    # Guard: the requesting user must be able to read that DataDoc.
    try:
        allowed = user_can_read(datadoc_id, uid, session=session)
    except DocDoesNotExist:
        raise ValueError(f"DataDoc {datadoc_id} not found.")
    if not allowed:
        raise ValueError("You do not have access to this DataDoc.")

    cell_comments = get_comments_by_data_cell_id(cell_id, session=session)
    return serialize_comments(cell_comments, session, include_threads=include_threads)
Loading
Loading