Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,20 @@ services:
command: './querybook/scripts/runservice scheduler --pidfile="/opt/celerybeat.pid"'
volumes: *querybook-volumes
depends_on: *querybook-depends-on
mcp:
container_name: querybook_mcp
image: *querybook-image
tty: true
stdin_open: true
command: "./querybook/scripts/runservice mcp"
ports:
- "${MCP_PORT:-8771}:${MCP_PORT:-8771}"
expose:
- "${MCP_PORT:-8771}"
environment:
MCP_PORT: "${MCP_PORT:-8771}"
volumes: *querybook-volumes
depends_on: *querybook-depends-on
redis:
container_name: querybook_redis
image: redis:5.0.9
Expand Down
3 changes: 3 additions & 0 deletions querybook/config/querybook_default_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ GITHUB_REPO_NAME: ~
GITHUB_BRANCH: 'main'
GITHUB_CRYPTO_SECRET: ''

# --------------- MCP Server ---------------
MCP_PORT: 8771

# --------------- Cache Control ---------------
# Cache control settings for HTTP responses on static assets
# Maximum age in seconds for static assets (CSS, JS, images, fonts)
Expand Down
63 changes: 63 additions & 0 deletions querybook/migrations/versions/320ccafaa979_add_mcp_event_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""add MCP event type

Revision ID: 320ccafaa979
Revises: a1b2c3d4e5f6
Create Date: 2026-03-04 14:00:00.000000

"""

from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = '320ccafaa979'
down_revision = 'a1b2c3d4e5f6'
branch_labels = None
depends_on = None


# Define the old and new EventType enum types
# (values mirror the EventType enum in querybook/server/const/event_log.py;
# "MCP" is the value added by this revision)
old_event_type_enum = sa.Enum("API", "WEBSOCKET", "VIEW", "CLICK", name="eventtype")
new_event_type_enum = sa.Enum("API", "WEBSOCKET", "VIEW", "CLICK", "MCP", name="eventtype")


def upgrade():
    """Add MCP to EventType enum"""
    conn = op.get_bind()
    dialect = conn.dialect.name

    if dialect == "postgresql":
        # PostgreSQL: Add new enum value to existing 'eventtype' enum.
        # ALTER TYPE ... ADD VALUE is additive and preserves existing rows.
        op.execute("ALTER TYPE eventtype ADD VALUE 'MCP'")
    else:
        # Other Databases (e.g., MySQL, SQLite): Alter 'event_log.event_type' column to use the new enum
        # NOTE(review): SQLite does not support ALTER COLUMN; if SQLite must be
        # supported by this migration, this likely needs op.batch_alter_table — confirm.
        op.alter_column(
            "event_log",
            "event_type",
            existing_type=old_event_type_enum,
            type_=new_event_type_enum,
        )


def downgrade():
    """Remove MCP from EventType enum.

    Rows already logged with event_type = 'MCP' cannot be represented by the
    old enum, so they are deleted first; otherwise the PostgreSQL
    ``USING event_type::text::eventtype`` cast would fail (the value is absent
    from the recreated type), and the MySQL enum narrowing would error or
    silently corrupt those rows.
    """
    bind = op.get_bind()
    dialect = bind.dialect.name

    # Purge rows using the value that is being removed from the enum.
    op.execute("DELETE FROM event_log WHERE event_type = 'MCP'")

    if dialect == "postgresql":
        # PostgreSQL: does not support removing enum values directly.
        # We need to create a new enum without 'MCP', rename the old one,
        # and update the column.
        op.execute("ALTER TYPE eventtype RENAME TO eventtype_old")
        old_event_type_enum.create(bind, checkfirst=True)
        op.execute(
            "ALTER TABLE event_log ALTER COLUMN event_type TYPE eventtype USING event_type::text::eventtype"
        )
        op.execute("DROP TYPE eventtype_old")
    else:
        # Other Databases (e.g., MySQL, SQLite): Revert 'event_log.event_type' column to the old enum
        op.alter_column(
            "event_log",
            "event_type",
            existing_type=new_event_type_enum,
            type_=old_event_type_enum,
        )
6 changes: 6 additions & 0 deletions querybook/scripts/runservice
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ case "$SERVICE_NAME" in
"flower")
COMMAND="celery -A tasks.all_tasks flower"
;;
"mcp")
COMMAND="watchmedo auto-restart -d querybook -p '*.py' -R -- python3 querybook/server/run_mcp.py"
;;
"prod_mcp")
COMMAND="python3 querybook/server/run_mcp.py"
;;
esac

if [ -z "$COMMAND" ]; then
Expand Down
2 changes: 2 additions & 0 deletions querybook/server/const/event_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ class EventType(Enum):
VIEW = "VIEW"
# a UI element gets clicked
CLICK = "CLICK"
# MCP (Model Context Protocol) operation
MCP = "MCP"


class FrontendEvent(TypedDict):
Expand Down
3 changes: 3 additions & 0 deletions querybook/server/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,9 @@ class QuerybookSettings(object):
GITHUB_BRANCH = get_env_config("GITHUB_BRANCH")
GITHUB_CRYPTO_SECRET = get_env_config("GITHUB_CRYPTO_SECRET")

# MCP Server
MCP_PORT = int(get_env_config("MCP_PORT") or 8771)

# Cache Control
CACHE_CONTROL_MAX_AGE = int(
get_env_config("CACHE_CONTROL_MAX_AGE") or "604800"
Expand Down
29 changes: 29 additions & 0 deletions querybook/server/lib/mcp/auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import hashlib

from fastmcp.server.auth import AccessToken, TokenVerifier

from app.db import DBSession
from models.admin import APIAccessToken


class QuerybookTokenVerifier(TokenVerifier):
    """Validate Querybook API keys (SHA-512 hashed) against the database."""

    async def verify_token(self, token: str) -> AccessToken | None:
        """Return an AccessToken for a valid, enabled API key, else None.

        API keys are stored as SHA-512 hex digests, so the presented token is
        hashed the same way before the lookup.
        """
        hashed = hashlib.sha512(token.encode("utf-8")).hexdigest()
        with DBSession() as session:
            match = (
                session.query(APIAccessToken)
                .filter(
                    APIAccessToken.token == hashed,
                    APIAccessToken.enabled.is_(True),
                )
                .first()
            )
            if match is None:
                return None
            return AccessToken(
                token=token,
                client_id=str(match.creator_uid),
                scopes=[],
                expires_at=None,
                claims={"creator_uid": match.creator_uid},
            )
204 changes: 204 additions & 0 deletions querybook/server/lib/mcp/lib/comments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
"""Comment utility functions for MCP tools and resources."""

from collections import defaultdict

from logic.comment import get_comment_by_id, get_comments_by_data_cell_id
from logic.datadoc import get_data_cell_by_id
from logic.datadoc_permission import user_can_read, DocDoesNotExist
from models.comment import Comment, CommentReaction, DataCellComment
from models.datadoc import DataDocDataCell


def serialize_reaction(reaction) -> dict:
    """Convert a CommentReaction model into a plain JSON-serializable dict."""
    uid = reaction.created_by
    return {
        "id": reaction.id,
        "reaction": reaction.reaction,
        "created_by": uid,
        "created_by_resource_uri": f"querybook://user/{uid}",
    }


def serialize_comment(comment, reactions: list = None) -> dict:
"""Serialize a Comment model to dict.

Args:
comment: Comment model object
reactions: Optional list of CommentReaction objects for this comment
"""
return {
"id": comment.id,
"text": "" if comment.archived else comment.text,
"created_by": comment.created_by,
"created_by_resource_uri": f"querybook://user/{comment.created_by}",
"created_at": comment.created_at.isoformat() if comment.created_at else None,
"updated_at": comment.updated_at.isoformat() if comment.updated_at else None,
"archived": comment.archived,
"parent_comment_id": comment.parent_comment_id,
"reactions": [serialize_reaction(r) for r in reactions] if reactions else [],
"resource_uri": f"querybook://comment/{comment.id}",
}


def serialize_comments(
    comments: list[Comment],
    session,
    include_threads: bool = True,
) -> list[dict]:
    """Serialize comments with optional thread replies and reactions.

    Uses at most two batched queries (one for replies, one for reactions)
    rather than querying per comment.

    Args:
        comments: List of Comment objects to serialize
        session: Database session
        include_threads: If True, include thread replies for each comment

    Returns:
        List of serialized comment dicts with reactions and optional thread replies
    """
    if not comments:
        return []

    parent_ids = [c.id for c in comments]
    # IDs (top-level + replies) whose reactions we need to fetch.
    ids_needing_reactions = list(parent_ids)
    replies_by_parent = defaultdict(list)

    if include_threads:
        # One batched query for every reply of every top-level comment.
        replies = (
            session.query(Comment)
            .filter(Comment.parent_comment_id.in_(parent_ids))
            .order_by(Comment.created_at)
            .all()
        )
        for reply in replies:
            replies_by_parent[reply.parent_comment_id].append(reply)
            ids_needing_reactions.append(reply.id)

    # One batched query for all reactions, grouped by owning comment.
    reactions_by_comment = defaultdict(list)
    if ids_needing_reactions:
        fetched = (
            session.query(CommentReaction)
            .filter(CommentReaction.comment_id.in_(ids_needing_reactions))
            .all()
        )
        for reaction in fetched:
            reactions_by_comment[reaction.comment_id].append(reaction)

    serialized = []
    for comment in comments:
        entry = serialize_comment(comment, reactions_by_comment.get(comment.id, []))
        thread = replies_by_parent.get(comment.id)
        # Only attach "replies" when the comment actually has some.
        if include_threads and thread:
            entry["replies"] = [
                serialize_comment(reply, reactions_by_comment.get(reply.id, []))
                for reply in thread
            ]
        serialized.append(entry)

    return serialized


def _assert_can_read_comment_doc(root_comment_id: int, uid: int, session) -> None:
    """Raise ValueError if *uid* may not read the DataDoc owning the comment."""
    cell_link = (
        session.query(DataCellComment)
        .filter(DataCellComment.comment_id == root_comment_id)
        .first()
    )
    if not cell_link:
        # Comment is not attached to any DataDoc cell; nothing to check.
        return
    doc_cell = (
        session.query(DataDocDataCell)
        .filter(DataDocDataCell.data_cell_id == cell_link.data_cell_id)
        .first()
    )
    if not doc_cell:
        return
    try:
        if not user_can_read(doc_cell.data_doc_id, uid, session=session):
            raise ValueError(
                "You do not have access to this comment's DataDoc."
            )
    except DocDoesNotExist:
        raise ValueError("The DataDoc for this comment was not found.")


def get_comment_data(comment_id: int, uid: int, session) -> dict:
    """Get comment data with thread replies and reactions.

    Args:
        comment_id: Comment ID
        uid: User ID for permission checking
        session: Database session

    Returns:
        Serialized comment dict with replies and reactions

    Raises:
        ValueError: If comment not found or user lacks permission
    """
    comment = get_comment_by_id(comment_id, session=session)
    if not comment:
        raise ValueError(f"Comment {comment_id} not found.")

    # Permissions are defined by the DataDoc the *root* comment is attached
    # to, so resolve thread replies up to their parent first.
    root_comment = comment
    if root_comment.parent_comment_id:
        root_comment = get_comment_by_id(
            root_comment.parent_comment_id, session=session
        )
        if not root_comment:
            raise ValueError("Parent comment not found.")

    _assert_can_read_comment_doc(root_comment.id, uid, session)

    # Serialize with threads and reactions
    serialized = serialize_comments([comment], session, include_threads=True)
    return serialized[0] if serialized else {}


def get_datadoc_cell_comments_data(
    cell_id: int, uid: int, include_threads: bool, session
) -> list[dict]:
    """Get comments for a DataDoc cell with permission checking.

    Args:
        cell_id: DataDoc cell ID
        uid: User ID for permission checking
        include_threads: Include thread replies for each comment
        session: Database session

    Returns:
        List of serialized comment dicts

    Raises:
        ValueError: If cell not found or user lacks permission
    """
    # Guard: the cell itself must exist.
    if not get_data_cell_by_id(cell_id, session=session):
        raise ValueError(f"DataDoc cell {cell_id} not found.")

    # Guard: the cell must belong to a DataDoc (permissions live on the doc).
    link = (
        session.query(DataDocDataCell)
        .filter(DataDocDataCell.data_cell_id == cell_id)
        .first()
    )
    if link is None:
        raise ValueError(f"DataDoc cell {cell_id} is not associated with a DataDoc.")
    datadoc_id = link.data_doc_id

    # Guard: the requesting user must be able to read that DataDoc.
    try:
        allowed = user_can_read(datadoc_id, uid, session=session)
    except DocDoesNotExist:
        raise ValueError(f"DataDoc {datadoc_id} not found.")
    if not allowed:
        raise ValueError("You do not have access to this DataDoc.")

    cell_comments = get_comments_by_data_cell_id(cell_id, session=session)
    return serialize_comments(cell_comments, session, include_threads=include_threads)
Loading
Loading