-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Fix RCE vulnerability in Pixeltable MCP tools (eval/exec injection) #231
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 3 commits
21b6e67
c51cf96
875c7dc
72a21c5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,9 +1,135 @@ | ||
| import ast | ||
| import operator | ||
| import re | ||
|
|
||
| import pixeltable as pxt | ||
| from mcp.server.fastmcp import FastMCP | ||
|
|
||
| mcp = FastMCP("Pixeltable") | ||
|
|
||
|
|
||
| # Safe expression evaluator to avoid eval()/exec() with untrusted input. | ||
| # Only allows attribute access on the table object, basic operators, and literals. | ||
|
|
||
| _SAFE_OPERATORS = { | ||
| ast.Add: operator.add, | ||
| ast.Sub: operator.sub, | ||
| ast.Mult: operator.mul, | ||
| ast.Div: operator.truediv, | ||
| ast.FloorDiv: operator.floordiv, | ||
| ast.Mod: operator.mod, | ||
| ast.Eq: operator.eq, | ||
| ast.NotEq: operator.ne, | ||
| ast.Lt: operator.lt, | ||
| ast.LtE: operator.le, | ||
| ast.Gt: operator.gt, | ||
| ast.GtE: operator.ge, | ||
| ast.USub: operator.neg, | ||
| ast.Not: operator.not_, | ||
| } | ||
|
|
||
| # Validate identifiers to prevent injection through crafted attribute names | ||
| _IDENTIFIER_RE = re.compile(r"^[A-Za-z][A-Za-z0-9_]*$") | ||
|
|
||
|
|
||
| def _safe_eval(expression: str, table): | ||
| """Evaluate a simple expression safely against a Pixeltable table. | ||
|
|
||
| Supports: table.column references, arithmetic/comparison operators, | ||
| string/number/boolean literals, and parenthesized sub-expressions. | ||
|
|
||
| Raises ValueError for any unsupported or potentially dangerous construct. | ||
| """ | ||
| if len(expression) > 2000: | ||
| raise ValueError("Expression too long (max 2000 characters).") | ||
| try: | ||
| tree = ast.parse(expression, mode="eval") | ||
| except SyntaxError as e: | ||
| raise ValueError(f"Invalid expression syntax: {e}") from e | ||
|
|
||
| return _eval_node(tree.body, {"table": table}) | ||
|
|
||
|
|
||
| def _eval_node(node, namespace): | ||
| """Recursively evaluate an AST node using only safe operations.""" | ||
|
|
||
| # Literals: numbers, strings, booleans, None | ||
| if isinstance(node, ast.Constant): | ||
| if not isinstance(node.value, (int, float, str, bool, type(None))): | ||
| raise ValueError( | ||
| f"Unsupported literal type: {type(node.value).__name__}" | ||
| ) | ||
| return node.value | ||
|
|
||
| # Variable lookup (only 'table' allowed; True/False/None are ast.Constant on 3.8+) | ||
| if isinstance(node, ast.Name): | ||
| if node.id == "table": | ||
| return namespace["table"] | ||
| raise ValueError( | ||
| f"Unsupported variable '{node.id}'. Only 'table' references are allowed." | ||
| ) | ||
|
|
||
| # Attribute access: table.column_name | ||
| if isinstance(node, ast.Attribute): | ||
| value = _eval_node(node.value, namespace) | ||
| attr = node.attr | ||
| if not _IDENTIFIER_RE.match(attr): | ||
| raise ValueError(f"Invalid attribute name: '{attr}'") | ||
| if attr.startswith("_"): | ||
| raise ValueError(f"Access to private attribute '{attr}' is not allowed.") | ||
| if not hasattr(value, attr): | ||
| raise ValueError(f"Attribute '{attr}' not found.") | ||
| return getattr(value, attr) | ||
|
|
||
| # Binary operations: +, -, *, /, //, %, ==, !=, <, <=, >, >= | ||
| if isinstance(node, ast.BinOp): | ||
| op_func = _SAFE_OPERATORS.get(type(node.op)) | ||
| if op_func is None: | ||
| raise ValueError(f"Unsupported operator: {type(node.op).__name__}") | ||
| left = _eval_node(node.left, namespace) | ||
| right = _eval_node(node.right, namespace) | ||
| return op_func(left, right) | ||
|
|
||
| # Comparison operations (handles chained comparisons like a < b < c correctly) | ||
| if isinstance(node, ast.Compare): | ||
| left = _eval_node(node.left, namespace) | ||
| result = None | ||
| for op, comparator in zip(node.ops, node.comparators): | ||
| op_func = _SAFE_OPERATORS.get(type(op)) | ||
| if op_func is None: | ||
| raise ValueError(f"Unsupported comparison: {type(op).__name__}") | ||
| right = _eval_node(comparator, namespace) | ||
| cmp = op_func(left, right) | ||
| result = cmp if result is None else (result & cmp) | ||
| left = right | ||
| return result | ||
|
|
||
| # Unary operations: -, not | ||
| if isinstance(node, ast.UnaryOp): | ||
| op_func = _SAFE_OPERATORS.get(type(node.op)) | ||
| if op_func is None: | ||
| raise ValueError(f"Unsupported unary operator: {type(node.op).__name__}") | ||
| return op_func(_eval_node(node.operand, namespace)) | ||
|
|
||
| # Boolean operations: and, or | ||
| if isinstance(node, ast.BoolOp): | ||
| if isinstance(node.op, ast.And): | ||
| result = _eval_node(node.values[0], namespace) | ||
| for v in node.values[1:]: | ||
| result = result & _eval_node(v, namespace) | ||
| return result | ||
| if isinstance(node.op, ast.Or): | ||
| result = _eval_node(node.values[0], namespace) | ||
| for v in node.values[1:]: | ||
| result = result | _eval_node(v, namespace) | ||
| return result | ||
|
|
||
| raise ValueError( | ||
| f"Unsupported expression type: {type(node).__name__}. " | ||
| "Only column references, operators, and literals are allowed." | ||
| ) | ||
|
|
||
|
|
||
| @mcp.tool() | ||
| def create_table(table_name: str, columns: dict[str, str]) -> str: | ||
| """Create a table in Pixeltable. | ||
|
|
@@ -108,17 +234,15 @@ def add_computed_column(table_name: str, column_name: str, expression: str) -> s | |
| if table is None: | ||
| return f"Error: Table {table_name} not found." | ||
|
|
||
| # Replace 'table.' with the actual table reference | ||
| modified_expr = expression.replace("table.", f"table.") | ||
|
|
||
| # Construct the expression | ||
| column_expr = eval(modified_expr, {"table": table}) | ||
| column_expr = _safe_eval(expression, table) | ||
|
|
||
| # Add the computed column with kwargs format | ||
| kwargs = {column_name: column_expr} | ||
| table.add_computed_column(**kwargs) | ||
|
|
||
| return f"Computed column '{column_name}' added successfully to table '{table_name}'." | ||
| except ValueError as e: | ||
| return f"Error: Invalid expression: {e}" | ||
| except Exception as e: | ||
| return f"Error adding computed column: {str(e)}" | ||
|
|
||
|
|
@@ -143,11 +267,7 @@ def create_view(view_name: str, table_name: str, filter_expr: str = None) -> str | |
| return f"Error: Table {table_name} not found." | ||
|
|
||
| if filter_expr: | ||
| # Replace 'table.' with the actual table reference | ||
| modified_expr = filter_expr.replace("table.", f"{table.name}.") | ||
|
|
||
| # Use eval to convert the string expression to a Python expression | ||
| filter_condition = eval(modified_expr) | ||
| filter_condition = _safe_eval(filter_expr, table) | ||
|
|
||
| # Create the view with the filter | ||
| view = pxt.create_view(view_name, table.where(filter_condition)) | ||
|
|
@@ -156,6 +276,8 @@ def create_view(view_name: str, table_name: str, filter_expr: str = None) -> str | |
| view = pxt.create_view(view_name, table) | ||
|
|
||
| return f"View '{view_name}' created successfully." | ||
| except ValueError as e: | ||
| return f"Error: Invalid filter expression: {e}" | ||
| except Exception as e: | ||
| return f"Error creating view: {str(e)}" | ||
|
|
||
|
|
@@ -196,12 +318,13 @@ def execute_query( | |
|
|
||
| # Apply where clause if provided | ||
| if where_expr: | ||
| modified_expr = where_expr.replace("table.", f"{data_source.name}.") | ||
| where_condition = eval(modified_expr) | ||
| where_condition = _safe_eval(where_expr, data_source) | ||
| query = query.where(where_condition) | ||
|
|
||
| # Apply order by if provided | ||
| if order_by_column: | ||
| if not _IDENTIFIER_RE.match(order_by_column): | ||
| return f"Error: Invalid column name: '{order_by_column}'" | ||
| # Handle ordering on a specific column | ||
| if hasattr(data_source, order_by_column): | ||
| order_col = getattr(data_source, order_by_column) | ||
|
|
@@ -217,6 +340,8 @@ def execute_query( | |
| if select_columns: | ||
| select_args = [] | ||
| for col_name in select_columns: | ||
| if not _IDENTIFIER_RE.match(col_name): | ||
| return f"Error: Invalid column name: '{col_name}'" | ||
| if hasattr(data_source, col_name): | ||
| select_args.append(getattr(data_source, col_name)) | ||
| else: | ||
|
|
@@ -232,42 +357,65 @@ def execute_query( | |
| result_str = result.to_pandas().to_string() | ||
| return f"Query executed successfully:\n\n{result_str}" | ||
|
|
||
| except ValueError as e: | ||
| return f"Error: Invalid expression: {e}" | ||
| except Exception as e: | ||
| return f"Error executing query: {str(e)}" | ||
|
|
||
|
|
||
| @mcp.tool() | ||
| def create_query(query_name: str, table_name: str, query_function: str) -> str: | ||
| """Create a named query in Pixeltable. | ||
| def create_query( | ||
| query_name: str, | ||
| table_name: str, | ||
| select_columns: list[str] = None, | ||
| where_expr: str = None, | ||
| ) -> str: | ||
| """Create a named query as a persistent view in Pixeltable. | ||
|
|
||
| Args: | ||
| query_name: The name of the query to create. | ||
| query_name: The name of the query to create (stored as a Pixeltable view). | ||
| table_name: The name of the table the query will operate on. | ||
| query_function: A string representation of the query function body. | ||
| Should be a function that returns a query result. | ||
| select_columns: Optional list of column names to select. | ||
| If None, selects all columns. | ||
| where_expr: Optional filter expression as a string. | ||
| The expression should refer to columns using 'table.column_name'. | ||
|
|
||
| Example: | ||
| create_query( | ||
| "get_active_users", | ||
| "users", | ||
| "return users.where(users.is_active == True).select(users.name, users.email)" | ||
| select_columns=["name", "email"], | ||
| where_expr="table.is_active == True" | ||
| ) | ||
| """ | ||
| try: | ||
| table = pxt.get_table(table_name) | ||
| if table is None: | ||
| return f"Error: Table {table_name} not found." | ||
|
|
||
| # Create a function definition | ||
| func_def = f""" | ||
| @{table.name}.query | ||
| def {query_name}(): | ||
| {query_function} | ||
| """ | ||
| # Build the query using Pixeltable's API directly instead of exec() | ||
| query = table | ||
|
|
||
| if where_expr: | ||
| filter_condition = _safe_eval(where_expr, table) | ||
| query = query.where(filter_condition) | ||
|
|
||
| if select_columns: | ||
| select_args = [] | ||
| for col_name in select_columns: | ||
| if not _IDENTIFIER_RE.match(col_name): | ||
| return f"Error: Invalid column name: '{col_name}'" | ||
| if hasattr(table, col_name): | ||
| select_args.append(getattr(table, col_name)) | ||
| else: | ||
| return f"Error: Column '{col_name}' not found in table '{table_name}'." | ||
| query = query.select(*select_args) | ||
|
|
||
| # Execute the function definition | ||
| exec(func_def) | ||
| # Persist the query as a Pixeltable view so it can be retrieved later | ||
| pxt.create_view(query_name, query) | ||
|
|
||
| return f"Query '{query_name}' created successfully for table '{table_name}'." | ||
|
Comment on lines
+396
to
417
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
# First, let's locate the file and examine the full create_query function
git ls-files | grep -E 'tools\.py$' | head -5Repository: patchy631/ai-engineering-hub Length of output: 225 🏁 Script executed: #!/bin/bash
# Read the tools.py file to see the full create_query function and context
wc -l pixeltable-mcp/base-sdk/tools.pyRepository: patchy631/ai-engineering-hub Length of output: 108 🏁 Script executed: #!/bin/bash
# Search for the create_query function definition and the surrounding context
rg -n 'def create_query' -A 60 pixeltable-mcp/base-sdk/tools.py | head -100Repository: patchy631/ai-engineering-hub Length of output: 2217 🏁 Script executed: #!/bin/bash
# Search for any module-level storage, registries, or persistence patterns
rg -n 'QUERY|query.*dict|register.*query|save.*query|named_queries|_queries' pixeltable-mcp/base-sdk/tools.py | head -20Repository: patchy631/ai-engineering-hub Length of output: 54 🏁 Script executed: #!/bin/bash
# Check if there are any class definitions or global state management
rg -n 'class |^[A-Z_]+\s*=\s*{|}' pixeltable-mcp/base-sdk/tools.py | head -20Repository: patchy631/ai-engineering-hub Length of output: 202 🏁 Script executed: #!/bin/bash
# Look for Pixeltable query persistence API calls - check for create_named_query, save, etc.
rg -n 'pxt\.|table\.|query\.' pixeltable-mcp/base-sdk/tools.py | grep -i 'save\|create\|persist\|register'Repository: patchy631/ai-engineering-hub Length of output: 594 🏁 Script executed: #!/bin/bash
# Check git history for the old implementation
git log --oneline -p pixeltable-mcp/base-sdk/tools.py 2>/dev/null | grep -A 30 'def create_query' | head -50Repository: patchy631/ai-engineering-hub Length of output: 1146 🏁 Script executed: #!/bin/bash
# Look at create_view to understand the pattern - does it persist?
rg -n 'def create_view' -A 20 pixeltable-mcp/base-sdk/tools.pyRepository: patchy631/ai-engineering-hub Length of output: 962 🏁 Script executed: #!/bin/bash
# Search for execute_query function to see what it expects
rg -n 'def execute_query' -A 30 pixeltable-mcp/base-sdk/tools.pyRepository: patchy631/ai-engineering-hub Length of output: 1446 🏁 Script executed: #!/bin/bash
# Look for any global query storage or registry
rg -n 'queries\s*=' pixeltable-mcp/base-sdk/tools.pyRepository: patchy631/ai-engineering-hub Length of output: 54 🏁 Script executed: #!/bin/bash
# Search for where query_name is referenced elsewhere in the file
rg -n 'query_name' pixeltable-mcp/base-sdk/tools.pyRepository: patchy631/ai-engineering-hub Length of output: 245 🏁 Script executed: #!/bin/bash
# Check if Pixeltable has a create_named_query or similar API
rg -n 'pxt\.' pixeltable-mcp/base-sdk/tools.py | grep -i queryRepository: patchy631/ai-engineering-hub Length of output: 54 🏁 Script executed: #!/bin/bash
# Look at the full file structure and imports
head -50 pixeltable-mcp/base-sdk/tools.pyRepository: patchy631/ai-engineering-hub Length of output: 1507 🏁 Script executed: #!/bin/bash
# Check the full create_view implementation to see how it persists
rg -n 'def create_view' -A 30 pixeltable-mcp/base-sdk/tools.pyRepository: patchy631/ai-engineering-hub Length of output: 1418 🏁 Script executed: #!/bin/bash
# Search for any reference to "query" in the entire tools.py that might reveal storage patterns
rg -n 'query' pixeltable-mcp/base-sdk/tools.py | head -30Repository: patchy631/ai-engineering-hub Length of output: 1439 🏁 Script executed: #!/bin/bash
# Check if there's any other file that might store queries
git ls-files | grep -E 'tools|query|config' | xargs grep -l 'create_query\|named.*query' 2>/dev/null | head -5Repository: patchy631/ai-engineering-hub Length of output: 144 🌐 Web query:
💡 Result: Pixeltable does not have a In Pixeltable, the closest equivalents to a “named/saved query” are:
Sources: The built query is never persisted — The function constructs a query object but never persists it via Pixeltable's API. Unlike the parallel To fix this, either:
🧰 Tools🪛 Ruff (0.15.1)[warning] 401-401: Consider moving this statement to an (TRY300) 🤖 Prompt for AI Agents |
||
| except ValueError as e: | ||
| return f"Error: Invalid expression: {e}" | ||
| except Exception as e: | ||
| return f"Error creating query: {str(e)}" | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: patchy631/ai-engineering-hub
Length of output: 560
🏁 Script executed:
Repository: patchy631/ai-engineering-hub
Length of output: 340
🏁 Script executed:
Repository: patchy631/ai-engineering-hub
Length of output: 563
🌐 Web query:
Pixeltable NOT operator column filter ~ operator.invert💡 Result:
In Pixeltable, boolean NOT is done with the bitwise-invert operator
~(because Python’snotcan’t be overloaded). (docs.pixeltable.com)Example:
Equivalent using
operator.invert:Use parentheses around comparisons before applying
~(as shown). (docs.pixeltable.com)Citations:
ast.Notmaps tooperator.not_, which breaks Pixeltable filter expressions.Pixeltable's Boolean operators mirror Pandas:
&for AND,|for OR, and~for NOT. Theast.BoolOphandler already correctly uses&/|(bitwise). However,operator.not_is Python'snotoperator, which calls__bool__and returns a plain Pythonbool— not a Pixeltable expression. Per Pixeltable docs, this is intentional: "Python'snotcan't be overloaded, only~can be." This means any expression usingnot table.columnwill either raiseTypeError(if Pixeltable forbids implicit bool conversion) or return a PythonTrue/Falseinstead of the correct negation expression, breaking the downstream.where()call.The downstream effect is in the
ast.UnaryOphandler (lines 108–112), which evaluatesnotvia this mapping.Fix: map
ast.Nottooperator.invert(which calls__invert__, the~operator):Proposed fix
🤖 Prompt for AI Agents