Skip to content

Commit 33b661c

Browse files
Use sqlglot for better checks (#24)
1 parent 7c45a70 commit 33b661c

6 files changed

Lines changed: 470 additions & 15 deletions

File tree

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ dependencies = [
1818
"langchain-ollama>=0.2.3",
1919
"langchain-openai>=0.3.7",
2020
"langgraph>=0.3.12",
21-
"mcp[cli]>=1.3.0",
21+
"mcp>=1.3.0",
2222
"openai>=1.65.3",
2323
"pandas>=2.2.3",
2424
"prompt-toolkit>=3.0.50",
@@ -29,6 +29,7 @@ dependencies = [
2929
"pyyaml>=6.0.2",
3030
"requests>=2.32.3",
3131
"rich>=13.9.4",
32+
"sqlglot>=26.23.0",
3233
"structlog>=25.1.0",
3334
"typer>=0.15.2",
3435
"uvicorn>=0.34.0",

src/dremioai/api/dremio/sql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class ArcticSource(BaseModel):
4242

4343
class Query(BaseModel):
4444
sql: str = Field(..., alias="sql")
45-
context: Optional[Any] = None
45+
context: Optional[List[str]] = None
4646
references: Optional[Dict[str, ArcticSource]] = None
4747

4848

src/dremioai/config/settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ class Dremio(BaseModel):
122122
project_id: Optional[str] = None
123123
enable_experimental: Optional[bool] = False # enable experimental tools
124124
oauth2: Optional[OAuth2] = None
125+
allow_dml: Optional[bool] = False
125126
model_config = ConfigDict(validate_assignment=True)
126127

127128
@field_serializer("raw_pat")

src/dremioai/tools/tools.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@
4545
from dremioai.api.dremio.catalog import get_schema, get_lineage, get_descriptions
4646
from csv import reader
4747
from io import StringIO
48+
from sqlglot import parse_one
49+
from sqlglot import expressions
4850

4951
logger = log.logger(__name__)
5052

@@ -244,6 +246,31 @@ async def invoke(self) -> Dict[str, Any]:
244246

245247
class RunSqlQuery(Tools):
246248
For: ClassVar[Annotated[ToolType, ToolType.FOR_SELF | ToolType.FOR_DATA_PATTERNS]]
249+
_safe = [
250+
expressions.Select,
251+
expressions.With,
252+
expressions.Union,
253+
]
254+
255+
@staticmethod
def ensure_query_allowed(s: str) -> None:
    """Raise ``ValueError`` unless ``s`` is accepted as a read-only query.

    The check is skipped entirely when ``allow_dml`` is enabled in the
    Dremio settings. Otherwise the statement is parsed with sqlglot and
    accepted only if the parsed expression is one of the types listed in
    ``RunSqlQuery._safe``. If parsing fails, a keyword regex is used as a
    best-effort fallback and the query is accepted only when none of the
    blocked keywords appear.

    Args:
        s: sql query text to validate.

    Raises:
        ValueError: if the query is not recognised as an allowed statement.
    """
    if settings.instance().dremio.allow_dml:
        return

    try:
        q = parse_one(s)
    except Exception:
        # Deliberately broad: any parser failure falls back to the keyword
        # scan. `Exception` (rather than a bare `except`) lets
        # KeyboardInterrupt / SystemExit propagate instead of being
        # swallowed by the fallback.
        if not re.search(
            r"\b(drop|insert|update|truncate|delete|copy into|alter|create)\b",
            s,
            re.IGNORECASE,
        ):
            return
    else:
        # NOTE(review): parse_one yields a single expression; confirm that
        # multi-statement input cannot slip a later statement past this check.
        if isinstance(q, tuple(RunSqlQuery._safe)):
            return

    raise ValueError(
        "The query contains a DML statement. Only select queries are allowed"
    )
247274

248275
async def invoke(self, s: str) -> Dict[str, List[Any]]:
249276
"""Run a SELECT sql query on the Dremio cluster and return the results.
@@ -253,11 +280,7 @@ async def invoke(self, s: str) -> Dict[str, List[Any]]:
253280
Args:
254281
s: sql query
255282
"""
256-
# TODO: graduate to a more sophisticated SQL parser and check to allow better queries
257-
if re.search(r"(drop|insert|update|truncate|delete)", s, re.IGNORECASE):
258-
raise ValueError(
259-
"The query contains a DML statement. Only select queries are allowed"
260-
)
283+
RunSqlQuery.ensure_query_allowed(s)
261284
try:
262285
s = f"/* dremioai: submitter={self.__class__.__name__} */\n{s}"
263286
df = await sql.run_query(query=s, use_df=True)
@@ -476,6 +499,8 @@ async def invoke(self) -> Dict[str, Any]:
476499
"jvm_gc_pause_seconds": "Indicates how long the JVM was paused for garbage collection, and also is a rubric to know if the system is in use",
477500
"memory_heap_usage": "Indicates the amount of memory used by the JVM",
478501
"memory_heap_committed": "Indicates the amount of memory committed by the JVM",
502+
"dremio_engine_executors": "Number of executors running in the Dremio engine. It correlates to dremio_engine_replica_running using engine_id label",
503+
"dremio_engine_replica_running": "Number of running replicas in the Dremio engine. It correlates to dremio_engine_executors using engine_id label",
479504
}
480505

481506

0 commit comments

Comments
 (0)