Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion databao/core/pipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,9 @@ def df(self, *, rows_limit: int | None = None) -> DataFrame | None:
Args:
rows_limit: Optional override for the number of rows to materialize in lazy mode.
"""
return self._materialize_data(rows_limit if rows_limit else self._data_materialized_rows).df
df = self._materialize_data(rows_limit if rows_limit else self._data_materialized_rows).df
# Return a copy of the DataFrame so that outside callers cannot mutate internal state
return df.copy() if df is not None else None

def plot(
self, request: str | None = None, *, rows_limit: int | None = None, stream: bool | None = None
Expand Down
3 changes: 1 addition & 2 deletions databao/duckdb/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from databao.duckdb.react_tools import AgentResponse, make_duckdb_tool, make_react_duckdb_agent, sql_strip
from databao.duckdb.react_tools import AgentResponse, make_duckdb_tool, make_react_duckdb_agent
from databao.duckdb.utils import describe_duckdb_schema, register_sqlalchemy, sqlalchemy_to_duckdb_mysql

__all__ = [
Expand All @@ -7,6 +7,5 @@
"make_duckdb_tool",
"make_react_duckdb_agent",
"register_sqlalchemy",
"sql_strip",
"sqlalchemy_to_duckdb_mysql",
]
28 changes: 9 additions & 19 deletions databao/duckdb/react_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from typing import Any

import pandas as pd
import sqlglot
from duckdb import DuckDBPyConnection
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.tools import tool
Expand All @@ -20,27 +19,18 @@ class AgentResponse(BaseModel):
explanation: str


def sql_strip(query: str) -> str:
"""Strip whitespace and trailing semicolons from SQL query."""
return query.strip().rstrip(";")


def sql_with_limit(sql: str, limit: int) -> str:
"""Ensure the SQL has a LIMIT clause, appending one if missing."""
# TODO Change the limit value if limit is already present in the query?
sql_to_run = sql_strip(sql)
ast = sqlglot.parse_one(sql_to_run, dialect="duckdb")
if not ast.args.get("limit") and isinstance(ast, sqlglot.expressions.Query):
ast_with_limit = ast.limit(limit) # Add outer LIMIT clause
sql_to_run = ast_with_limit.sql()
return sql_to_run
def execute_duckdb_sql(sql: str, con: DuckDBPyConnection, *, limit: int | None = None) -> pd.DataFrame:
# Use DuckDB's Relation API to inject a LIMIT clause
rel = con.sql(sql) # A lazy Relation

# TODO: Do we want to forbid non-SELECT statements?
# Non-SELECT statements (CREATE TABLE, etc.) are executed immediately and return None
if rel is None:
return pd.DataFrame()

def execute_duckdb_sql(sql: str, con: DuckDBPyConnection, *, limit: int | None = None) -> pd.DataFrame:
sql_to_run = sql_strip(sql)
if limit is not None:
sql_to_run = sql_with_limit(sql_to_run, limit)
return con.execute(sql_to_run).df()
rel = rel.limit(limit)
return rel.df() # Execute and return DataFrame


def make_duckdb_tool(con: DuckDBPyConnection) -> Any:
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ dependencies = [
"matplotlib>=3.10.0",
"edaplot-vl~=0.1.2",
"diskcache>=5.6.3",
"sqlglot>=27.29.0",
]

[project.optional-dependencies]
Expand Down
44 changes: 0 additions & 44 deletions tests/test_sql_with_limit.py

This file was deleted.

11 changes: 0 additions & 11 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.