Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
python-version: ["3.12", "3.13", "3.14"]

steps:
- uses: actions/checkout@v5
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
python-version: ["3.12", "3.13", "3.14"]

steps:
- uses: actions/checkout@v5
Expand Down
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.10
3.12
12 changes: 10 additions & 2 deletions databao/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,18 @@
__version__ = "0.0.0" # Fallback for development mode


from databao.api import new_agent
from databao.api import new_agent, new_agent_v2
from databao.configs.llm import LLMConfig
from databao.core import Agent, ExecutionResult, Executor, Opa, Thread, VisualisationResult, Visualizer
from databao.core import Agent, AgentV1, ExecutionResult, Executor, Opa, Thread, VisualisationResult, Visualizer
from databao.core.v2 import AgentV2, Context, ContextBuilder
from databao.databases import supported_db_types

__all__ = [
"Agent",
"AgentV1",
"AgentV2",
"Context",
"ContextBuilder",
"ExecutionResult",
"Executor",
"LLMConfig",
Expand All @@ -21,4 +27,6 @@
"Visualizer",
"__version__",
"new_agent",
"new_agent_v2",
"supported_db_types",
]
43 changes: 40 additions & 3 deletions databao/api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from databao.caches.in_mem_cache import InMemCache
from databao.configs.agent import DEFAULT_AGENT_CONFIG, AgentConfig
from databao.configs.llm import LLMConfig, LLMConfigDirectory
from databao.core import Agent, Cache, Executor, Visualizer
from databao.core import AgentV1, Cache, Executor, Visualizer
from databao.core.v2.agent import AgentV2
from databao.core.v2.context import Context
from databao.executors.lighthouse.executor import LighthouseExecutor
from databao.visualizers.vega_chat import VegaChatVisualizer

Expand All @@ -18,13 +20,48 @@ def new_agent(
stream_plot: bool = False,
lazy_threads: bool = False,
auto_output_modality: bool = True,
) -> Agent:
) -> AgentV1:
"""This is an entry point for users to create a new agent.
Agent can't be modified after it's created. Only new data sources can be added.
"""
llm_config = llm_config if llm_config else LLMConfigDirectory.DEFAULT
agent_config = agent_config if agent_config else DEFAULT_AGENT_CONFIG
return Agent(
return AgentV1(
llm_config,
agent_config,
name=name or "default_agent",
data_executor=data_executor or LighthouseExecutor(),
visualizer=visualizer or VegaChatVisualizer(llm_config),
cache=cache or InMemCache(),
rows_limit=rows_limit,
stream_ask=stream_ask,
stream_plot=stream_plot,
lazy_threads=lazy_threads,
auto_output_modality=auto_output_modality,
)


def new_agent_v2(
context: Context,
name: str | None = None,
llm_config: LLMConfig | None = None,
agent_config: AgentConfig | None = None,
data_executor: Executor | None = None,
visualizer: Visualizer | None = None,
cache: Cache | None = None,
rows_limit: int = 1000,
stream_ask: bool = True,
stream_plot: bool = False,
lazy_threads: bool = False,
auto_output_modality: bool = True,
) -> AgentV2:
"""This is an entry point for users to create a new agent.
Agent can't be modified after it's created. Only new data sources can be added.
"""
llm_config = llm_config if llm_config else LLMConfigDirectory.DEFAULT
agent_config = agent_config if agent_config else DEFAULT_AGENT_CONFIG
return AgentV2(
context,
llm_config,
agent_config,
name=name or "default_agent",
Expand Down
4 changes: 2 additions & 2 deletions databao/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from databao.core.agent import Agent
from databao.core.agent import Agent, AgentV1
from databao.core.cache import Cache
from databao.core.executor import ExecutionResult, Executor
from databao.core.opa import Opa
from databao.core.thread import Thread
from databao.core.visualizer import VisualisationResult, Visualizer

__all__ = ["Agent", "Cache", "ExecutionResult", "Executor", "Opa", "Thread", "VisualisationResult", "Visualizer"]
__all__ = ["Agent", "AgentV1", "Cache", "ExecutionResult", "Executor", "Opa", "Thread", "VisualisationResult", "Visualizer"]
54 changes: 51 additions & 3 deletions databao/core/agent.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Protocol, cast

from duckdb import DuckDBPyConnection
from langchain_core.language_models.chat_models import BaseChatModel
Expand All @@ -17,12 +17,59 @@
from databao.core.visualizer import Visualizer


class Agent:
class Agent(Protocol):
    """Structural interface that every agent implementation exposes.

    An agent manages all databases and dataframes together with the context
    for them. It determines which LLM, which executor and which visualizer
    are used by all of its threads. Several threads can be spawned from a
    single agent.
    """

    def thread(
        self,
        *,
        stream_ask: bool | None = None,
        stream_plot: bool | None = None,
        lazy: bool | None = None,
        auto_output_modality: bool | None = None,
    ) -> Thread:
        """Start a new thread in this agent."""
        ...

    @property
    def sources(self) -> Sources:
        """The agent's data sources as a single ``Sources`` object."""
        ...

    @property
    def dbs(self) -> dict[str, DBDataSource]:
        """Database data sources, keyed by string."""
        ...

    @property
    def dfs(self) -> dict[str, DFDataSource]:
        """Dataframe data sources, keyed by string."""
        ...

    @property
    def name(self) -> str:
        """Name of the agent."""
        ...

    @property
    def llm(self) -> BaseChatModel:
        """Chat model used by the agent."""
        ...

    @property
    def llm_config(self) -> "LLMConfig":
        """LLM configuration of the agent."""
        ...

    @property
    def agent_config(self) -> "AgentConfig":
        """Agent-level configuration."""
        ...

    @property
    def executor(self) -> "Executor":
        """Executor used by the agent."""
        ...

    @property
    def visualizer(self) -> "Visualizer":
        """Visualizer used by the agent."""
        ...

    @property
    def cache(self) -> "Cache":
        """Cache used by the agent."""
        ...

    @property
    def additional_context(self) -> list[str]:
        """Additional context entries, as a list of strings."""
        ...


class AgentV1(Agent):

def __init__(
self,
llm: "LLMConfig",
Expand Down Expand Up @@ -138,8 +185,9 @@ def thread(
"""Start a new thread in this agent."""
if not self.__sources.dbs and not self.__sources.dfs:
raise ValueError("No databases or dataframes registered in this agent.")
# noinspection PyTypeChecker
return Thread(
self,
cast(Agent, self),
rows_limit=self.__rows_limit,
stream_ask=stream_ask if stream_ask is not None else self.__stream_ask,
stream_plot=stream_plot if stream_plot is not None else self.__stream_plot,
Expand Down
16 changes: 15 additions & 1 deletion databao/core/data_source.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,24 @@
from dataclasses import dataclass
from typing import Any

import pandas as pd
from databao_context_engine import DatasourceType
from duckdb import DuckDBPyConnection
from sqlalchemy import Connection, Engine


@dataclass
class DBConnectionConfig:
    """Describe a database connection by configuration rather than by a live connection object."""

    # Kind of datasource, using the DatasourceType from databao_context_engine.
    type: DatasourceType
    # Settings for the connection as a string-keyed dict.
    # NOTE(review): the expected keys are not visible here; presumably they depend on `type` — confirm.
    content: dict[str, Any]


DBConnectionRuntime = DuckDBPyConnection | Engine | Connection


DBConnection = DBConnectionConfig | DBConnectionRuntime


@dataclass
class DataSource:
name: str
Expand All @@ -18,7 +32,7 @@ class DFDataSource(DataSource):

@dataclass
class DBDataSource(DataSource):
db_connection: DuckDBPyConnection | Engine | Connection
db_connection: DBConnection


@dataclass
Expand Down
4 changes: 4 additions & 0 deletions databao/core/v2/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from databao.core.v2.agent import AgentV2
from databao.core.v2.context import Context, ContextBuilder

__all__ = ["AgentV2", "Context", "ContextBuilder"]
127 changes: 127 additions & 0 deletions databao/core/v2/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
from typing import TYPE_CHECKING, cast

from langchain_core.language_models.chat_models import BaseChatModel

from databao.core import Agent
from databao.core.data_source import DBDataSource, DFDataSource, Sources
from databao.core.thread import Thread
from databao.core.v2.context import Context

if TYPE_CHECKING:
    from databao.configs.agent import AgentConfig
    from databao.configs.llm import LLMConfig
    from databao.core.cache import Cache
    from databao.core.executor import Executor
    from databao.core.visualizer import Visualizer


# TODO (dce): use Context.search_context
class AgentV2(Agent):
    """Agent whose data sources are taken from a ``Context`` at construction time.

    All database and dataframe sources found in ``context.sources`` are
    registered with the executor while the agent is being constructed.
    """

    def __init__(
        self,
        context: Context,
        llm: "LLMConfig",
        agent_config: "AgentConfig",
        data_executor: "Executor",
        visualizer: "Visualizer",
        cache: "Cache",
        *,
        name: str = "default_agent",
        rows_limit: int,
        stream_ask: bool = True,
        stream_plot: bool = False,
        lazy_threads: bool = False,
        auto_output_modality: bool = True,
    ) -> None:
        """Create the agent and register every source of ``context`` with the executor.

        Args:
            context: Provides the ``Sources`` object this agent works with.
            llm: LLM configuration; a chat model is created from it via ``new_chat_model()``.
            agent_config: Agent-level configuration, stored as is.
            data_executor: Executor that the sources are registered with.
            visualizer: Visualizer exposed through the ``visualizer`` property.
            cache: Cache exposed through the ``cache`` property.
            name: Name of the agent.
            rows_limit: Row limit passed to every thread created by this agent.
            stream_ask: Default ``stream_ask`` value for new threads.
            stream_plot: Default ``stream_plot`` value for new threads.
            lazy_threads: Default ``lazy`` value for new threads.
            auto_output_modality: Default ``auto_output_modality`` value for new threads.
        """
        self.__name = name
        self.__llm = llm.new_chat_model()
        self.__llm_config = llm
        self.__agent_config = agent_config

        self.__sources: Sources = context.sources

        self.__executor = data_executor
        self.__visualizer = visualizer
        self.__cache = cache

        # Thread defaults
        self.__rows_limit = rows_limit
        self.__lazy_threads = lazy_threads
        self.__auto_output_modality = auto_output_modality
        self.__stream_ask = stream_ask
        self.__stream_plot = stream_plot

        # Must run last: it reads the executor and the sources assigned above.
        self._init_executor()

    def _init_executor(self) -> None:
        """Register all database and dataframe sources with the executor."""
        for db_source in self.__sources.dbs.values():
            self.executor.register_db(db_source)
        for df_source in self.__sources.dfs.values():
            self.executor.register_df(df_source)

    def thread(
        self,
        *,
        stream_ask: bool | None = None,
        stream_plot: bool | None = None,
        lazy: bool | None = None,
        auto_output_modality: bool | None = None,
    ) -> Thread:
        """Start a new thread in this agent.

        Each argument left as ``None`` falls back to the default given to the
        agent's constructor.

        Raises:
            ValueError: If the agent has neither databases nor dataframes.
        """
        if not self.__sources.dbs and not self.__sources.dfs:
            raise ValueError("No databases or dataframes registered in this agent.")
        # noinspection PyTypeChecker
        return Thread(
            cast(Agent, self),
            rows_limit=self.__rows_limit,
            stream_ask=stream_ask if stream_ask is not None else self.__stream_ask,
            stream_plot=stream_plot if stream_plot is not None else self.__stream_plot,
            lazy=lazy if lazy is not None else self.__lazy_threads,
            auto_output_modality=auto_output_modality
            if auto_output_modality is not None
            else self.__auto_output_modality,
        )

    @property
    def sources(self) -> Sources:
        """The ``Sources`` object obtained from the context (not a copy)."""
        return self.__sources

    @property
    def dbs(self) -> dict[str, DBDataSource]:
        """Shallow copy of the database sources mapping."""
        return dict(self.__sources.dbs)

    @property
    def dfs(self) -> dict[str, DFDataSource]:
        """Shallow copy of the dataframe sources mapping."""
        return dict(self.__sources.dfs)

    @property
    def name(self) -> str:
        """Name given to the agent at construction."""
        return self.__name

    @property
    def llm(self) -> BaseChatModel:
        """Chat model created from the LLM configuration."""
        return self.__llm

    @property
    def llm_config(self) -> "LLMConfig":
        """LLM configuration the chat model was created from."""
        return self.__llm_config

    @property
    def agent_config(self) -> "AgentConfig":
        """Agent-level configuration passed to the constructor."""
        return self.__agent_config

    @property
    def executor(self) -> "Executor":
        """Executor the sources are registered with."""
        return self.__executor

    @property
    def visualizer(self) -> "Visualizer":
        """Visualizer passed to the constructor."""
        return self.__visualizer

    @property
    def cache(self) -> "Cache":
        """Cache passed to the constructor."""
        return self.__cache

    @property
    def additional_context(self) -> list[str]:
        """General additional context not specific to any one data source."""
        return self.__sources.additional_context
Loading
Loading