JetBrains · Veirisa · Jan 27, 2026
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
@@ -17,7 +17,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
+        python-version: ["3.12", "3.13", "3.14"]
 
     steps:
       - uses: actions/checkout@v5

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
@@ -17,7 +17,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
+        python-version: ["3.12", "3.13", "3.14"]
 
     steps:
       - uses: actions/checkout@v5

diff --git a/.python-version b/.python-version
@@ -1 +1 @@
-3.10
+3.12
diff --git a/databao/__init__.py b/databao/__init__.py
@@ -6,12 +6,18 @@
     __version__ = "0.0.0"  # Fallback for development mode
 
 
-from databao.api import new_agent
+from databao.api import new_agent, new_agent_v2
 from databao.configs.llm import LLMConfig
-from databao.core import Agent, ExecutionResult, Executor, Opa, Thread, VisualisationResult, Visualizer
+from databao.core import Agent, AgentV1, ExecutionResult, Executor, Opa, Thread, VisualisationResult, Visualizer
+from databao.core.v2 import AgentV2, Context, ContextBuilder
+from databao.databases import supported_db_types
 
 __all__ = [
     "Agent",
+    "AgentV1",
+    "AgentV2",
+    "Context",
+    "ContextBuilder",
     "ExecutionResult",
     "Executor",
     "LLMConfig",
@@ -21,4 +27,6 @@
     "Visualizer",
     "__version__",
     "new_agent",
+    "new_agent_v2",
+    "supported_db_types",
 ]
diff --git a/databao/api.py b/databao/api.py
@@ -1,7 +1,9 @@
 from databao.caches.in_mem_cache import InMemCache
 from databao.configs.agent import DEFAULT_AGENT_CONFIG, AgentConfig
 from databao.configs.llm import LLMConfig, LLMConfigDirectory
-from databao.core import Agent, Cache, Executor, Visualizer
+from databao.core import AgentV1, Cache, Executor, Visualizer
+from databao.core.v2.agent import AgentV2
+from databao.core.v2.context import Context
 from databao.executors.lighthouse.executor import LighthouseExecutor
 from databao.visualizers.vega_chat import VegaChatVisualizer
 
@@ -18,13 +20,48 @@ def new_agent(
     stream_plot: bool = False,
     lazy_threads: bool = False,
     auto_output_modality: bool = True,
-) -> Agent:
+) -> AgentV1:
     """This is an entry point for users to create a new agent.
     Agent can't be modified after it's created. Only new data sources can be added.
     """
     llm_config = llm_config if llm_config else LLMConfigDirectory.DEFAULT
     agent_config = agent_config if agent_config else DEFAULT_AGENT_CONFIG
-    return Agent(
+    return AgentV1(
+        llm_config,
+        agent_config,
+        name=name or "default_agent",
+        data_executor=data_executor or LighthouseExecutor(),
+        visualizer=visualizer or VegaChatVisualizer(llm_config),
+        cache=cache or InMemCache(),
+        rows_limit=rows_limit,
+        stream_ask=stream_ask,
+        stream_plot=stream_plot,
+        lazy_threads=lazy_threads,
+        auto_output_modality=auto_output_modality,
+    )
+
+
+def new_agent_v2(
+    context: Context,
+    name: str | None = None,
+    llm_config: LLMConfig | None = None,
+    agent_config: AgentConfig | None = None,
+    data_executor: Executor | None = None,
+    visualizer: Visualizer | None = None,
+    cache: Cache | None = None,
+    rows_limit: int = 1000,
+    stream_ask: bool = True,
+    stream_plot: bool = False,
+    lazy_threads: bool = False,
+    auto_output_modality: bool = True,
+) -> AgentV2:
+    """This is an entry point for users to create a new agent from a prebuilt context.
+    Agent can't be modified after it's created. Its data sources are the ones provided by ``context``.
+    """
+    llm_config = llm_config if llm_config else LLMConfigDirectory.DEFAULT
+    agent_config = agent_config if agent_config else DEFAULT_AGENT_CONFIG
+    return AgentV2(
+        context,
         llm_config,
         agent_config,
         name=name or "default_agent",

diff --git a/databao/core/__init__.py b/databao/core/__init__.py
@@ -1,8 +1,8 @@
-from databao.core.agent import Agent
+from databao.core.agent import Agent, AgentV1
 from databao.core.cache import Cache
 from databao.core.executor import ExecutionResult, Executor
 from databao.core.opa import Opa
 from databao.core.thread import Thread
 from databao.core.visualizer import VisualisationResult, Visualizer
 
-__all__ = ["Agent", "Cache", "ExecutionResult", "Executor", "Opa", "Thread", "VisualisationResult", "Visualizer"]
+__all__ = ["Agent", "AgentV1", "Cache", "ExecutionResult", "Executor", "Opa", "Thread", "VisualisationResult", "Visualizer"]
diff --git a/databao/core/agent.py b/databao/core/agent.py
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Protocol, cast
 
 from duckdb import DuckDBPyConnection
 from langchain_core.language_models.chat_models import BaseChatModel
@@ -17,12 +17,59 @@
     from databao.core.visualizer import Visualizer
 
 
-class Agent:
+class Agent(Protocol):  # interface only: every member declared here is a body-less stub
     """An agent manages all databases and Dataframes as well as the context for them.
     Agent determines what LLM to use, what executor to use and how to visualize data for all threads.
     Several threads can be spawned out of the agent.
     """
 
+    def thread(
+        self,
+        *,  # all thread options are keyword-only
+        stream_ask: bool | None = None,
+        stream_plot: bool | None = None,
+        lazy: bool | None = None,
+        auto_output_modality: bool | None = None,
+    ) -> Thread:
+        """Start a new thread in this agent."""
+        ...  # stub: only the signature is declared, implementing classes supply the body
+
+    @property
+    def sources(self) -> Sources: ...  # container with the agent's data sources
+
+    @property
+    def dbs(self) -> dict[str, DBDataSource]: ...  # database sources, str-keyed
+
+    @property
+    def dfs(self) -> dict[str, DFDataSource]: ...  # dataframe sources, str-keyed
+
+    @property
+    def name(self) -> str: ...  # agent name
+
+    @property
+    def llm(self) -> BaseChatModel: ...  # chat model the agent uses
+
+    @property
+    def llm_config(self) -> "LLMConfig": ...  # configuration of the LLM
+
+    @property
+    def agent_config(self) -> "AgentConfig": ...  # agent-level configuration
+
+    @property
+    def executor(self) -> "Executor": ...  # executor the agent uses
+
+    @property
+    def visualizer(self) -> "Visualizer": ...  # visualizer the agent uses
+
+    @property
+    def cache(self) -> "Cache": ...  # cache the agent uses
+
+    @property
+    def additional_context(self) -> list[str]: ...  # context not specific to any one data source
+
+
+class AgentV1(Agent):
+
     def __init__(
         self,
         llm: "LLMConfig",
@@ -138,8 +185,9 @@ def thread(
         """Start a new thread in this agent."""
         if not self.__sources.dbs and not self.__sources.dfs:
             raise ValueError("No databases or dataframes registered in this agent.")
+        # noinspection PyTypeChecker
         return Thread(
-            self,
+            cast(Agent, self),
             rows_limit=self.__rows_limit,
             stream_ask=stream_ask if stream_ask is not None else self.__stream_ask,
             stream_plot=stream_plot if stream_plot is not None else self.__stream_plot,

diff --git a/databao/core/data_source.py b/databao/core/data_source.py
@@ -1,10 +1,24 @@
 from dataclasses import dataclass
+from typing import Any
 
 import pandas as pd
+from databao_context_engine import DatasourceType
 from duckdb import DuckDBPyConnection
 from sqlalchemy import Connection, Engine
 
 
+@dataclass
+class DBConnectionConfig:  # connection given as settings, in contrast to the live objects in DBConnectionRuntime
+    type: DatasourceType  # datasource kind, as defined by databao_context_engine
+    content: dict[str, Any]  # free-form settings; expected keys are not visible here (presumably type-specific)
+
+
+DBConnectionRuntime = DuckDBPyConnection | Engine | Connection  # already-constructed DuckDB / SQLAlchemy objects
+
+
+DBConnection = DBConnectionConfig | DBConnectionRuntime  # either form is accepted by DBDataSource.db_connection
+
+
 @dataclass
 class DataSource:
     name: str
@@ -18,7 +32,7 @@ class DFDataSource(DataSource):
 
 @dataclass
 class DBDataSource(DataSource):
-    db_connection: DuckDBPyConnection | Engine | Connection
+    db_connection: DBConnection  # a live connection object or a DBConnectionConfig
 
 
 @dataclass

diff --git a/databao/core/v2/__init__.py b/databao/core/v2/__init__.py
@@ -0,0 +1,4 @@
+from databao.core.v2.agent import AgentV2
+from databao.core.v2.context import Context, ContextBuilder
+
+__all__ = ["AgentV2", "Context", "ContextBuilder"]
diff --git a/databao/core/v2/agent.py b/databao/core/v2/agent.py
@@ -0,0 +1,157 @@
+from typing import TYPE_CHECKING, cast
+
+from langchain_core.language_models.chat_models import BaseChatModel
+
+from databao.core import Agent
+from databao.core.data_source import DBDataSource, DFDataSource, Sources
+from databao.core.thread import Thread
+from databao.core.v2.context import Context
+
+if TYPE_CHECKING:
+    from databao.configs.agent import AgentConfig
+    from databao.configs.llm import LLMConfig
+    from databao.core.cache import Cache
+    from databao.core.executor import Executor
+    from databao.core.visualizer import Visualizer
+
+
+# TODO (dce): use Context.search_context
+class AgentV2(Agent):
+    """Agent whose data sources come from a prebuilt ``Context``.
+
+    Every source found in ``context.sources`` is registered with the executor when the agent is
+    constructed; this class defines no method for adding sources afterwards.
+    """
+
+    def __init__(
+        self,
+        context: Context,
+        llm: "LLMConfig",
+        agent_config: "AgentConfig",
+        data_executor: "Executor",
+        visualizer: "Visualizer",
+        cache: "Cache",
+        *,
+        name: str = "default_agent",
+        rows_limit: int,
+        stream_ask: bool = True,
+        stream_plot: bool = False,
+        lazy_threads: bool = False,
+        auto_output_modality: bool = True,
+    ) -> None:
+        """Build the agent and register the sources of ``context`` with ``data_executor``.
+
+        ``llm`` is kept as the LLM configuration and is also used to create the chat model.
+        ``rows_limit``, ``stream_ask``, ``stream_plot``, ``lazy_threads`` and ``auto_output_modality``
+        are stored as defaults for the threads created by ``thread()``.
+        """
+        self.__name = name
+        self.__llm = llm.new_chat_model()
+        self.__llm_config = llm
+        self.__agent_config = agent_config
+
+        self.__sources: Sources = context.sources
+
+        self.__executor = data_executor
+        self.__visualizer = visualizer
+        self.__cache = cache
+
+        # Thread defaults
+        self.__rows_limit = rows_limit
+        self.__lazy_threads = lazy_threads
+        self.__auto_output_modality = auto_output_modality
+        self.__stream_ask = stream_ask
+        self.__stream_plot = stream_plot
+
+        self._init_executor()
+
+    def _init_executor(self) -> None:
+        """Register every database and dataframe source with the executor."""
+        for db_source in self.__sources.dbs.values():
+            self.executor.register_db(db_source)
+        for df_source in self.__sources.dfs.values():
+            self.executor.register_df(df_source)
+
+    def thread(
+        self,
+        *,
+        stream_ask: bool | None = None,
+        stream_plot: bool | None = None,
+        lazy: bool | None = None,
+        auto_output_modality: bool | None = None,
+    ) -> Thread:
+        """Start a new thread in this agent.
+
+        An option left as ``None`` falls back to the default given to the constructor.
+
+        Raises:
+            ValueError: If the agent has neither databases nor dataframes.
+        """
+        if not self.__sources.dbs and not self.__sources.dfs:
+            raise ValueError("No databases or dataframes registered in this agent.")
+        # noinspection PyTypeChecker
+        return Thread(
+            cast(Agent, self),
+            rows_limit=self.__rows_limit,
+            stream_ask=stream_ask if stream_ask is not None else self.__stream_ask,
+            stream_plot=stream_plot if stream_plot is not None else self.__stream_plot,
+            lazy=lazy if lazy is not None else self.__lazy_threads,
+            auto_output_modality=auto_output_modality
+            if auto_output_modality is not None
+            else self.__auto_output_modality,
+        )
+
+    @property
+    def sources(self) -> Sources:
+        """The ``Sources`` object taken from the context."""
+        return self.__sources
+
+    @property
+    def dbs(self) -> dict[str, DBDataSource]:
+        """A new dict with the database sources."""
+        return dict(self.__sources.dbs)
+
+    @property
+    def dfs(self) -> dict[str, DFDataSource]:
+        """A new dict with the dataframe sources."""
+        return dict(self.__sources.dfs)
+
+    @property
+    def name(self) -> str:
+        """Name of the agent."""
+        return self.__name
+
+    @property
+    def llm(self) -> BaseChatModel:
+        """Chat model created from the LLM configuration."""
+        return self.__llm
+
+    @property
+    def llm_config(self) -> "LLMConfig":
+        """LLM configuration the agent was created with."""
+        return self.__llm_config
+
+    @property
+    def agent_config(self) -> "AgentConfig":
+        """Agent configuration the agent was created with."""
+        return self.__agent_config
+
+    @property
+    def executor(self) -> "Executor":
+        """Executor the sources are registered with."""
+        return self.__executor
+
+    @property
+    def visualizer(self) -> "Visualizer":
+        """Visualizer the agent was created with."""
+        return self.__visualizer
+
+    @property
+    def cache(self) -> "Cache":
+        """Cache the agent was created with."""
+        return self.__cache
+
+    @property
+    def additional_context(self) -> list[str]:
+        """General additional context not specific to any one data source."""
+        return self.__sources.additional_context