sinaptik-ai · gventuri · Jan 17, 2025 · Jan 17, 2025 · Jan 17, 2025 · Jan 17, 2025
diff --git a/pandasai/__init__.py b/pandasai/__init__.py
@@ -121,7 +121,7 @@ def clear_cache(filename: str = None):
     cache.clear()
 
 
-def chat(query: str, *dataframes: List[DataFrame]):
+def chat(query: str, *dataframes: DataFrame):
     """
     Start a new chat interaction with the assistant on Dataframe(s).
 

diff --git a/pandasai/agent/base.py b/pandasai/agent/base.py
@@ -2,6 +2,9 @@
 import warnings
 from typing import Any, List, Optional, Union
 
+import duckdb
+import pandas as pd
+
 from pandasai.core.cache import Cache
 from pandasai.core.code_execution.code_executor import CodeExecutor
 from pandasai.core.code_generation.base import CodeGenerator
@@ -23,6 +26,7 @@
 from pandasai.vectorstores.vectorstore import VectorStore
 
 from ..config import Config
+from ..constants import LOCAL_SOURCE_TYPES
 from .state import AgentState
 
 
@@ -102,12 +106,43 @@ def execute_code(self, code: str) -> dict:
         """Execute the generated code."""
         self._state.logger.log(f"Executing code: {code}")
         code_executor = CodeExecutor(self._state.config)
-        code_executor.add_to_env(
-            "execute_sql_query", self._state.dfs[0].execute_sql_query
-        )
+        code_executor.add_to_env("execute_sql_query", self._execute_sql_query)
 
         return code_executor.execute_and_return_result(code)
 
+    def _execute_local_sql_query(self, query: str) -> pd.DataFrame:
+        """Run `query` in a DuckDB connection over every DataFrame in the state.
+
+        Raises:
+            RuntimeError: Chained from any `duckdb.Error` raised while executing.
+        """
+        try:
+            # The context manager ensures the connection is closed afterwards.
+            with duckdb.connect() as connection:
+                for frame in self._state.dfs:
+                    connection.register(frame.name, frame)
+                return connection.sql(query).df()
+        except duckdb.Error as exc:
+            raise RuntimeError(f"SQL execution failed: {exc}") from exc
+
+    def _execute_sql_query(self, query: str) -> pd.DataFrame:
+        """
+        Run an SQL query against the DataFrames held in the agent state.
+
+        The first DataFrame's source type picks the path: types listed in
+        LOCAL_SOURCE_TYPES go through `_execute_local_sql_query`; any other type
+        is delegated to that DataFrame's own `execute_sql_query`.
+
+        Raises:
+            ValueError: If the state holds no DataFrames.
+        """
+        if not self._state.dfs:
+            raise ValueError("No DataFrames available to register for query execution.")
+        primary = self._state.dfs[0]
+        if primary.schema.source.type not in LOCAL_SOURCE_TYPES:
+            return primary.execute_sql_query(query)
+        return self._execute_local_sql_query(query)
+
     def execute_with_retries(self, code: str) -> Any:
         """Execute the code with retry logic."""
         max_retries = self._state.config.max_retries

diff --git a/pandasai/agent/state.py b/pandasai/agent/state.py
@@ -9,7 +9,7 @@
 from pandasai.config import Config, ConfigManager
 from pandasai.constants import DEFAULT_CACHE_DIRECTORY, DEFAULT_CHART_DIRECTORY
 from pandasai.core.cache import Cache
-from pandasai.data_loader.schema_validator import is_schema_source_same
+from pandasai.data_loader.semantic_layer_schema import is_schema_source_same
 from pandasai.exceptions import InvalidConfigError
 from pandasai.helpers.folder import Folder
 from pandasai.helpers.logger import Logger

diff --git a/pandasai/data_loader/schema_validator.py b/pandasai/data_loader/schema_validator.py
diff --git a/pandasai/dataframe/base.py b/pandasai/dataframe/base.py
@@ -229,13 +229,6 @@ def pull(self):
 
         print(f"Dataset pulled successfully from path: {self.path}")
 
-    def execute_sql_query(self, query: str) -> pd.DataFrame:
-        import duckdb
-
-        db = duckdb.connect(":memory:")
-        db.register(self.name, self)
-        return db.query(query).df()
-
     @staticmethod
     def get_column_type(column_dtype) -> Optional[str]:
         """

diff --git a/tests/unit_tests/agent/test_agent.py b/tests/unit_tests/agent/test_agent.py
@@ -1,12 +1,14 @@
 import os
 from typing import Optional
-from unittest.mock import MagicMock, Mock, patch
+from unittest.mock import MagicMock, Mock, mock_open, patch
 
 import pandas as pd
 import pytest
 
+from pandasai import DatasetLoader, VirtualDataFrame
 from pandasai.agent.base import Agent
 from pandasai.config import Config, ConfigManager
+from pandasai.data_loader.semantic_layer_schema import SemanticLayerSchema
 from pandasai.dataframe.base import DataFrame
 from pandasai.exceptions import CodeExecutionError
 from pandasai.llm.fake import FakeLLM
@@ -15,6 +17,24 @@
 class TestAgent:
     "Unit tests for Agent class"
 
+    @pytest.fixture
+    def mysql_schema(self):
+        """Build a semantic-layer schema for a MySQL-backed `countries` table."""
+        # Connection settings for the test MySQL source.
+        connection = {
+            "host": "localhost",
+            "port": 3306,
+            "database": "test_db",
+            "user": "test_user",
+            "password": "test_password",
+        }
+        source = {
+            "type": "mysql",
+            "connection": connection,
+            "table": "countries",
+        }
+        return SemanticLayerSchema(name="countries", source=source)
+
     @pytest.fixture
     def sample_df(self) -> DataFrame:
         return DataFrame(
@@ -429,3 +449,52 @@ def test_train_method_with_code_but_no_queries(self, agent):
         codes = ["code1", "code2"]
         with pytest.raises(ValueError):
             agent.train(codes)
+
+    def test_execute_local_sql_query_success(self, agent):
+        """A local count query over `countries` returns a single `total` of 4."""
+        sql = "SELECT count(*) as total from countries;"
+        actual = agent._execute_local_sql_query(sql)
+        pd.testing.assert_frame_equal(actual, pd.DataFrame({"total": [4]}))
+
+    def test_execute_local_sql_query_failure(self, agent):
+        with pytest.raises(RuntimeError, match="SQL execution failed"):
+            agent._execute_local_sql_query("wrong query;")  # not valid SQL
+
+    def test_execute_sql_query_success_local(self, agent):
+        """`_execute_sql_query` returns the expected count for the `agent` fixture."""
+        sql = "SELECT count(*) as total from countries;"
+        actual = agent._execute_sql_query(sql)
+        pd.testing.assert_frame_equal(actual, pd.DataFrame({"total": [4]}))
+
+    @patch("os.path.exists", return_value=True)
+    def test_execute_sql_query_success_virtual_dataframe(
+        self, mock_exists, agent, mysql_schema, sample_df
+    ):
+        """Check `_execute_sql_query` on a DataFrame loaded from the MySQL schema."""
+        query = "SELECT count(*) as total from countries;"
+        loader = DatasetLoader()
+        expected_result = pd.DataFrame({"total": [4]})
+        with patch(
+            "builtins.open", mock_open(read_data=str(mysql_schema.to_yaml()))
+        ), patch(
+            "pandasai.data_loader.loader.DatasetLoader.execute_query"
+        ) as mock_query:
+            # First execute_query call returns the sample data, second the query result
+            mock_query.side_effect = [sample_df, expected_result]
+
+            virtual_dataframe = loader.load("test/users")
+            agent._state.dfs = [virtual_dataframe]
+
+            pd.testing.assert_frame_equal(virtual_dataframe.head(), sample_df)
+            result = agent._execute_sql_query(query)
+            pd.testing.assert_frame_equal(result, expected_result)
+
+            # Verify execute_query was called appropriately
+            assert mock_query.call_count == 2  # Once for head(), once for the SQL query
+
+    def test_execute_sql_query_error_no_dataframe(self, agent):
+        """With no DataFrames in state, `_execute_sql_query` raises ValueError."""
+        agent._state.dfs = None
+        expected_error = pytest.raises(ValueError, match="No DataFrames available")
+        with expected_error:
+            agent._execute_sql_query("SELECT count(*) as total from countries;")
diff --git a/tests/unit_tests/dataframe/test_loader.py b/tests/unit_tests/dataframe/test_loader.py
@@ -1,11 +1,8 @@
 import logging
-import sys
-from datetime import datetime, timedelta
 from unittest.mock import mock_open, patch
 
 import pandas as pd
 import pytest
-import yaml
 
 from pandasai.data_loader.loader import DatasetLoader
 from pandasai.data_loader.semantic_layer_schema import SemanticLayerSchema

diff --git a/tests/unit_tests/helpers/test_session.py b/tests/unit_tests/helpers/test_session.py
@@ -8,6 +8,7 @@
 from pandasai.helpers.session import Session, get_pandaai_session
 
 
+@patch("pandasai.os.environ", {})
 def test_session_init_without_api_key():
     """Test that Session initialization raises PandaAIApiKeyError when no API key is provided"""
     with pytest.raises(PandaAIApiKeyError) as exc_info:
@@ -18,6 +19,7 @@ def test_session_init_without_api_key():
     )
 
 
+@patch("pandasai.os.environ", {})
 def test_session_init_with_none_api_key():
     """Test that Session initialization raises PandaAIApiKeyError when API key is None"""
     with pytest.raises(PandaAIApiKeyError) as exc_info:
@@ -28,18 +30,21 @@ def test_session_init_with_none_api_key():
     )
 
 
+@patch("pandasai.os.environ", {})
 def test_session_init_with_api_key():
     """Test that Session initialization works with a valid API key"""
     session = Session(api_key="test-key")
     assert session._api_key == "test-key"
 
 
+@patch("pandasai.os.environ", {})
 def test_session_init_with_default_api_url():
     """Test that Session initialization uses DEFAULT_API_URL when no URL is provided"""
     session = Session(api_key="test-key")
     assert session._endpoint_url == DEFAULT_API_URL
 
 
+@patch("pandasai.os.environ", {})
 def test_session_init_with_custom_api_url():
     """Test that Session initialization uses provided URL"""
     custom_url = "https://custom.api.url"
@@ -64,6 +69,7 @@ def test_session_init_with_env_api_url():
     assert session._endpoint_url == "https://env.api.url"
 
 
+@patch("pandasai.os.environ", {})
 def test_get_pandaai_session_without_credentials():
     """Test that get_pandaai_session raises PandaAIApiKeyError when no credentials are provided"""
     with pytest.raises(PandaAIApiKeyError) as exc_info:

diff --git a/tests/unit_tests/test_pandasai_init.py b/tests/unit_tests/test_pandasai_init.py
@@ -190,7 +190,10 @@ def test_load_successful_zip_extraction(
         mock_zip_file.return_value.__enter__.return_value.extractall.assert_called_once()
         assert isinstance(result, MagicMock)
 
-    def test_load_without_api_credentials(self):
+    @patch("pandasai.os.environ", {})
+    def test_load_without_api_credentials(
+        self,
+    ):
         """Test that load raises PandaAIApiKeyError when no API credentials are provided"""
         with pytest.raises(PandaAIApiKeyError) as exc_info:
             pandasai.load("test/dataset")