Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(Agent): adding test to improve coverage #1528

Merged
merged 4 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandasai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def clear_cache(filename: str = None):
cache.clear()


def chat(query: str, *dataframes: List[DataFrame]):
def chat(query: str, *dataframes: DataFrame):
"""
Start a new chat interaction with the assistant on Dataframe(s).

Expand Down
41 changes: 38 additions & 3 deletions pandasai/agent/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
import warnings
from typing import Any, List, Optional, Union

import duckdb
import pandas as pd

from pandasai.core.cache import Cache
from pandasai.core.code_execution.code_executor import CodeExecutor
from pandasai.core.code_generation.base import CodeGenerator
Expand All @@ -23,6 +26,7 @@
from pandasai.vectorstores.vectorstore import VectorStore

from ..config import Config
from ..constants import LOCAL_SOURCE_TYPES
from .state import AgentState


Expand Down Expand Up @@ -102,12 +106,43 @@ def execute_code(self, code: str) -> dict:
"""Execute the generated code."""
self._state.logger.log(f"Executing code: {code}")
code_executor = CodeExecutor(self._state.config)
code_executor.add_to_env(
"execute_sql_query", self._state.dfs[0].execute_sql_query
)
code_executor.add_to_env("execute_sql_query", self._execute_sql_query)

return code_executor.execute_and_return_result(code)

def _execute_local_sql_query(self, query: str) -> pd.DataFrame:
    """
    Run an SQL query through DuckDB against the DataFrames held in the state.

    Every DataFrame in ``self._state.dfs`` is registered on a fresh DuckDB
    connection under its own ``name`` before the query is executed.

    Args:
        query (str): The SQL query to execute.

    Returns:
        pd.DataFrame: The query result converted to a pandas DataFrame.

    Raises:
        RuntimeError: If DuckDB raises a ``duckdb.Error`` while connecting,
            registering or executing.
    """
    try:
        # The context manager closes the connection on exit, including on error
        with duckdb.connect() as connection:
            for frame in self._state.dfs:
                connection.register(frame.name, frame)

            # Convert to pandas while the connection is still open
            return connection.sql(query).df()
    except duckdb.Error as e:
        raise RuntimeError(f"SQL execution failed: {e}") from e

def _execute_sql_query(self, query: str) -> pd.DataFrame:
    """
    Execute an SQL query against the DataFrames held in the agent state.

    The source type of the first DataFrame decides the route: when it is one
    of ``LOCAL_SOURCE_TYPES`` the query goes through
    ``_execute_local_sql_query``; otherwise it is delegated to that
    DataFrame's own ``execute_sql_query``.

    Args:
        query (str): The SQL query to execute.

    Returns:
        pd.DataFrame: The result of the SQL query as a pandas DataFrame.

    Raises:
        ValueError: If the state holds no DataFrames (``None`` or empty).
    """
    dataframes = self._state.dfs
    if not dataframes:
        raise ValueError("No DataFrames available to register for query execution.")

    primary = dataframes[0]
    if primary.schema.source.type not in LOCAL_SOURCE_TYPES:
        return primary.execute_sql_query(query)

    return self._execute_local_sql_query(query)

def execute_with_retries(self, code: str) -> Any:
"""Execute the code with retry logic."""
max_retries = self._state.config.max_retries
Expand Down
2 changes: 1 addition & 1 deletion pandasai/agent/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pandasai.config import Config, ConfigManager
from pandasai.constants import DEFAULT_CACHE_DIRECTORY, DEFAULT_CHART_DIRECTORY
from pandasai.core.cache import Cache
from pandasai.data_loader.schema_validator import is_schema_source_same
from pandasai.data_loader.semantic_layer_schema import is_schema_source_same
from pandasai.exceptions import InvalidConfigError
from pandasai.helpers.folder import Folder
from pandasai.helpers.logger import Logger
Expand Down
9 changes: 0 additions & 9 deletions pandasai/data_loader/schema_validator.py

This file was deleted.

7 changes: 0 additions & 7 deletions pandasai/dataframe/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,13 +229,6 @@ def pull(self):

print(f"Dataset pulled successfully from path: {self.path}")

def execute_sql_query(self, query: str) -> pd.DataFrame:
    """
    Run an SQL query against this DataFrame using an in-memory DuckDB database.

    The DataFrame is registered under ``self.name`` so the query can refer
    to it by that name.

    Args:
        query (str): The SQL query to execute.

    Returns:
        pd.DataFrame: The query result converted to a pandas DataFrame.
    """
    import duckdb

    # Use a context manager so the connection is closed even when the query
    # raises; previously the connection was never closed.
    with duckdb.connect(":memory:") as db:
        db.register(self.name, self)
        # Convert to pandas before the connection is closed
        return db.query(query).df()

@staticmethod
def get_column_type(column_dtype) -> Optional[str]:
"""
Expand Down
71 changes: 70 additions & 1 deletion tests/unit_tests/agent/test_agent.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import os
from typing import Optional
from unittest.mock import MagicMock, Mock, patch
from unittest.mock import MagicMock, Mock, mock_open, patch

import pandas as pd
import pytest

from pandasai import DatasetLoader, VirtualDataFrame
from pandasai.agent.base import Agent
from pandasai.config import Config, ConfigManager
from pandasai.data_loader.semantic_layer_schema import SemanticLayerSchema
from pandasai.dataframe.base import DataFrame
from pandasai.exceptions import CodeExecutionError
from pandasai.llm.fake import FakeLLM
Expand All @@ -15,6 +17,24 @@
class TestAgent:
"Unit tests for Agent class"

@pytest.fixture
def mysql_schema(self):
    """Return a SemanticLayerSchema named ``countries`` with a ``mysql`` source."""
    connection = {
        "host": "localhost",
        "port": 3306,
        "database": "test_db",
        "user": "test_user",
        "password": "test_password",
    }
    source = {
        "type": "mysql",
        "connection": connection,
        "table": "countries",
    }
    return SemanticLayerSchema(name="countries", source=source)

@pytest.fixture
def sample_df(self) -> DataFrame:
return DataFrame(
Expand Down Expand Up @@ -429,3 +449,52 @@ def test_train_method_with_code_but_no_queries(self, agent):
codes = ["code1", "code2"]
with pytest.raises(ValueError):
agent.train(codes)

def test_execute_local_sql_query_success(self, agent):
    """A count query run via ``_execute_local_sql_query`` returns a total of 4."""
    result = agent._execute_local_sql_query(
        "SELECT count(*) as total from countries;"
    )
    pd.testing.assert_frame_equal(result, pd.DataFrame({"total": [4]}))

def test_execute_local_sql_query_failure(self, agent):
    """An invalid statement must raise RuntimeError matching "SQL execution failed"."""
    invalid_sql = "wrong query;"
    with pytest.raises(RuntimeError, match="SQL execution failed"):
        agent._execute_local_sql_query(invalid_sql)

def test_execute_sql_query_success_local(self, agent):
    """A count query run via ``_execute_sql_query`` returns a total of 4."""
    result = agent._execute_sql_query("SELECT count(*) as total from countries;")
    pd.testing.assert_frame_equal(result, pd.DataFrame({"total": [4]}))

@patch("os.path.exists", return_value=True)
def test_execute_sql_query_success_virtual_dataframe(
    self, mock_exists, agent, mysql_schema, sample_df
):
    """
    With a dataset loaded from a mysql schema, ``_execute_sql_query`` must
    return whatever the patched ``DatasetLoader.execute_query`` yields.
    """
    sql = "SELECT count(*) as total from countries;"
    dataset_loader = DatasetLoader()
    expected = pd.DataFrame({"total": [4]})
    schema_yaml = str(mysql_schema.to_yaml())

    with patch("builtins.open", mock_open(read_data=schema_yaml)), patch(
        "pandasai.data_loader.loader.DatasetLoader.execute_query"
    ) as execute_query_mock:
        # First call serves head(), second call serves the SQL query
        execute_query_mock.side_effect = [sample_df, expected]

        virtual_df = dataset_loader.load("test/users")
        agent._state.dfs = [virtual_df]

        pd.testing.assert_frame_equal(virtual_df.head(), sample_df)
        pd.testing.assert_frame_equal(agent._execute_sql_query(sql), expected)

        # Once for head(), once for the SQL query
        assert execute_query_mock.call_count == 2

def test_execute_sql_query_error_no_dataframe(self, agent):
    """With ``dfs`` set to None, ``_execute_sql_query`` must raise ValueError."""
    agent._state.dfs = None

    with pytest.raises(ValueError, match="No DataFrames available"):
        agent._execute_sql_query("SELECT count(*) as total from countries;")
3 changes: 0 additions & 3 deletions tests/unit_tests/dataframe/test_loader.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import logging
import sys
from datetime import datetime, timedelta
from unittest.mock import mock_open, patch

import pandas as pd
import pytest
import yaml

from pandasai.data_loader.loader import DatasetLoader
from pandasai.data_loader.semantic_layer_schema import SemanticLayerSchema
Expand Down
6 changes: 6 additions & 0 deletions tests/unit_tests/helpers/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pandasai.helpers.session import Session, get_pandaai_session


@patch("pandasai.os.environ", {})
def test_session_init_without_api_key():
"""Test that Session initialization raises PandaAIApiKeyError when no API key is provided"""
with pytest.raises(PandaAIApiKeyError) as exc_info:
Expand All @@ -18,6 +19,7 @@ def test_session_init_without_api_key():
)


@patch("pandasai.os.environ", {})
def test_session_init_with_none_api_key():
"""Test that Session initialization raises PandaAIApiKeyError when API key is None"""
with pytest.raises(PandaAIApiKeyError) as exc_info:
Expand All @@ -28,18 +30,21 @@ def test_session_init_with_none_api_key():
)


@patch("pandasai.os.environ", {})
def test_session_init_with_api_key():
    """Test that Session initialization stores the API key it is given"""
    created = Session(api_key="test-key")
    stored_key = created._api_key
    assert stored_key == "test-key"


@patch("pandasai.os.environ", {})
def test_session_init_with_default_api_url():
    """Test that Session falls back to DEFAULT_API_URL when no URL is provided"""
    created = Session(api_key="test-key")
    endpoint = created._endpoint_url
    assert endpoint == DEFAULT_API_URL


@patch("pandasai.os.environ", {})
def test_session_init_with_custom_api_url():
"""Test that Session initialization uses provided URL"""
custom_url = "https://custom.api.url"
Expand All @@ -64,6 +69,7 @@ def test_session_init_with_env_api_url():
assert session._endpoint_url == "https://env.api.url"


@patch("pandasai.os.environ", {})
def test_get_pandaai_session_without_credentials():
"""Test that get_pandaai_session raises PandaAIApiKeyError when no credentials are provided"""
with pytest.raises(PandaAIApiKeyError) as exc_info:
Expand Down
5 changes: 4 additions & 1 deletion tests/unit_tests/test_pandasai_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,10 @@ def test_load_successful_zip_extraction(
mock_zip_file.return_value.__enter__.return_value.extractall.assert_called_once()
assert isinstance(result, MagicMock)

def test_load_without_api_credentials(self):
@patch("pandasai.os.environ", {})
def test_load_without_api_credentials(
self,
):
"""Test that load raises PandaAIApiKeyError when no API credentials are provided"""
with pytest.raises(PandaAIApiKeyError) as exc_info:
pandasai.load("test/dataset")
Expand Down