33from importlib .metadata import entry_points
44
55import duckdb
6+ import pandas as pd
67import pytest
7- from sqlalchemy import create_engine , text
8- from sqlalchemy .engine import Connection
98
109# Obtain a list of all deployed entry point scripts to test:
1110ENTRY_POINTS = [
@@ -26,22 +25,9 @@ def test_extractor_scripts(script_runner, ep):
2625 assert ret .success # nosec: B101
2726
2827
29- def _get_sqlite_tables (sqlite_conn ) -> set [str ]:
30- """Return set of all tables in SQLITE db."""
31- tables = sqlite_conn .execute (
32- text (
33- "SELECT name FROM sqlite_master "
34- "WHERE type='table' AND name NOT LIKE 'sqlite_%';"
35- )
36- ).fetchall ()
37- return {table_name for (table_name ,) in tables }
38-
39-
40- def _get_duckdb_tables (duckdb_conn ) -> set [str ]:
28+ def _get_tables (conn ) -> set [str ]:
4129 """Return set of all tables in duckdb."""
42- tables = duckdb_conn .execute (
43- "SELECT table_name FROM information_schema.tables"
44- ).fetchall ()
30+ tables = conn .execute ("SELECT table_name FROM information_schema.tables" ).fetchall ()
4531 return {table_name for (table_name ,) in tables }
4632
4733
@@ -50,9 +36,6 @@ def _find_empty_tables(db_conn, tables: set[str]) -> list[str]:
5036 empty_tables = []
5137 for table_name in tables :
5238 query = f"SELECT COUNT(*) FROM '{ table_name } ';" # noqa: S608
53- if isinstance (db_conn , Connection ):
54- query = text (query )
55-
5639 if db_conn .execute (query ).fetchone ()[0 ] == 0 :
5740 empty_tables .append (table_name )
5841 return empty_tables
@@ -95,15 +78,13 @@ def test_extract_example_filings(script_runner, tmp_path, test_dir):
9578 assert ret .success
9679
9780 # Sanity check the sqlite/duckdb outputs
98- sqlite_uri = f"sqlite:///{ sqlite_path .absolute ()} "
99- sqlite_engine = create_engine (sqlite_uri )
10081 with (
101- sqlite_engine . begin ( ) as sqlite_conn ,
82+ duckdb . connect ( sqlite_path ) as sqlite_conn ,
10283 duckdb .connect (duckdb_path ) as duckdb_conn ,
10384 ):
10485 # Check for tables that only exist in either sqlite/duckdb but not both
105- sqlite_tables = _get_sqlite_tables (sqlite_conn )
106- duckdb_tables = _get_duckdb_tables (duckdb_conn )
86+ sqlite_tables = _get_tables (sqlite_conn )
87+ duckdb_tables = _get_tables (duckdb_conn )
10788
10889 extra_sqlite_tables = sqlite_tables - duckdb_tables
10990 extra_duckdb_tables = duckdb_tables - sqlite_tables
@@ -118,6 +99,17 @@ def test_extract_example_filings(script_runner, tmp_path, test_dir):
11899 assert empty_sqlite_tables == []
119100 assert empty_duckdb_tables == []
120101
102+ # Check that tables are identical
103+ # SQLite/duckdb have nuanced dtype differences, so ignore types
104+ for table in sorted (sqlite_tables ):
105+ pd .testing .assert_frame_equal (
106+ sqlite_conn .table (table ).df (),
107+ duckdb_conn .table (table ).df (),
108+ check_like = True ,
109+ check_dtype = False ,
110+ check_exact = True ,
111+ )
112+
121113
122114@pytest .mark .script_launch_mode ("inprocess" )
123115def test_extract_example_filings_bad_form (script_runner , tmp_path , test_dir ):
0 commit comments