Skip to content

Commit 908bd2f

Browse files
committed
feat: implement option 'delete_rows' of argument 'if_exists' in 'DataFrame.to_sql' API.
1 parent d4dff29 commit 908bd2f

File tree

4 files changed

+115
-18
lines changed

4 files changed

+115
-18
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ Other enhancements
7171
- :meth:`Series.str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
7272
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
7373
- :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
74+
- Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`).
7475
- Errors occurring during SQL I/O will now throw a generic :class:`.DatabaseError` instead of the raw Exception type from the underlying driver manager library (:issue:`60748`)
7576
- Implemented :meth:`Series.str.isascii` and :meth:`Series.str.isascii` (:issue:`59091`)
7677
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)

pandas/core/generic.py

+6
Original file line numberDiff line numberDiff line change
@@ -2801,6 +2801,12 @@ def to_sql(
28012801
Databases supported by SQLAlchemy [1]_ are supported. Tables can be
28022802
newly created, appended to, or overwritten.
28032803
2804+
.. warning::
2805+
The pandas library does not attempt to sanitize inputs provided via a to_sql call.
2806+
Please refer to the documentation for the underlying database driver to see if it
2807+
will properly prevent injection, or alternatively be advised of a security risk when
2808+
executing arbitrary commands in a to_sql call.
2809+
28042810
Parameters
28052811
----------
28062812
name : str

pandas/io/sql.py

+51-15
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676

7777
from sqlalchemy import Table
7878
from sqlalchemy.sql.expression import (
79+
Delete,
7980
Select,
8081
TextClause,
8182
)
@@ -738,7 +739,7 @@ def to_sql(
738739
name: str,
739740
con,
740741
schema: str | None = None,
741-
if_exists: Literal["fail", "replace", "append"] = "fail",
742+
if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail",
742743
index: bool = True,
743744
index_label: IndexLabel | None = None,
744745
chunksize: int | None = None,
@@ -750,6 +751,12 @@ def to_sql(
750751
"""
751752
Write records stored in a DataFrame to a SQL database.
752753
754+
.. warning::
755+
The pandas library does not attempt to sanitize inputs provided via a to_sql call.
756+
Please refer to the documentation for the underlying database driver to see if it
757+
will properly prevent injection, or alternatively be advised of a security risk when
758+
executing arbitrary commands in a to_sql call.
759+
753760
Parameters
754761
----------
755762
frame : DataFrame, Series
@@ -764,10 +771,11 @@ def to_sql(
764771
schema : str, optional
765772
Name of SQL schema in database to write to (if database flavor
766773
supports this). If None, use default schema (default).
767-
if_exists : {'fail', 'replace', 'append'}, default 'fail'
774+
if_exists : {'fail', 'replace', 'append', 'delete_rows'}, default 'fail'
768775
- fail: If table exists, do nothing.
769776
- replace: If table exists, drop it, recreate it, and insert data.
770777
- append: If table exists, insert data. Create if does not exist.
778+
- delete_rows: If a table exists, delete all records and insert data.
771779
index : bool, default True
772780
Write DataFrame index as a column.
773781
index_label : str or sequence, optional
@@ -818,7 +826,7 @@ def to_sql(
818826
`sqlite3 <https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.rowcount>`__ or
819827
`SQLAlchemy <https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.BaseCursorResult.rowcount>`__
820828
""" # noqa: E501
821-
if if_exists not in ("fail", "replace", "append"):
829+
if if_exists not in ("fail", "replace", "append", "delete_rows"):
822830
raise ValueError(f"'{if_exists}' is not valid for if_exists")
823831

824832
if isinstance(frame, Series):
@@ -926,7 +934,7 @@ def __init__(
926934
pandas_sql_engine,
927935
frame=None,
928936
index: bool | str | list[str] | None = True,
929-
if_exists: Literal["fail", "replace", "append"] = "fail",
937+
if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail",
930938
prefix: str = "pandas",
931939
index_label=None,
932940
schema=None,
@@ -974,11 +982,13 @@ def create(self) -> None:
974982
if self.exists():
975983
if self.if_exists == "fail":
976984
raise ValueError(f"Table '{self.name}' already exists.")
977-
if self.if_exists == "replace":
985+
elif self.if_exists == "replace":
978986
self.pd_sql.drop_table(self.name, self.schema)
979987
self._execute_create()
980988
elif self.if_exists == "append":
981989
pass
990+
elif self.if_exists == "delete_rows":
991+
self.pd_sql.delete_rows(self.name, self.schema)
982992
else:
983993
raise ValueError(f"'{self.if_exists}' is not valid for if_exists")
984994
else:
@@ -997,7 +1007,7 @@ def _execute_insert(self, conn, keys: list[str], data_iter) -> int:
9971007
Each item contains a list of values to be inserted
9981008
"""
9991009
data = [dict(zip(keys, row)) for row in data_iter]
1000-
result = conn.execute(self.table.insert(), data)
1010+
result = self.pd_sql.execute(self.table.insert(), data)
10011011
return result.rowcount
10021012

10031013
def _execute_insert_multi(self, conn, keys: list[str], data_iter) -> int:
@@ -1014,7 +1024,7 @@ def _execute_insert_multi(self, conn, keys: list[str], data_iter) -> int:
10141024

10151025
data = [dict(zip(keys, row)) for row in data_iter]
10161026
stmt = insert(self.table).values(data)
1017-
result = conn.execute(stmt)
1027+
result = self.pd_sql.execute(stmt)
10181028
return result.rowcount
10191029

10201030
def insert_data(self) -> tuple[list[str], list[np.ndarray]]:
@@ -1480,7 +1490,7 @@ def to_sql(
14801490
self,
14811491
frame,
14821492
name: str,
1483-
if_exists: Literal["fail", "replace", "append"] = "fail",
1493+
if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail",
14841494
index: bool = True,
14851495
index_label=None,
14861496
schema=None,
@@ -1649,7 +1659,7 @@ def run_transaction(self):
16491659
else:
16501660
yield self.con
16511661

1652-
def execute(self, sql: str | Select | TextClause, params=None):
1662+
def execute(self, sql: str | Select | TextClause | Delete, params=None):
16531663
"""Simple passthrough to SQLAlchemy connectable"""
16541664
from sqlalchemy.exc import SQLAlchemyError
16551665

@@ -1874,7 +1884,7 @@ def prep_table(
18741884
self,
18751885
frame,
18761886
name: str,
1877-
if_exists: Literal["fail", "replace", "append"] = "fail",
1887+
if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail",
18781888
index: bool | str | list[str] | None = True,
18791889
index_label=None,
18801890
schema=None,
@@ -1951,7 +1961,7 @@ def to_sql(
19511961
self,
19521962
frame,
19531963
name: str,
1954-
if_exists: Literal["fail", "replace", "append"] = "fail",
1964+
if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail",
19551965
index: bool = True,
19561966
index_label=None,
19571967
schema: str | None = None,
@@ -1969,10 +1979,11 @@ def to_sql(
19691979
frame : DataFrame
19701980
name : string
19711981
Name of SQL table.
1972-
if_exists : {'fail', 'replace', 'append'}, default 'fail'
1982+
if_exists : {'fail', 'replace', 'append', 'delete_rows'}, default 'fail'
19731983
- fail: If table exists, do nothing.
19741984
- replace: If table exists, drop it, recreate it, and insert data.
19751985
- append: If table exists, insert data. Create if does not exist.
1986+
- delete_rows: If a table exists, delete all records and insert data.
19761987
index : boolean, default True
19771988
Write DataFrame index as a column.
19781989
index_label : string or sequence, default None
@@ -2069,6 +2080,16 @@ def drop_table(self, table_name: str, schema: str | None = None) -> None:
20692080
self.get_table(table_name, schema).drop(bind=self.con)
20702081
self.meta.clear()
20712082

2083+
def delete_rows(self, table_name: str, schema: str | None = None) -> None:
2084+
schema = schema or self.meta.schema
2085+
if self.has_table(table_name, schema):
2086+
self.meta.reflect(
2087+
bind=self.con, only=[table_name], schema=schema, views=True
2088+
)
2089+
table = self.get_table(table_name, schema)
2090+
self.execute(table.delete()).close()
2091+
self.meta.clear()
2092+
20722093
def _create_sql_schema(
20732094
self,
20742095
frame: DataFrame,
@@ -2304,7 +2325,7 @@ def to_sql(
23042325
self,
23052326
frame,
23062327
name: str,
2307-
if_exists: Literal["fail", "replace", "append"] = "fail",
2328+
if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail",
23082329
index: bool = True,
23092330
index_label=None,
23102331
schema: str | None = None,
@@ -2326,6 +2347,7 @@ def to_sql(
23262347
- fail: If table exists, do nothing.
23272348
- replace: If table exists, drop it, recreate it, and insert data.
23282349
- append: If table exists, insert data. Create if does not exist.
2350+
- delete_rows: If a table exists, delete all records and insert data.
23292351
index : boolean, default True
23302352
Write DataFrame index as a column.
23312353
index_label : string or sequence, default None
@@ -2379,6 +2401,9 @@ def to_sql(
23792401
self.execute(sql_statement).close()
23802402
elif if_exists == "append":
23812403
mode = "append"
2404+
elif if_exists == "delete_rows":
2405+
mode = "append"
2406+
self.delete_rows(name, schema)
23822407

23832408
try:
23842409
tbl = pa.Table.from_pandas(frame, preserve_index=index)
@@ -2416,6 +2441,11 @@ def has_table(self, name: str, schema: str | None = None) -> bool:
24162441

24172442
return False
24182443

2444+
def delete_rows(self, name: str, schema: str | None = None) -> None:
2445+
table_name = f"{schema}.{name}" if schema else name
2446+
if self.has_table(name, schema):
2447+
self.execute(f"DELETE FROM {table_name}").close()
2448+
24192449
def _create_sql_schema(
24202450
self,
24212451
frame: DataFrame,
@@ -2790,10 +2820,11 @@ def to_sql(
27902820
frame: DataFrame
27912821
name: string
27922822
Name of SQL table.
2793-
if_exists: {'fail', 'replace', 'append'}, default 'fail'
2823+
if_exists: {'fail', 'replace', 'append', 'delete_rows'}, default 'fail'
27942824
fail: If table exists, do nothing.
27952825
replace: If table exists, drop it, recreate it, and insert data.
27962826
append: If table exists, insert data. Create if it does not exist.
2827+
delete_rows: If a table exists, delete all records and insert data.
27972828
index : bool, default True
27982829
Write DataFrame index as a column
27992830
index_label : string or sequence, default None
@@ -2867,7 +2898,12 @@ def get_table(self, table_name: str, schema: str | None = None) -> None:
28672898

28682899
def drop_table(self, name: str, schema: str | None = None) -> None:
28692900
drop_sql = f"DROP TABLE {_get_valid_sqlite_name(name)}"
2870-
self.execute(drop_sql)
2901+
self.execute(drop_sql).close()
2902+
2903+
def delete_rows(self, name: str, schema: str | None = None) -> None:
2904+
delete_sql = f"DELETE FROM {_get_valid_sqlite_name(name)}"
2905+
if self.has_table(name, schema):
2906+
self.execute(delete_sql).close()
28712907

28722908
def _create_sql_schema(
28732909
self,

pandas/tests/io/test_sql.py

+57-3
Original file line numberDiff line numberDiff line change
@@ -1068,7 +1068,9 @@ def test_to_sql(conn, method, test_frame1, request):
10681068

10691069

10701070
@pytest.mark.parametrize("conn", all_connectable)
1071-
@pytest.mark.parametrize("mode, num_row_coef", [("replace", 1), ("append", 2)])
1071+
@pytest.mark.parametrize(
1072+
"mode, num_row_coef", [("replace", 1), ("append", 2), ("delete_rows", 1)]
1073+
)
10721074
def test_to_sql_exist(conn, mode, num_row_coef, test_frame1, request):
10731075
conn = request.getfixturevalue(conn)
10741076
with pandasSQL_builder(conn, need_transaction=True) as pandasSQL:
@@ -2698,6 +2700,58 @@ def test_drop_table(conn, request):
26982700
assert not insp.has_table("temp_frame")
26992701

27002702

2703+
@pytest.mark.parametrize("conn_name", all_connectable)
2704+
def test_delete_rows_success(conn_name, test_frame1, request):
2705+
table_name = "temp_frame"
2706+
conn = request.getfixturevalue(conn_name)
2707+
2708+
with pandasSQL_builder(conn) as pandasSQL:
2709+
with pandasSQL.run_transaction():
2710+
assert pandasSQL.to_sql(test_frame1, table_name) == test_frame1.shape[0]
2711+
2712+
with pandasSQL.run_transaction():
2713+
assert pandasSQL.delete_rows(table_name) is None
2714+
2715+
assert count_rows(conn, table_name) == 0
2716+
assert pandasSQL.has_table("temp_frame")
2717+
2718+
2719+
@pytest.mark.parametrize("conn_name", all_connectable)
2720+
def test_delete_rows_is_atomic(conn_name, request):
2721+
sqlalchemy = pytest.importorskip("sqlalchemy")
2722+
2723+
table_name = "temp_frame"
2724+
table_stmt = f"CREATE TABLE {table_name} (a INTEGER, b INTEGER UNIQUE NOT NULL)"
2725+
2726+
if conn_name != "sqlite_buildin" and "adbc" not in conn_name:
2727+
table_stmt = sqlalchemy.text(table_stmt)
2728+
2729+
# setting dtype is mandatory for adbc related tests
2730+
original_df = DataFrame({"a": [1, 2], "b": [3, 4]}, dtype="int32")
2731+
replacing_df = DataFrame({"a": [5, 6, 7], "b": [8, 8, 8]}, dtype="int32")
2732+
2733+
conn = request.getfixturevalue(conn_name)
2734+
pandasSQL = pandasSQL_builder(conn)
2735+
2736+
with pandasSQL.run_transaction() as cur:
2737+
cur.execute(table_stmt)
2738+
2739+
with pandasSQL.run_transaction():
2740+
pandasSQL.to_sql(original_df, table_name, if_exists="append", index=False)
2741+
2742+
# inserting duplicated values in a UNIQUE constraint column
2743+
with pytest.raises(pd.errors.DatabaseError):
2744+
with pandasSQL.run_transaction():
2745+
pandasSQL.to_sql(
2746+
replacing_df, table_name, if_exists="delete_rows", index=False
2747+
)
2748+
2749+
# failed "delete_rows" is rolled back preserving original data
2750+
with pandasSQL.run_transaction():
2751+
result_df = pandasSQL.read_query(f"SELECT * FROM {table_name}", dtype="int32")
2752+
tm.assert_frame_equal(result_df, original_df)
2753+
2754+
27012755
@pytest.mark.parametrize("conn", all_connectable)
27022756
def test_roundtrip(conn, request, test_frame1):
27032757
if conn == "sqlite_str":
@@ -3409,8 +3463,8 @@ def test_to_sql_with_negative_npinf(conn, request, input):
34093463
mark = pytest.mark.xfail(reason="GH 36465")
34103464
request.applymarker(mark)
34113465

3412-
msg = "inf cannot be used with MySQL"
3413-
with pytest.raises(ValueError, match=msg):
3466+
msg = "Execution failed on sql"
3467+
with pytest.raises(pd.errors.DatabaseError, match=msg):
34143468
df.to_sql(name="foobar", con=conn, index=False)
34153469
else:
34163470
assert df.to_sql(name="foobar", con=conn, index=False) == 1

0 commit comments

Comments
 (0)