Skip to content

Commit b71c0d9

Browse files
committed
feat: implement option 'delete_rows' of argument 'if_exists' in 'DataFrame.to_sql' API.
1 parent f105eef commit b71c0d9

File tree

3 files changed

+107
-12
lines changed

3 files changed

+107
-12
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ Other enhancements
5959
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
6060
- :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
6161
- :meth:`str.get_dummies` now accepts a ``dtype`` parameter to specify the dtype of the resulting DataFrame (:issue:`47872`)
62+
- Added a ``"delete_rows"`` option to the ``if_exists`` argument of :meth:`DataFrame.to_sql`, which deletes all records from an existing table before inserting the data (:issue:`37210`)
6263
- Multiplying two :class:`DateOffset` objects will now raise a ``TypeError`` instead of a ``RecursionError`` (:issue:`59442`)
6364
- Restore support for reading Stata 104-format and enable reading 103-format dta files (:issue:`58554`)
6465
- Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)

pandas/io/sql.py

+45-11
Original file line numberDiff line numberDiff line change
@@ -738,7 +738,7 @@ def to_sql(
738738
name: str,
739739
con,
740740
schema: str | None = None,
741-
if_exists: Literal["fail", "replace", "append"] = "fail",
741+
if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail",
742742
index: bool = True,
743743
index_label: IndexLabel | None = None,
744744
chunksize: int | None = None,
@@ -764,10 +764,11 @@ def to_sql(
764764
schema : str, optional
765765
Name of SQL schema in database to write to (if database flavor
766766
supports this). If None, use default schema (default).
767-
if_exists : {'fail', 'replace', 'append'}, default 'fail'
767+
if_exists : {'fail', 'replace', 'append', 'delete_rows'}, default 'fail'
768768
- fail: If table exists, do nothing.
769769
- replace: If table exists, drop it, recreate it, and insert data.
770770
- append: If table exists, insert data. Create if does not exist.
771+
- delete_rows: If a table exists, delete all records and insert data.
771772
index : bool, default True
772773
Write DataFrame index as a column.
773774
index_label : str or sequence, optional
@@ -818,7 +819,7 @@ def to_sql(
818819
`sqlite3 <https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.rowcount>`__ or
819820
`SQLAlchemy <https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.BaseCursorResult.rowcount>`__
820821
""" # noqa: E501
821-
if if_exists not in ("fail", "replace", "append"):
822+
if if_exists not in ("fail", "replace", "append", "delete_rows"):
822823
raise ValueError(f"'{if_exists}' is not valid for if_exists")
823824

824825
if isinstance(frame, Series):
@@ -926,7 +927,7 @@ def __init__(
926927
pandas_sql_engine,
927928
frame=None,
928929
index: bool | str | list[str] | None = True,
929-
if_exists: Literal["fail", "replace", "append"] = "fail",
930+
if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail",
930931
prefix: str = "pandas",
931932
index_label=None,
932933
schema=None,
@@ -974,11 +975,13 @@ def create(self) -> None:
974975
if self.exists():
975976
if self.if_exists == "fail":
976977
raise ValueError(f"Table '{self.name}' already exists.")
977-
if self.if_exists == "replace":
978+
elif self.if_exists == "replace":
978979
self.pd_sql.drop_table(self.name, self.schema)
979980
self._execute_create()
980981
elif self.if_exists == "append":
981982
pass
983+
elif self.if_exists == "delete_rows":
984+
self.pd_sql.delete_rows(self.name, self.schema)
982985
else:
983986
raise ValueError(f"'{self.if_exists}' is not valid for if_exists")
984987
else:
@@ -1480,7 +1483,7 @@ def to_sql(
14801483
self,
14811484
frame,
14821485
name: str,
1483-
if_exists: Literal["fail", "replace", "append"] = "fail",
1486+
if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail",
14841487
index: bool = True,
14851488
index_label=None,
14861489
schema=None,
@@ -1866,7 +1869,7 @@ def prep_table(
18661869
self,
18671870
frame,
18681871
name: str,
1869-
if_exists: Literal["fail", "replace", "append"] = "fail",
1872+
if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail",
18701873
index: bool | str | list[str] | None = True,
18711874
index_label=None,
18721875
schema=None,
@@ -1943,7 +1946,7 @@ def to_sql(
19431946
self,
19441947
frame,
19451948
name: str,
1946-
if_exists: Literal["fail", "replace", "append"] = "fail",
1949+
if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail",
19471950
index: bool = True,
19481951
index_label=None,
19491952
schema: str | None = None,
@@ -1961,10 +1964,11 @@ def to_sql(
19611964
frame : DataFrame
19621965
name : string
19631966
Name of SQL table.
1964-
if_exists : {'fail', 'replace', 'append'}, default 'fail'
1967+
if_exists : {'fail', 'replace', 'append', 'delete_rows'}, default 'fail'
19651968
- fail: If table exists, do nothing.
19661969
- replace: If table exists, drop it, recreate it, and insert data.
19671970
- append: If table exists, insert data. Create if does not exist.
1971+
- delete_rows: If a table exists, delete all records and insert data.
19681972
index : boolean, default True
19691973
Write DataFrame index as a column.
19701974
index_label : string or sequence, default None
@@ -2061,6 +2065,18 @@ def drop_table(self, table_name: str, schema: str | None = None) -> None:
20612065
self.get_table(table_name, schema).drop(bind=self.con)
20622066
self.meta.clear()
20632067

2068+
def delete_rows(self, table_name: str, schema: str | None = None) -> None:
    """
    Remove every row from ``table_name`` while keeping the table itself.

    Parameters
    ----------
    table_name : str
        Name of the table to empty.
    schema : str, optional
        Schema to look the table up in. When falsy, ``self.meta.schema``
        is used instead.

    Notes
    -----
    Nothing is executed when ``has_table`` reports that the table does
    not exist.
    """
    target_schema = schema or self.meta.schema
    if not self.has_table(table_name, target_schema):
        return

    # Reflect only the requested table (views included) before looking it
    # up; presumably ``get_table`` relies on the reflected metadata.
    self.meta.reflect(
        bind=self.con,
        only=[table_name],
        schema=target_schema,
        views=True,
    )

    # Issue the DELETE on the connection yielded by the transaction
    # context manager.
    with self.run_transaction() as transaction_con:
        target_table = self.get_table(table_name, target_schema)
        transaction_con.execute(target_table.delete())

    # Drop the metadata collected by the reflection above.
    self.meta.clear()
2079+
20642080
def _create_sql_schema(
20652081
self,
20662082
frame: DataFrame,
@@ -2296,7 +2312,7 @@ def to_sql(
22962312
self,
22972313
frame,
22982314
name: str,
2299-
if_exists: Literal["fail", "replace", "append"] = "fail",
2315+
if_exists: Literal["fail", "replace", "append", "delete_rows"] = "fail",
23002316
index: bool = True,
23012317
index_label=None,
23022318
schema: str | None = None,
@@ -2318,6 +2334,7 @@ def to_sql(
23182334
- fail: If table exists, do nothing.
23192335
- replace: If table exists, drop it, recreate it, and insert data.
23202336
- append: If table exists, insert data. Create if does not exist.
2337+
- delete_rows: If a table exists, delete all records and insert data.
23212338
index : boolean, default True
23222339
Write DataFrame index as a column.
23232340
index_label : string or sequence, default None
@@ -2335,6 +2352,7 @@ def to_sql(
23352352
engine : {'auto', 'sqlalchemy'}, default 'auto'
23362353
Raises NotImplementedError if not set to 'auto'
23372354
"""
2355+
23382356
if index_label:
23392357
raise NotImplementedError(
23402358
"'index_label' is not implemented for ADBC drivers"
@@ -2368,6 +2386,9 @@ def to_sql(
23682386
cur.execute(f"DROP TABLE {table_name}")
23692387
elif if_exists == "append":
23702388
mode = "append"
2389+
elif if_exists == "delete_rows":
2390+
mode = "append"
2391+
self.delete_rows(name, schema)
23712392

23722393
import pyarrow as pa
23732394

@@ -2402,6 +2423,12 @@ def has_table(self, name: str, schema: str | None = None) -> bool:
24022423

24032424
return False
24042425

2426+
def delete_rows(self, name: str, schema: str | None = None) -> None:
    """
    Delete all rows of table ``name`` through a cursor on ``self.con``.

    Parameters
    ----------
    name : str
        Name of the table to empty.
    schema : str, optional
        When truthy, the table is addressed as ``schema.name``.

    Notes
    -----
    No statement is executed when ``has_table`` reports that the table
    does not exist.
    """
    # NOTE(review): ``schema`` and ``name`` are interpolated into the SQL
    # text as-is (no quoting or escaping) -- confirm callers only pass
    # trusted identifiers.
    if schema:
        statement = f"DELETE FROM {schema}.{name}"
    else:
        statement = f"DELETE FROM {name}"

    if not self.has_table(name, schema):
        return

    with self.con.cursor() as cursor:
        cursor.execute(statement)
2431+
24052432
def _create_sql_schema(
24062433
self,
24072434
frame: DataFrame,
@@ -2769,10 +2796,11 @@ def to_sql(
27692796
frame: DataFrame
27702797
name: string
27712798
Name of SQL table.
2772-
if_exists: {'fail', 'replace', 'append'}, default 'fail'
2799+
if_exists: {'fail', 'replace', 'append', 'delete_rows'}, default 'fail'
27732800
fail: If table exists, do nothing.
27742801
replace: If table exists, drop it, recreate it, and insert data.
27752802
append: If table exists, insert data. Create if it does not exist.
2803+
delete_rows: If a table exists, delete all records and insert data.
27762804
index : bool, default True
27772805
Write DataFrame index as a column
27782806
index_label : string or sequence, default None
@@ -2848,6 +2876,12 @@ def drop_table(self, name: str, schema: str | None = None) -> None:
28482876
drop_sql = f"DROP TABLE {_get_valid_sqlite_name(name)}"
28492877
self.execute(drop_sql)
28502878

2879+
def delete_rows(self, name: str, schema: str | None = None) -> None:
    """
    Delete all rows of table ``name`` inside a transaction.

    Parameters
    ----------
    name : str
        Name of the table to empty. It is passed through
        ``_get_valid_sqlite_name`` before being placed in the statement.
    schema : str, optional
        Only forwarded to ``has_table``; it is not part of the generated
        ``DELETE`` statement.

    Notes
    -----
    No statement is executed when ``has_table`` reports that the table
    does not exist.
    """
    # Build the statement first so the name goes through
    # ``_get_valid_sqlite_name`` even when the table turns out to be absent.
    statement = f"DELETE FROM {_get_valid_sqlite_name(name)}"

    if not self.has_table(name, schema):
        return

    with self.run_transaction() as cursor:
        cursor.execute(statement)
2884+
28512885
def _create_sql_schema(
28522886
self,
28532887
frame,

pandas/tests/io/test_sql.py

+61-1
Original file line numberDiff line numberDiff line change
@@ -1068,7 +1068,9 @@ def test_to_sql(conn, method, test_frame1, request):
10681068

10691069

10701070
@pytest.mark.parametrize("conn", all_connectable)
1071-
@pytest.mark.parametrize("mode, num_row_coef", [("replace", 1), ("append", 2)])
1071+
@pytest.mark.parametrize(
1072+
"mode, num_row_coef", [("replace", 1), ("append", 2), ("delete_rows", 1)]
1073+
)
10721074
def test_to_sql_exist(conn, mode, num_row_coef, test_frame1, request):
10731075
conn = request.getfixturevalue(conn)
10741076
with pandasSQL_builder(conn, need_transaction=True) as pandasSQL:
@@ -2698,6 +2700,64 @@ def test_drop_table(conn, request):
26982700
assert not insp.has_table("temp_frame")
26992701

27002702

2703+
@pytest.mark.parametrize("conn", all_connectable)
def test_delete_rows_success(conn, test_frame1, request):
    # ``delete_rows`` must empty a populated table without dropping it.
    table_name = "temp_frame"
    conn = request.getfixturevalue(conn)
    pandasSQL = pandasSQL_builder(conn)

    # Populate the table; to_sql reports the number of rows written.
    with pandasSQL.run_transaction():
        rows_written = pandasSQL.to_sql(test_frame1, table_name)
        assert rows_written == test_frame1.shape[0]

    # Emptying the table returns nothing.
    with pandasSQL.run_transaction():
        outcome = pandasSQL.delete_rows(table_name)
        assert outcome is None

    # The rows are gone, the table itself is still there.
    assert count_rows(conn, table_name) == 0
    assert pandasSQL.has_table(table_name)
2717+
2718+
2719+
@pytest.mark.parametrize("conn", all_connectable)
def test_delete_rows_is_atomic(conn, request):
    # A failing insert after ``if_exists="delete_rows"`` must roll the
    # deletion back, leaving the original rows in place.

    # Both packages are optional dependencies: skip the test instead of
    # erroring out with ImportError when one of them is not installed.
    adbc_driver_manager = pytest.importorskip("adbc_driver_manager")
    sqlalchemy = pytest.importorskip("sqlalchemy")

    if "sqlite" in conn:
        reason = "This test relies on strict column types, SQLite has a dynamic one"
        request.applymarker(
            pytest.mark.xfail(
                reason=reason,
                strict=True,
            )
        )

    table_name = "temp_frame"
    original_df = DataFrame({"a": [1, 2, 3]})
    replacing_df = DataFrame({"a": ["a", "b", "c", "d"]})

    conn = request.getfixturevalue(conn)
    pandasSQL = pandasSQL_builder(conn)

    # The exception raised for the type mismatch depends on the driver
    # behind the connection.
    if isinstance(conn, adbc_driver_manager.dbapi.Connection):
        expected_exception = adbc_driver_manager.ProgrammingError
    else:
        expected_exception = sqlalchemy.exc.DataError

    with pandasSQL.run_transaction():
        pandasSQL.to_sql(original_df, table_name, if_exists="fail", index=False)

    # trying to insert string values into a integer column
    with pytest.raises(expected_exception), pandasSQL.run_transaction():
        pandasSQL.to_sql(
            replacing_df, table_name, if_exists="delete_rows", index=False
        )

    # "delete_rows" is rolled back preserving the original data
    with pandasSQL.run_transaction():
        result_df = pandasSQL.read_query(f"SELECT * FROM {table_name}")
        tm.assert_frame_equal(result_df, original_df)
2759+
2760+
27012761
@pytest.mark.parametrize("conn", all_connectable)
27022762
def test_roundtrip(conn, request, test_frame1):
27032763
if conn == "sqlite_str":

0 commit comments

Comments
 (0)