Rewrite the Pandas to JSON conversion (#108)

mwouts · web-flow · commit 5da57bc68c18 · 2022-11-06T01:17:54.000Z
* Rewrite _formatted_values
* Test on Python 3.11 as well
* Turn most warnings into errors
diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml
@@ -35,7 +35,7 @@ jobs:
     if: ${{ needs.skip_duplicate.outputs.should_skip == 'false' }}
     strategy:
       matrix:
-        python-version: [ 3.6, 3.7, 3.8, 3.9, "3.10"]
+        python-version: [ 3.6, 3.7, 3.8, 3.9, "3.10", "3.11"]
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -1,7 +1,15 @@
 ITables ChangeLog
 =================
 
-1.3.1 (2022-11-01)
+1.3.2 (2022-11-06)
+------------------
+
+**Fixed**
+- We have reimplemented the function that encodes the Pandas dataframes to JSON
+to avoid triggering FutureWarnings when using `pandas>=1.5` ([#107](https://github.com/mwouts/itables/issues/107)).
+
+
+1.3.1 (2022-11-05)
 ------------------
 
 **Added**
diff --git a/itables/javascript.py b/itables/javascript.py
@@ -101,31 +101,49 @@ def init_notebook_mode(
         )
 
 
+def _format_column(x):
+    if x.dtype.kind == "O":
+        return x.astype(str)
+
+    if x.dtype.kind == "f":
+        x = np.array(fmt.format_array(x.values, None))
+        try:
+            return x.astype(float)
+        except ValueError:
+            pass
+
+    return x
+
+
 def _formatted_values(df):
-    """Return the table content as a list of lists for DataTables"""
-    formatted_df = df.copy()
+    """Format the values in the table and return the data, row by row, as requested by DataTables"""
     # We iterate over columns using an index rather than the column name
     # to avoid an issue in case of duplicated column names #89
-    for j, col in enumerate(formatted_df):
-        x = formatted_df.iloc[:, j]
-        if x.dtype.kind in ["b", "i", "s"]:
-            continue
-
-        if x.dtype.kind == "O":
-            formatted_df.iloc[:, j] = formatted_df.iloc[:, j].astype(str)
-            continue
-
-        formatted_df.iloc[:, j] = np.array(fmt.format_array(x.values, None))
-        if x.dtype.kind == "f":
-            try:
-                formatted_df.iloc[:, j] = formatted_df.iloc[:, j].astype(float)
-            except ValueError:
-                pass
+    return list(
+        zip(
+            *(
+                _format_column(df.iloc[:, j]).tolist()
+                for j, col in enumerate(df.columns)
+            )
+        )
+    )
 
-    rows = formatted_df.values.tolist()
 
-    # Replace pd.NA with None
-    return [[cell if cell is not pd.NA else None for cell in row] for row in rows]
+class TableValuesEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if obj is pd.NA:
+            return None
+        if isinstance(obj, np.bool_):
+            return bool(obj)
+        if isinstance(obj, np.integer):
+            return int(obj)
+        if isinstance(obj, np.floating):
+            return float(obj)
+        if isinstance(obj, pd.Timedelta):
+            return str(obj)
+        if isinstance(obj, pd.Timestamp):
+            return str(obj)
+        return json.JSONEncoder.default(self, obj)
 
 
 def _table_header(
@@ -352,7 +370,7 @@ def to_html_datatable(df=None, tableId=None, connected=True, **kwargs):
 
     # Export the table data to JSON and include this in the HTML
     data = _formatted_values(df.reset_index() if showIndex else df)
-    dt_data = json.dumps(data)
+    dt_data = json.dumps(data, cls=TableValuesEncoder)
     output = replace_value(output, "const data = [];", f"const data = {dt_data};")
 
     return output
diff --git a/itables/sample_dfs.py b/itables/sample_dfs.py
@@ -84,6 +84,9 @@ def get_dict_of_test_dfs(N=100, M=100):
                 "timedelta": [timedelta(days=2), timedelta(seconds=50)],
             }
         ),
+        "date_range": pd.DataFrame(
+            {"timestamps": pd.date_range("now", periods=5, freq="S")}
+        ),
         "object": pd.DataFrame(
             {"dict": [{"a": 1}, {"b": 2, "c": 3}], "list": [["a"], [1, 2]]}
         ),
diff --git a/itables/version.py b/itables/version.py
@@ -1,3 +1,3 @@
 """ITables' version number"""
 
-__version__ = "1.3.1"
+__version__ = "1.3.2"
diff --git a/setup.py b/setup.py
@@ -64,5 +64,6 @@
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
     ],
 )
diff --git a/tests/test_sample_dfs.py b/tests/test_sample_dfs.py
@@ -1,7 +1,10 @@
+import json
+
 import pandas as pd
 import pytest
 
 from itables import show
+from itables.javascript import TableValuesEncoder, _format_column
 from itables.sample_dfs import (
     get_countries,
     get_dict_of_test_dfs,
@@ -10,6 +13,13 @@
     get_population,
 )
 
+# Make sure that displaying a dataframe does not trigger a warning  #107
+pytestmark = [
+    pytest.mark.filterwarnings("error"),
+    # Seen on the CI on Py38 and Py39
+    pytest.mark.filterwarnings("ignore::ResourceWarning"),
+]
+
 
 def test_get_countries():
     df = get_countries()
@@ -37,6 +47,12 @@ def test_show_test_dfs(df_name, df):
     show(df)
 
 
+@pytest.mark.parametrize("series_name,series", get_dict_of_test_series().items())
+def test_format_column(series_name, series):
+    values = _format_column(series).tolist()
+    json.dumps(values, cls=TableValuesEncoder)
+
+
 @pytest.mark.parametrize("series_name,series", get_dict_of_test_series().items())
 def test_show_test_series(series_name, series):
     show(series)

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1,3 +1,3 @@` |
| `1` | `1` | `"""ITables' version number"""` |
| `2` | `2` | |
| `3` | | `-__version__ = "1.3.1"` |
| | `3` | `+__version__ = "1.3.2"` |


| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -64,5 +64,6 @@` |
| `64` | `64` | `"Programming Language :: Python :: 3.8",` |
| `65` | `65` | `"Programming Language :: Python :: 3.9",` |
| `66` | `66` | `"Programming Language :: Python :: 3.10",` |
| | `67` | `+ "Programming Language :: Python :: 3.11",` |
| `67` | `68` | `],` |
| `68` | `69` | `)` |