Skip to content

Commit 5da57bc

Browse files
authored
Rewrite the Pandas to JSON conversion (#108)
* Rewrite _formatted_values
* Test on Python 3.11 as well
* Turn most warnings into errors
1 parent f2fd659 commit 5da57bc

File tree

7 files changed

+70
-24
lines changed

7 files changed

+70
-24
lines changed

.github/workflows/continuous-integration.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
if: ${{ needs.skip_duplicate.outputs.should_skip == 'false' }}
3636
strategy:
3737
matrix:
38-
python-version: [ 3.6, 3.7, 3.8, 3.9, "3.10"]
38+
python-version: [ 3.6, 3.7, 3.8, 3.9, "3.10", "3.11"]
3939
runs-on: ubuntu-latest
4040
steps:
4141
- name: Checkout

docs/changelog.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,15 @@
11
ITables ChangeLog
22
=================
33

4-
1.3.1 (2022-11-01)
4+
1.3.2 (2022-11-06)
5+
------------------
6+
7+
**Fixed**
8+
- We have reimplemented the function that encodes the Pandas dataframes to JSON
9+
to avoid triggering FutureWarnings when using `pandas>=1.5` ([#107](https://github.com/mwouts/itables/issues/107)).
10+
11+
12+
1.3.1 (2022-11-05)
513
------------------
614

715
**Added**

itables/javascript.py

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -101,31 +101,49 @@ def init_notebook_mode(
101101
)
102102

103103

104+
def _format_column(x):
105+
if x.dtype.kind == "O":
106+
return x.astype(str)
107+
108+
if x.dtype.kind == "f":
109+
x = np.array(fmt.format_array(x.values, None))
110+
try:
111+
return x.astype(float)
112+
except ValueError:
113+
pass
114+
115+
return x
116+
117+
104118
def _formatted_values(df):
105-
"""Return the table content as a list of lists for DataTables"""
106-
formatted_df = df.copy()
119+
"""Format the values in the table and return the data, row by row, as requested by DataTables"""
107120
# We iterate over columns using an index rather than the column name
108121
# to avoid an issue in case of duplicated column names #89
109-
for j, col in enumerate(formatted_df):
110-
x = formatted_df.iloc[:, j]
111-
if x.dtype.kind in ["b", "i", "s"]:
112-
continue
113-
114-
if x.dtype.kind == "O":
115-
formatted_df.iloc[:, j] = formatted_df.iloc[:, j].astype(str)
116-
continue
117-
118-
formatted_df.iloc[:, j] = np.array(fmt.format_array(x.values, None))
119-
if x.dtype.kind == "f":
120-
try:
121-
formatted_df.iloc[:, j] = formatted_df.iloc[:, j].astype(float)
122-
except ValueError:
123-
pass
122+
return list(
123+
zip(
124+
*(
125+
_format_column(df.iloc[:, j]).tolist()
126+
for j, col in enumerate(df.columns)
127+
)
128+
)
129+
)
124130

125-
rows = formatted_df.values.tolist()
126131

127-
# Replace pd.NA with None
128-
return [[cell if cell is not pd.NA else None for cell in row] for row in rows]
132+
class TableValuesEncoder(json.JSONEncoder):
133+
def default(self, obj):
134+
if obj is pd.NA:
135+
return None
136+
if isinstance(obj, np.bool_):
137+
return bool(obj)
138+
if isinstance(obj, np.integer):
139+
return int(obj)
140+
if isinstance(obj, np.floating):
141+
return float(obj)
142+
if isinstance(obj, pd.Timedelta):
143+
return str(obj)
144+
if isinstance(obj, pd.Timestamp):
145+
return str(obj)
146+
return json.JSONEncoder.default(self, obj)
129147

130148

131149
def _table_header(
@@ -352,7 +370,7 @@ def to_html_datatable(df=None, tableId=None, connected=True, **kwargs):
352370

353371
# Export the table data to JSON and include this in the HTML
354372
data = _formatted_values(df.reset_index() if showIndex else df)
355-
dt_data = json.dumps(data)
373+
dt_data = json.dumps(data, cls=TableValuesEncoder)
356374
output = replace_value(output, "const data = [];", f"const data = {dt_data};")
357375

358376
return output

itables/sample_dfs.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ def get_dict_of_test_dfs(N=100, M=100):
8484
"timedelta": [timedelta(days=2), timedelta(seconds=50)],
8585
}
8686
),
87+
"date_range": pd.DataFrame(
88+
{"timestamps": pd.date_range("now", periods=5, freq="S")}
89+
),
8790
"object": pd.DataFrame(
8891
{"dict": [{"a": 1}, {"b": 2, "c": 3}], "list": [["a"], [1, 2]]}
8992
),

itables/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""ITables' version number"""
22

3-
__version__ = "1.3.1"
3+
__version__ = "1.3.2"

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,5 +64,6 @@
6464
"Programming Language :: Python :: 3.8",
6565
"Programming Language :: Python :: 3.9",
6666
"Programming Language :: Python :: 3.10",
67+
"Programming Language :: Python :: 3.11",
6768
],
6869
)

tests/test_sample_dfs.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
import json
2+
13
import pandas as pd
24
import pytest
35

46
from itables import show
7+
from itables.javascript import TableValuesEncoder, _format_column
58
from itables.sample_dfs import (
69
get_countries,
710
get_dict_of_test_dfs,
@@ -10,6 +13,13 @@
1013
get_population,
1114
)
1215

16+
# Make sure that displaying a dataframe does not trigger a warning #107
17+
pytestmark = [
18+
pytest.mark.filterwarnings("error"),
19+
# Seen on the CI on Py38 and Py39
20+
pytest.mark.filterwarnings("ignore::ResourceWarning"),
21+
]
22+
1323

1424
def test_get_countries():
1525
df = get_countries()
@@ -37,6 +47,12 @@ def test_show_test_dfs(df_name, df):
3747
show(df)
3848

3949

50+
@pytest.mark.parametrize("series_name,series", get_dict_of_test_series().items())
51+
def test_format_column(series_name, series):
52+
values = _format_column(series).tolist()
53+
json.dumps(values, cls=TableValuesEncoder)
54+
55+
4056
@pytest.mark.parametrize("series_name,series", get_dict_of_test_series().items())
4157
def test_show_test_series(series_name, series):
4258
show(series)

0 commit comments

Comments
 (0)