Skip to content

Commit 5de5756

Browse files
committed
convert integer columns with large values to str
1 parent a4a39f7 commit 5de5756

File tree

10 files changed

+132
-13
lines changed

10 files changed

+132
-13
lines changed

docs/changelog.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,13 @@ ITables ChangeLog
44
1.5.2 (2023-03-??)
55
------------------
66

7-
**Added**
8-
- We have added a CI configuration where we test `itables` against `pandas` in pre-release versions
9-
107
**Fixed**
8+
- Integers that are too big for Javascript are converted to str ([#152](https://github.com/mwouts/itables/issues/152))
119
- If a downsampling occurs, the downsampling message is displayed even if the table only has a few rows
1210

11+
**Added**
12+
- We have added a CI configuration where we test `itables` against `pandas` in pre-release versions
13+
1314

1415
1.5.1 (2023-03-12)
1516
------------------

docs/polars_dataframes.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,3 +140,13 @@ show(dict_of_test_dfs["long_column_names"], scrollX=True)
140140
```{code-cell}
141141
show(dict_of_test_dfs["named_column_index"])
142142
```
143+
144+
## big_integers
145+
146+
```{code-cell}
147+
import itables.options as opt
148+
149+
opt.warn_on_int_to_str_conversion = False
150+
151+
show(dict_of_test_dfs["big_integers"])
152+
```

docs/sample_dataframes.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,3 +151,13 @@ show(dict_of_test_dfs["duplicated_columns"])
151151
```{code-cell}
152152
show(dict_of_test_dfs["named_column_index"])
153153
```
154+
155+
## big_integers
156+
157+
```{code-cell}
158+
import itables.options as opt
159+
160+
opt.warn_on_int_to_str_conversion = False
161+
162+
show(dict_of_test_dfs["big_integers"])
163+
```

itables/datatables_format.py

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,14 @@
55
import pandas as pd
66
import pandas.io.formats.format as fmt
77

8-
import itables.options as opt
8+
try:
9+
import polars as pl
10+
except ImportError:
11+
pl = None
12+
13+
14+
JS_MAX_SAFE_INTEGER = 2**53 - 1
15+
JS_MIN_SAFE_INTEGER = -(2**53 - 1)
916

1017

1118
def _format_column(x):
@@ -48,7 +55,7 @@ def default(self, obj):
4855
warnings.warn(
4956
"Unexpected type '{}' for '{}'.\n"
5057
"You can report this warning at https://github.com/mwouts/itables/issues\n"
51-
"To ignore the warning, please run:\n"
58+
"To silence this warning, please run:\n"
5259
" import itables.options as opt\n"
5360
" opt.warn_on_unexpected_types = False".format(type(obj), obj),
5461
category=RuntimeWarning,
@@ -58,8 +65,48 @@ def default(self, obj):
5865
return TableValuesEncoder
5966

6067

61-
def datatables_rows(df, count=None):
68+
def convert_bigints_to_str(df, warn_on_int_to_str_conversion):
    """Return `df` with the integer columns that Javascript cannot hold cast to str.

    In Javascript, integers have to remain between JS_MIN_SAFE_INTEGER and
    JS_MAX_SAFE_INTEGER. Any integer column with at least one value outside
    of that range is converted to str.

    Parameters
    ----------
    df
        A Pandas DataFrame (anything that exposes `.iloc`), otherwise the
        table is treated as a Polars DataFrame.
    warn_on_int_to_str_conversion
        When true, and at least one column was converted, a warning that
        lists the converted columns is emitted.

    Returns
    -------
    The table with the offending columns cast to str. The input table is
    never modified: a Pandas DataFrame is copied before the first column
    is replaced, and a table with nothing to convert is returned as is.
    """
    converted = []

    if hasattr(df, "iloc"):
        # Pandas DataFrame. Columns are addressed by position rather than
        # by name because the column names might be duplicated.
        copied = False
        for i, col in enumerate(df.columns):
            x = df.iloc[:, i]
            kind = getattr(x.dtype, "kind", None)
            # "i" is signed, "u" is unsigned: a uint64 column can also
            # exceed JS_MAX_SAFE_INTEGER.
            if kind not in ("i", "u"):
                continue

            out_of_range = x > JS_MAX_SAFE_INTEGER
            if kind == "i":
                # Only signed integers can go below the lower bound
                out_of_range = out_of_range | (x < JS_MIN_SAFE_INTEGER)

            # Missing values (nullable integer dtypes) never trigger the conversion
            if not (~x.isnull() & out_of_range).any():
                continue

            if not copied:
                # Do not alter the DataFrame that the caller passed in
                df = df.copy()
                copied = True

            values = x.astype(str)
            if hasattr(df, "isetitem"):
                # Replaces the whole column at position i, whatever its
                # previous dtype was (available in pandas>=1.5)
                df.isetitem(i, values)
            else:
                df.iloc[:, i] = values
            converted.append(col)
    else:
        # Polars DataFrame
        for col in df.columns:
            x = df[col]
            if (
                x.dtype in pl.INTEGER_DTYPES
                and ((x < JS_MIN_SAFE_INTEGER) | (x > JS_MAX_SAFE_INTEGER)).any()
            ):
                # with_columns returns a new DataFrame
                df = df.with_columns(pl.col(col).cast(pl.Utf8))
                converted.append(col)

    if converted and warn_on_int_to_str_conversion:
        warnings.warn(
            "The columns {} contains integers that are too large for Javascript.\n"
            "They have been converted to str.\n"
            "To silence this warning, please run:\n"
            "    import itables.options as opt\n"
            "    opt.warn_on_int_to_str_conversion = False".format(converted)
        )

    return df
102+
103+
104+
def datatables_rows(
105+
df, count=None, warn_on_unexpected_types=False, warn_on_int_to_str_conversion=False
106+
):
62107
"""Format the values in the table and return the data, row by row, as requested by DataTables"""
108+
df = convert_bigints_to_str(df, warn_on_int_to_str_conversion)
109+
63110
# We iterate over columns using an index rather than the column name
64111
# to avoid an issue in case of duplicated column names #89
65112
if count is None or len(df.columns) == count:
@@ -73,7 +120,7 @@ def datatables_rows(df, count=None):
73120
try:
74121
# Pandas DataFrame
75122
data = list(zip(*(empty_columns + [_format_column(x) for _, x in df.items()])))
76-
return json.dumps(data, cls=generate_encoder(opt.warn_on_unexpected_types))
123+
return json.dumps(data, cls=generate_encoder(warn_on_unexpected_types))
77124
except AttributeError:
78125
# Polars DataFrame
79126
data = list(df.iter_rows())

itables/javascript.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ def to_html_datatable(df=None, caption=None, tableId=None, connected=True, **kwa
245245
if (
246246
option not in kwargs
247247
and not option.startswith("__")
248-
and option not in ["read_package_file", "warn_on_unexpected_types"]
248+
and option not in ["read_package_file"]
249249
):
250250
kwargs[option] = getattr(opt, option)
251251

@@ -274,6 +274,8 @@ def to_html_datatable(df=None, caption=None, tableId=None, connected=True, **kwa
274274
maxColumns = kwargs.pop("maxColumns", pd.get_option("display.max_columns") or 0)
275275
eval_functions = kwargs.pop("eval_functions", None)
276276
pre_dt_code = kwargs.pop("pre_dt_code")
277+
warn_on_unexpected_types = kwargs.pop("warn_on_unexpected_types", False)
278+
warn_on_int_to_str_conversion = kwargs.pop("warn_on_int_to_str_conversion", False)
277279

278280
if isinstance(df, (np.ndarray, np.generic)):
279281
df = pd.DataFrame(df)
@@ -390,7 +392,12 @@ def to_html_datatable(df=None, caption=None, tableId=None, connected=True, **kwa
390392
# When the header has an extra column, we add
391393
# an extra empty column in the table data #141
392394
column_count = _column_count_in_header(table_header)
393-
dt_data = datatables_rows(df, column_count)
395+
dt_data = datatables_rows(
396+
df,
397+
column_count,
398+
warn_on_unexpected_types=warn_on_unexpected_types,
399+
warn_on_int_to_str_conversion=warn_on_int_to_str_conversion,
400+
)
394401

395402
output = replace_value(
396403
output, "const data = [];", "const data = {};".format(dt_data)

itables/options.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,6 @@
4646

4747
"""Should a warning appear when we have to encode an unexpected type?"""
4848
warn_on_unexpected_types = True
49+
50+
"""Should a warning appear when we convert large integers to str?"""
51+
warn_on_int_to_str_conversion = True

itables/sample_dfs.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,20 @@ def get_dict_of_test_dfs(N=100, M=100, polars=False):
240240
),
241241
),
242242
"named_column_index": pd.DataFrame({"a": [1]}).rename_axis("columns", axis=1),
243+
"big_integers": pd.DataFrame(
244+
{
245+
"bigint": [
246+
1234567890123456789,
247+
2345678901234567890,
248+
3456789012345678901,
249+
],
250+
"expected": [
251+
"1234567890123456789",
252+
"2345678901234567890",
253+
"3456789012345678901",
254+
],
255+
}
256+
),
243257
}
244258

245259
if polars:

tests/test_datatables_format.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,18 @@
6464
.T.reset_index(),
6565
[[None, "a", 1, 2]],
6666
),
67+
(
68+
pd.DataFrame(
69+
{
70+
"long": [
71+
1234567890123456789,
72+
2345678901234567890,
73+
3456789012345678901,
74+
]
75+
}
76+
),
77+
'[["1234567890123456789"], ["2345678901234567890"], ["3456789012345678901"]]',
78+
),
6779
],
6880
ids=[
6981
"bool",
@@ -80,6 +92,7 @@
8092
"object_dict",
8193
"df_with_named_column_axis",
8294
"transposed_df",
95+
"big_integers",
8396
],
8497
)
8598
def test_datatables_rows(df, expected):
@@ -95,7 +108,10 @@ def test_datatables_rows(df, expected):
95108
)
96109
column_count = _column_count_in_header(table_header)
97110
actual = datatables_rows(df, count=column_count)
98-
assert actual == json.dumps(expected)
111+
if isinstance(expected, str):
112+
assert actual == expected
113+
else:
114+
assert actual == json.dumps(expected)
99115

100116

101117
@pytest.mark.skipif(

tests/test_javascript.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@ def test_warn_on_unexpected_types_not_in_html(df):
66
assert "warn_on_unexpected_types" not in html
77

88

9+
def test_warn_on_int_to_str_conversion_not_in_html(df):
10+
html = to_html_datatable(df)
11+
assert "warn_on_int_to_str_conversion" not in html
12+
13+
914
def test_df_fits_in_one_page(df, lengthMenu):
1015
kwargs = dict(lengthMenu=lengthMenu)
1116
kwargs = {key: value for key, value in kwargs.items() if value is not None}

tests/test_sample_dfs.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,15 @@ def kwargs_remove_none(**kwargs):
5252
return {key: value for key, value in kwargs.items() if value is not None}
5353

5454

55-
def test_show_test_dfs(df, lengthMenu):
55+
def test_show_test_dfs(df, lengthMenu, monkeypatch):
56+
if "bigint" in df.columns:
57+
monkeypatch.setattr("itables.options.warn_on_int_to_str_conversion", False)
5658
show(df, **kwargs_remove_none(lengthMenu=lengthMenu))
5759

5860

59-
def test_to_html_datatable(df, lengthMenu):
61+
def test_to_html_datatable(df, lengthMenu, monkeypatch):
62+
if "bigint" in df.columns:
63+
monkeypatch.setattr("itables.options.warn_on_int_to_str_conversion", False)
6064
to_html_datatable(df, **kwargs_remove_none(lengthMenu=lengthMenu))
6165

6266

@@ -73,7 +77,9 @@ def test_format_column(series_name, series):
7377

7478

7579
@pytest.mark.parametrize("series_name,series", get_dict_of_test_series().items())
76-
def test_show_test_series(series_name, series):
80+
def test_show_test_series(series_name, series, monkeypatch):
81+
if "bigint" in series_name:
82+
monkeypatch.setattr("itables.options.warn_on_int_to_str_conversion", False)
7783
show(series)
7884

7985

0 commit comments

Comments
 (0)