Skip to content

Errors on Pandas IntegerArray dtypes #683

Open
@alexkyllo

Description

@alexkyllo

It looks like plotting fails when two or more geoms are plotted and the input data is a pandas IntegerArray, specifically one with dtype pandas.Int64Dtype. It throws the following error: TypeError: ufunc 'isfinite' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

I found this out because, for some reason, my database driver's "to pandas" feature returns 64-bit integer columns as this type instead of as numpy.int64.

from plotnine import *
import numpy as np
import pandas as pd

x = pd.array(np.arange(0, 10), dtype=pd.Int64Dtype())
y = x**2

(
    ggplot(mapping=aes(x=x, y=y)) +
        geom_point() +
        geom_line()
)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
File [~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/IPython/core/formatters.py:706](https://vscode-remote+wsl-002bubuntu-002d20-002e04.vscode-resource.vscode-cdn.net/home/alex/Python-Team-Queries/UsageAnalysis/mdu/~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/IPython/core/formatters.py:706), in PlainTextFormatter.__call__(self, obj)
    699 stream = StringIO()
    700 printer = pretty.RepresentationPrinter(stream, self.verbose,
    701     self.max_width, self.newline,
    702     max_seq_length=self.max_seq_length,
    703     singleton_pprinters=self.singleton_printers,
    704     type_pprinters=self.type_printers,
    705     deferred_pprinters=self.deferred_printers)
--> 706 printer.pretty(obj)
    707 printer.flush()
    708 return stream.getvalue()

File [~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/IPython/lib/pretty.py:410](https://vscode-remote+wsl-002bubuntu-002d20-002e04.vscode-resource.vscode-cdn.net/home/alex/Python-Team-Queries/UsageAnalysis/mdu/~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/IPython/lib/pretty.py:410), in RepresentationPrinter.pretty(self, obj)
    407                         return meth(obj, self, cycle)
    408                 if cls is not object \
    409                         and callable(cls.__dict__.get('__repr__')):
--> 410                     return _repr_pprint(obj, self, cycle)
    412     return _default_pprint(obj, self, cycle)
    413 finally:

File [~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/IPython/lib/pretty.py:778](https://vscode-remote+wsl-002bubuntu-002d20-002e04.vscode-resource.vscode-cdn.net/home/alex/Python-Team-Queries/UsageAnalysis/mdu/~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/IPython/lib/pretty.py:778), in _repr_pprint(obj, p, cycle)
    776 """A pprint that just redirects to the normal repr function."""
    777 # Find newlines and replace them with p.break_()
--> 778 output = repr(obj)
    779 lines = output.splitlines()
    780 with p.group():

File [~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/ggplot.py:95](https://vscode-remote+wsl-002bubuntu-002d20-002e04.vscode-resource.vscode-cdn.net/home/alex/Python-Team-Queries/UsageAnalysis/mdu/~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/ggplot.py:95), in ggplot.__repr__(self)
     91 def __repr__(self):
     92     """
     93     Print/show the plot
     94     """
---> 95     self.__str__()
     96     return '<ggplot: (%d)>' % self.__hash__()

File [~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/ggplot.py:86](https://vscode-remote+wsl-002bubuntu-002d20-002e04.vscode-resource.vscode-cdn.net/home/alex/Python-Team-Queries/UsageAnalysis/mdu/~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/ggplot.py:86), in ggplot.__str__(self)
     82 def __str__(self):
     83     """
     84     Print/show the plot
     85     """
---> 86     self.draw(show=True)
     88     # Return and empty string so that print(p) is "pretty"
     89     return ''

File [~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/ggplot.py:203](https://vscode-remote+wsl-002bubuntu-002d20-002e04.vscode-resource.vscode-cdn.net/home/alex/Python-Team-Queries/UsageAnalysis/mdu/~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/ggplot.py:203), in ggplot.draw(self, return_ggplot, show)
    201 self = deepcopy(self)
    202 with plot_context(self, show=show):
--> 203     self._build()
    205     # setup
    206     figure, axs = self._create_figure()

File [~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/ggplot.py:299](https://vscode-remote+wsl-002bubuntu-002d20-002e04.vscode-resource.vscode-cdn.net/home/alex/Python-Team-Queries/UsageAnalysis/mdu/~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/ggplot.py:299), in ggplot._build(self)
    295 scales.add_missing(('x', 'y'))
    297 # Map and train positions so that statistics have access
    298 # to ranges and all positions are numeric
--> 299 layout.train_position(layers, scales)
    300 layout.map_position(layers)
    302 # Apply and map statistics

File [~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/facets/layout.py:88](https://vscode-remote+wsl-002bubuntu-002d20-002e04.vscode-resource.vscode-cdn.net/home/alex/Python-Team-Queries/UsageAnalysis/mdu/~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/facets/layout.py:88), in Layout.train_position(self, layers, scales)
     85     result = self.facet.init_scales(layout, None, scales.y)
     86     self.panel_scales_y = result.y
---> 88 self.facet.train_position_scales(self, layers)

File [~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/facets/facet.py:238](https://vscode-remote+wsl-002bubuntu-002d20-002e04.vscode-resource.vscode-cdn.net/home/alex/Python-Team-Queries/UsageAnalysis/mdu/~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/facets/facet.py:238), in facet.train_position_scales(self, layout, layers)
    236     # the scale index for each data point
    237     SCALE_X = _layout['SCALE_X'].iloc[match_id].tolist()
--> 238     panel_scales_x.train(data, x_vars, SCALE_X)
    240 if panel_scales_y:
    241     y_vars = list(set(panel_scales_y[0].aesthetics) &
    242                   set(data.columns))

File [~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/scales/scales.py:128](https://vscode-remote+wsl-002bubuntu-002d20-002e04.vscode-resource.vscode-cdn.net/home/alex/Python-Team-Queries/UsageAnalysis/mdu/~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/scales/scales.py:128), in Scales.train(self, data, vars, idx)
    126 for i, sc in enumerate(self, start=1):
    127     bool_idx = (i == idx)
--> 128     sc.train(data.loc[bool_idx, col])

File [~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/scales/scale.py:703](https://vscode-remote+wsl-002bubuntu-002d20-002e04.vscode-resource.vscode-cdn.net/home/alex/Python-Team-Queries/UsageAnalysis/mdu/~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/scales/scale.py:703), in scale_continuous.train(self, x)
    700 if not len(x):
    701     return
--> 703 self.range.train(x)

File [~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/scales/range.py:33](https://vscode-remote+wsl-002bubuntu-002d20-002e04.vscode-resource.vscode-cdn.net/home/alex/Python-Team-Queries/UsageAnalysis/mdu/~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/plotnine/scales/range.py:33), in RangeContinuous.train(self, x)
     29 def train(self, x):
     30     """
     31     Train continuous range
     32     """
---> 33     self.range = scale_continuous.train(x, self.range)

File [~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/mizani/scale.py:108](https://vscode-remote+wsl-002bubuntu-002d20-002e04.vscode-resource.vscode-cdn.net/home/alex/Python-Team-Queries/UsageAnalysis/mdu/~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/mizani/scale.py:108), in scale_continuous.train(cls, new_data, old)
    105 if old is not None:
    106     new_data = np.hstack([new_data, old])
--> 108 return min_max(new_data, na_rm=True, finite=True)

File [~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/mizani/utils.py:85](https://vscode-remote+wsl-002bubuntu-002d20-002e04.vscode-resource.vscode-cdn.net/home/alex/Python-Team-Queries/UsageAnalysis/mdu/~/.pyenv/versions/3.10.8/lib/python3.10/site-packages/mizani/utils.py:85), in min_max(x, na_rm, finite)
     82     x = np.asarray(x)
     84 if na_rm and finite:
---> 85     x = x[np.isfinite(x)]
     86 elif not na_rm and np.any(np.isnan(x)):
     87     return np.nan, np.nan

TypeError: ufunc 'isfinite' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

Metadata

Metadata

Assignees

No one assigned

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions