
Commit b4252c3

Assorted cleanups
1 parent 25087f7 commit b4252c3


3 files changed: +7 -27 lines changed


pandas/core/arrays/list_.py (+6 -20)
@@ -74,7 +74,7 @@ class ListDtype(ArrowDtype):
     An ExtensionDtype suitable for storing homogeneous lists of data.
     """

-    _is_immutable = True  # TODO(wayd): should we allow mutability?
+    _is_immutable = True

     def __init__(self, value_dtype: pa.DataType) -> None:
         super().__init__(pa.large_list(value_dtype))
@@ -100,10 +100,7 @@ def name(self) -> str:  # type: ignore[override]
         """
         A string identifying the data type.
         """
-        # TODO: reshaping tests require the name list to match the large_list
-        # implementation; assumedly there are some astype(str(dtype)) casts
-        # going on. Should fix so this can just be "list[...]" for end user
-        return f"large_list[{self.pyarrow_dtype.value_type!s}]"
+        return f"list[{self.pyarrow_dtype.value_type!s}]"

     @property
     def kind(self) -> str:
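
For reference, a hedged sketch of the user-facing effect of this rename, assuming the module path shown in this diff; the dtype still wraps pa.large_list internally, only the reported name changes:

    import pyarrow as pa

    from pandas.core.arrays.list_ import ListDtype

    dtype = ListDtype(pa.int64())
    # The dtype is backed by pa.large_list(pa.int64()), but the public name
    # now drops the "large_" prefix.
    print(dtype.name)  # "list[int64]" (previously "large_list[int64]")
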
@@ -124,7 +121,6 @@ def construct_array_type(cls) -> type_t[ListArray]:
         return ListArray

     def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
-        # TODO(wayd): should we implemented value type support?
         for dtype in dtypes:
             if (
                 isinstance(dtype, ListDtype)
@@ -153,8 +149,7 @@ def __init__(
         if isinstance(values, (pa.Array, pa.ChunkedArray)):
            parent_type = values.type
            if not isinstance(parent_type, (pa.ListType, pa.LargeListType)):
-                # Ideally could cast here, but I don't think pyarrow implements
-                # many list casts
+                # TODO: maybe implement native casts in pyarrow
                new_values = [
                    [x.as_py()] if x.is_valid else None for x in values
                ]
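
The branch above normalizes a non-list pyarrow array by wrapping each valid element in a one-element list; a standalone sketch of that comprehension outside the class:

    import pyarrow as pa

    values = pa.array([1, 2, None])
    # Valid scalars become one-element lists; missing values stay None so a
    # later pa.array(..., from_pandas=True) call can keep them as nulls.
    new_values = [[x.as_py()] if x.is_valid else None for x in values]
    print(new_values)  # [[1], [2], None]
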
@@ -164,12 +159,10 @@
         else:
             value_type = pa.array(values).type.value_type

-        # Internally always use large_string instead of string
         if value_type == pa.string():
             value_type = pa.large_string()

         if not isinstance(values, pa.ChunkedArray):
-            # To support NA, we need to create an Array first :-(
             arr = pa.array(values, type=pa.large_list(value_type), from_pandas=True)
             self._pa_array = pa.chunked_array(arr, type=pa.large_list(value_type))
         else:
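
A hedged illustration of the storage convention used by the constructor above: string values are kept as large_string inside a large_list, and from_pandas=True turns None into nulls before the Array is wrapped in a ChunkedArray (a sketch of the idea, not the pandas code path itself):

    import pyarrow as pa

    value_type = pa.array([["a", "b"], ["c"]]).type.value_type  # string
    if value_type == pa.string():
        value_type = pa.large_string()

    values = [["a", "b"], None, ["c"]]
    arr = pa.array(values, type=pa.large_list(value_type), from_pandas=True)
    chunked = pa.chunked_array([arr], type=pa.large_list(value_type))
    print(chunked.type)  # large_list<item: large_string>
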
@@ -200,8 +193,6 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
         values = pa.array(scalars, from_pandas=True)

         if values.type == "null" and dtype is not None:
-            # TODO: the sequencing here seems wrong; just making the tests pass for now
-            # but this needs a comprehensive review
             pa_type = string_to_pyarrow_type(str(dtype))
             values = pa.array(values, type=pa_type)
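
The null-type branch above exists because an all-missing sequence infers pa.null(); a small sketch of the fixup, with pa.large_list(pa.int64()) standing in as an assumption for whatever string_to_pyarrow_type returns:

    import pyarrow as pa

    values = pa.array([None, None], from_pandas=True)
    print(values.type)  # null

    # Rebuild the array with the pyarrow type implied by the requested dtype,
    # mirroring the pa.array(values, type=pa_type) call in the diff.
    pa_type = pa.large_list(pa.int64())
    values = pa.array(values, type=pa_type)
    print(values.type)  # large_list<item: int64>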

@@ -232,8 +223,6 @@ def _box_pa(
         return cls._box_pa_array(value, pa_type)

     def __getitem__(self, item):
-        # PyArrow does not support NumPy's selection with an equal length
-        # mask, so let's convert those to integral positions if needed
         if isinstance(item, (np.ndarray, ExtensionArray)):
             if is_bool_dtype(item.dtype):
                 mask_len = len(item)
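
The removed comment described converting a NumPy-style boolean mask into integer positions before indexing the underlying pyarrow data; a minimal standalone sketch of that idea (not the actual __getitem__ body):

    import numpy as np
    import pyarrow as pa

    chunked = pa.chunked_array([pa.array([[1], [2, 3], None])])
    mask = np.array([True, False, True])

    # Boolean mask -> integral positions, then a pyarrow take()
    positions = np.flatnonzero(mask)
    print(chunked.take(positions).to_pylist())  # [[1], None]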
@@ -305,9 +294,6 @@ def _empty(cls, shape: Shape, dtype: ExtensionDtype):
         ExtensionDtype.empty
         ExtensionDtype.empty is the 'official' public version of this API.
         """
-        # Implementer note: while ExtensionDtype.empty is the public way to
-        # call this method, it is still required to implement this `_empty`
-        # method as well (it is called internally in pandas)
         if isinstance(shape, tuple):
             if len(shape) > 1:
                 raise ValueError("ListArray may only be 1-D")
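
The removed implementer note pointed out that ExtensionDtype.empty is the public entry point while _empty is still called internally; a hedged usage sketch (the contents of the returned array are not shown in this diff, only its length and the 1-D restriction):

    import pyarrow as pa

    from pandas.core.arrays.list_ import ListDtype

    dtype = ListDtype(pa.int64())
    empty = dtype.empty((3,))   # public API; dispatches to ListArray._empty
    print(len(empty))           # 3

    try:
        dtype.empty((3, 2))     # ListArray is one-dimensional only
    except ValueError as err:
        print(err)              # ListArray may only be 1-D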
@@ -334,9 +320,9 @@ def __eq__(self, other):
         elif isinstance(other, (pa.ListScalar, pa.LargeListScalar)):
             from pandas.arrays import BooleanArray

-            # TODO: pyarrow.compute does not implement broadcasting equality
-            # for an array of lists to a listscalar
-            # TODO: pyarrow doesn't compare missing values as missing???
+            # TODO: pyarrow.compute does not implement equal for lists
+            # https://github.com/apache/arrow/issues/45167
+            # TODO: pyarrow doesn't compare missing values in Python as missing???
             # arr = pa.array([1, 2, None])
             # pc.equal(arr, arr[2]) returns all nulls but
             # arr[2] == arr[2] returns True
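
The updated comments reference two pyarrow behaviors; a runnable version of the example embedded in the comment, with expected outputs taken from that comment:

    import pyarrow as pa
    import pyarrow.compute as pc

    arr = pa.array([1, 2, None])
    # Compute-level equality propagates nulls...
    print(pc.equal(arr, arr[2]))  # [null, null, null]
    # ...while Python-level comparison of the same null scalar returns True.
    print(arr[2] == arr[2])       # True

    # pc.equal also has no kernel for list types
    # (https://github.com/apache/arrow/issues/45167), hence the fallback here.
    lists = pa.array([[1], [2, 3]])
    try:
        pc.equal(lists, lists[0])
    except pa.ArrowNotImplementedError as err:
        print(err)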

pandas/core/internals/construction.py (-3)
@@ -13,7 +13,6 @@

 import numpy as np
 from numpy import ma
-import pyarrow as pa

 from pandas._config import using_string_dtype

@@ -462,8 +461,6 @@ def treat_as_nested(data, dtype) -> bool:
         len(data) > 0
         and is_list_like(data[0])
         and getattr(data[0], "ndim", 1) == 1
-        # TODO(wayd): hack so pyarrow list elements don't expand
-        and not isinstance(data[0], pa.ListScalar)
         and not isinstance(dtype, ListDtype)
         and not (isinstance(data, ExtensionArray) and data.ndim == 2)
     )

pandas/core/internals/managers.py (+1 -4)
@@ -1976,10 +1976,7 @@ def from_blocks(

     @classmethod
     def from_array(
-        cls,
-        array: ArrayLike,
-        index: Index,
-        refs: BlockValuesRefs | None = None,
+        cls, array: ArrayLike, index: Index, refs: BlockValuesRefs | None = None
     ) -> SingleBlockManager:
         """
         Constructor for if we have an array that is not yet a Block.
