@@ -74,7 +74,7 @@ class ListDtype(ArrowDtype):
74
74
An ExtensionDtype suitable for storing homogeneous lists of data.
75
75
"""
76
76
77
- _is_immutable = True # TODO(wayd): should we allow mutability?
77
+ _is_immutable = True
78
78
79
79
def __init__ (self , value_dtype : pa .DataType ) -> None :
80
80
super ().__init__ (pa .large_list (value_dtype ))
@@ -100,10 +100,7 @@ def name(self) -> str: # type: ignore[override]
100
100
"""
101
101
A string identifying the data type.
102
102
"""
103
- # TODO: reshaping tests require the name list to match the large_list
104
- # implementation; assumedly there are some astype(str(dtype)) casts
105
- # going on. Should fix so this can just be "list[...]" for end user
106
- return f"large_list[{ self .pyarrow_dtype .value_type !s} ]"
103
+ return f"list[{ self .pyarrow_dtype .value_type !s} ]"
107
104
108
105
@property
109
106
def kind (self ) -> str :
@@ -124,7 +121,6 @@ def construct_array_type(cls) -> type_t[ListArray]:
124
121
return ListArray
125
122
126
123
def _get_common_dtype (self , dtypes : list [DtypeObj ]) -> DtypeObj | None :
127
- # TODO(wayd): should we implemented value type support?
128
124
for dtype in dtypes :
129
125
if (
130
126
isinstance (dtype , ListDtype )
@@ -153,8 +149,7 @@ def __init__(
153
149
if isinstance (values , (pa .Array , pa .ChunkedArray )):
154
150
parent_type = values .type
155
151
if not isinstance (parent_type , (pa .ListType , pa .LargeListType )):
156
- # Ideally could cast here, but I don't think pyarrow implements
157
- # many list casts
152
+ # TODO: maybe implement native casts in pyarrow
158
153
new_values = [
159
154
[x .as_py ()] if x .is_valid else None for x in values
160
155
]
@@ -164,12 +159,10 @@ def __init__(
164
159
else :
165
160
value_type = pa .array (values ).type .value_type
166
161
167
- # Internally always use large_string instead of string
168
162
if value_type == pa .string ():
169
163
value_type = pa .large_string ()
170
164
171
165
if not isinstance (values , pa .ChunkedArray ):
172
- # To support NA, we need to create an Array first :-(
173
166
arr = pa .array (values , type = pa .large_list (value_type ), from_pandas = True )
174
167
self ._pa_array = pa .chunked_array (arr , type = pa .large_list (value_type ))
175
168
else :
@@ -200,8 +193,6 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
200
193
values = pa .array (scalars , from_pandas = True )
201
194
202
195
if values .type == "null" and dtype is not None :
203
- # TODO: the sequencing here seems wrong; just making the tests pass for now
204
- # but this needs a comprehensive review
205
196
pa_type = string_to_pyarrow_type (str (dtype ))
206
197
values = pa .array (values , type = pa_type )
207
198
@@ -232,8 +223,6 @@ def _box_pa(
232
223
return cls ._box_pa_array (value , pa_type )
233
224
234
225
def __getitem__ (self , item ):
235
- # PyArrow does not support NumPy's selection with an equal length
236
- # mask, so let's convert those to integral positions if needed
237
226
if isinstance (item , (np .ndarray , ExtensionArray )):
238
227
if is_bool_dtype (item .dtype ):
239
228
mask_len = len (item )
@@ -305,9 +294,6 @@ def _empty(cls, shape: Shape, dtype: ExtensionDtype):
305
294
ExtensionDtype.empty
306
295
ExtensionDtype.empty is the 'official' public version of this API.
307
296
"""
308
- # Implementer note: while ExtensionDtype.empty is the public way to
309
- # call this method, it is still required to implement this `_empty`
310
- # method as well (it is called internally in pandas)
311
297
if isinstance (shape , tuple ):
312
298
if len (shape ) > 1 :
313
299
raise ValueError ("ListArray may only be 1-D" )
@@ -334,9 +320,9 @@ def __eq__(self, other):
334
320
elif isinstance (other , (pa .ListScalar , pa .LargeListScalar )):
335
321
from pandas .arrays import BooleanArray
336
322
337
- # TODO: pyarrow.compute does not implement broadcasting equality
338
- # for an array of lists to a listscalar
339
- # TODO: pyarrow doesn't compare missing values as missing???
323
+ # TODO: pyarrow.compute does not implement equal for lists
324
+ # https://github.com/apache/arrow/issues/45167
325
+ # TODO: pyarrow doesn't compare missing values in Python as missing???
340
326
# arr = pa.array([1, 2, None])
341
327
# pc.equal(arr, arr[2]) returns all nulls but
342
328
# arr[2] == arr[2] returns True
0 commit comments