33 See the LICENSE file in the root of this project for license details.
44'''
55
6+ from __future__ import annotations
7+
68from collections .abc import Iterator
79import json
8- from typing import Any
10+ from typing import Any , Protocol , TYPE_CHECKING
911
1012import pandas as pd
1113import numpy as np
1214
13- from .data_access import read_value , read_field_vectorized
15+ from .data_access import ReadableBuffer , read_value , read_field_vectorized
1416from .errors import CorruptResourceError
1517
18+ if TYPE_CHECKING :
19+ from .structure import Structure
20+
# Bit width of the size field read at bit offset 0 of a resource buffer
# (see the read_value call in Vector.__init__).
SIZE_OFFSET_IN_BITS: int = 64
# The same width expressed in bytes; item offsets start this many bytes
# into the buffer.
SIZE_OFFSET_IN_BYTES: int = SIZE_OFFSET_IN_BITS // 8
# Additional bytes a buffer must hold beyond the size field before
# ResourceBase.__init__ accepts it.
SIZE_PADDING_IN_BYTES: int = 8
1924
2025
class ReadStorage(Protocol):
    """Structural type for the ``storage`` argument of the ``open`` classmethods.

    Any object exposing a compatible ``get`` method satisfies this protocol;
    no inheritance is required.
    """

    def get(self, key: str, is_optional: bool = False) -> Any:
        """Return the stored object registered under *key*.

        Args:
            key: Name of the resource to look up.
            is_optional: Flag forwarded unchanged by the ``open`` callers;
                how an implementation treats a missing optional key is up
                to that implementation.
        """
28+
29+
2130class ResourceBase :
22- def __init__ (self , mem : Any , element_type : type [Any ] ) -> None :
31+ def __init__ (self , mem : ReadableBuffer , element_type : type [Structure ] | None ) -> None :
2332 if len (mem ) < (SIZE_OFFSET_IN_BYTES + SIZE_PADDING_IN_BYTES ):
2433 raise CorruptResourceError ()
2534 self ._mem = memoryview (mem )
@@ -32,9 +41,11 @@ def size_in_bytes(self) -> int:
3241 return len (self ._mem )
3342
def _item_offset(self, index: int) -> int:
    """Return the byte offset of the element at position *index*.

    The offset skips the leading ``SIZE_OFFSET_IN_BYTES`` bytes of the
    buffer and then advances by one element size per index step.
    """
    # Narrow the optional element type for type checkers.
    assert self._element_type is not None
    element_size = self._element_type._SIZE_IN_BYTES
    raw_offset = SIZE_OFFSET_IN_BYTES + element_size * index
    # Coerce to a plain int so callers always get a builtin integer.
    return int(raw_offset)
3646
3747 def _get_item (self , index : int ) -> Any :
48+ assert self ._element_type is not None
3849 offset = self ._item_offset (index )
3950 return self ._element_type (self ._mem , offset )
4051
@@ -67,7 +78,7 @@ def __repr__(self) -> str:
6778 return json .dumps (self ._repr_attributes (), indent = 4 )
6879
6980 @classmethod
70- def open (cls , storage : Any , name : str , initializer : Any , is_optional : bool = False ) -> Any :
81+ def open (cls , storage : ReadStorage , name : str , initializer : Any , is_optional : bool = False ) -> Any :
7182 return cls (storage .get (name , is_optional ), initializer )
7283
7384
@@ -82,6 +93,7 @@ def to_numpy(self, limit: int | None = None) -> Any:
8293 if limit is not None :
8394 sliced = sliced [:limit ]
8495
96+ assert self ._sequence ._element_type is not None
8597 fields = self ._sequence ._element_type ._FIELDS
8698 dtype = self ._sequence ._element_type .dtype ()
8799 result = np .empty (sliced .shape [0 ], dtype = dtype )
@@ -99,6 +111,7 @@ def __iter__(self) -> Iterator[Any]:
99111 yield self ._sequence [i ]
100112
101113 def __getattr__ (self , name : str ) -> pd .DataFrame :
114+ assert self ._sequence ._element_type is not None
102115 try :
103116 field = self ._sequence ._element_type ._FIELDS [name ]
104117 except KeyError :
@@ -112,7 +125,7 @@ def __repr__(self) -> str:
112125
113126
114127class Vector (ResourceBase ):
115- def __init__ (self , mem : Any , element_type : type [Any ]) -> None :
128+ def __init__ (self , mem : ReadableBuffer , element_type : type [Structure ]) -> None :
116129 ResourceBase .__init__ (self , mem , element_type )
117130 size_in_bytes = read_value (self ._mem , 0 , SIZE_OFFSET_IN_BITS , False )
118131 size , rem = divmod (size_in_bytes , self ._type_size_in_bytes )
@@ -122,6 +135,7 @@ def __init__(self, mem: Any, element_type: type[Any]) -> None:
122135 def to_numpy (self ) -> Any :
123136 """Convert entire vector to a numpy structured array (vectorized)."""
124137 raw_2d = self ._as_numpy_2d ()
138+ assert self ._element_type is not None
125139 fields = self ._element_type ._FIELDS
126140 dtype = self ._element_type .dtype ()
127141 result = np .empty (self ._size , dtype = dtype )
@@ -147,11 +161,13 @@ def __getitem__(self, index: int | slice) -> Any:
def __iter__(self) -> Iterator[Any]:
    """Yield one element object per stored item, in index order.

    Attribute lookups are hoisted into locals before the loop so the loop
    body only performs the offset arithmetic and the element construction.
    """
    buffer = self._mem
    make_element = self._element_type
    # Narrow the optional element type for type checkers.
    assert make_element is not None
    stride = self._type_size_in_bytes
    for position in range(self._size):
        item_offset = SIZE_OFFSET_IN_BYTES + stride * position
        yield make_element(buffer, item_offset)
153168
154169 def __getattr__ (self , name : str ) -> pd .DataFrame :
170+ assert self ._element_type is not None
155171 try :
156172 field = self ._element_type ._FIELDS [name ]
157173 except KeyError :
@@ -178,14 +194,14 @@ def __repr__(self) -> str:
178194
179195
180196class Multivector (ResourceBase ):
def __init__(
    self,
    index_mem: ReadableBuffer,
    mem: ReadableBuffer,
    index_type: type[Structure],
    *element_types: type[Structure],
) -> None:
    """Set up a multivector from its index buffer and its data buffer.

    Args:
        index_mem: Buffer wrapped in a ``Vector`` of *index_type*.
        mem: Data buffer; kept as a ``memoryview``.
        index_type: Structure type of the index entries.
        *element_types: Structure types stored in a list, in the order
            given.
    """
    # The index is built first; if it fails, no other attribute is set.
    self._index = Vector(index_mem, index_type)
    self._mem = memoryview(mem)
    self._element_types = [*element_types]
    self._index_type = index_type
186202
187203 @classmethod
188- def open (cls , storage : Any , name : str , initializer : Any , is_optional : bool = False ) -> ' Multivector' :
204+ def open (cls , storage : ReadStorage , name : str , initializer : list [ type [ Structure ]] , is_optional : bool = False ) -> Multivector :
189205 return cls (storage .get (name + "_index" , is_optional ),
190206 storage .get (name , is_optional ),
191207 * initializer )
@@ -208,6 +224,7 @@ def __getitem__(self, index: int | slice) -> Any:
208224 type_index = read_value (self ._mem , offset * 8 , 8 , False )
209225 offset += 1
210226 element_type = self ._element_types [type_index ]
227+ assert element_type is not None
211228 element = element_type (self ._mem , offset )
212229 elements .append (element )
213230 offset += element_type ._SIZE_IN_BYTES
@@ -280,6 +297,7 @@ def __iter__(self) -> Iterator[Any]:
280297 yield self ._get_item (i )
281298
282299 def __getattr__ (self , name : str ) -> Any :
300+ assert self ._element_type is not None
283301 offset = self ._item_offset (0 )
284302 return getattr (self ._element_type (self ._mem , offset ), name )
285303
0 commit comments