33from typing import Optional , Union , Literal
44from concurrent .futures import ThreadPoolExecutor , Future , as_completed
55
6+ from lavender_data .serialize import deserialize_sample , DeserializeException
67from lavender_data .client .api import (
78 get_client ,
89 LavenderDataClient ,
@@ -140,6 +141,8 @@ def __init__(
140141 self ._api_key = api_key
141142 self ._api = _api (self ._api_url , self ._api_key )
142143
144+ self ._bytes = 0
145+
143146 self .id = self ._iteration_id
144147
145148 def torch (
@@ -215,19 +218,21 @@ def _set_last_indices(self, sample_or_batch):
215218
216219 def _get_next_item (self ):
217220 try :
218- sample_or_batch = self ._api .get_next_item (
221+ serialized = self ._api .get_next_item (
219222 iteration_id = self ._iteration_id ,
220223 rank = self ._rank ,
221224 no_cache = self ._no_cache ,
222225 max_retry_count = self ._max_retry_count ,
223226 )
227+ self ._bytes += len (serialized )
228+ return deserialize_sample (serialized )
224229 except LavenderDataApiError as e :
225230 if "No more indices to pop" in str (e ):
226231 raise StopIteration
227232 else :
228233 raise e
229-
230- return sample_or_batch
234+ except DeserializeException as e :
235+ raise ValueError ( f"Failed to deserialize sample: { e } " )
231236
232237 def _submit_next_item (self ) -> str :
233238 cache_key = self ._api .submit_next_item (
@@ -240,10 +245,12 @@ def _submit_next_item(self) -> str:
240245
241246 def _get_submitted_result (self , cache_key : str ):
242247 try :
243- return self ._api .get_submitted_result (
248+ serialized = self ._api .get_submitted_result (
244249 iteration_id = self ._iteration_id ,
245250 cache_key = cache_key ,
246251 )
252+ self ._bytes += len (serialized )
253+ return deserialize_sample (serialized )
247254 except LavenderDataApiError as e :
248255 if "Data is still being processed" in str (e ):
249256 return None
@@ -253,6 +260,8 @@ def _get_submitted_result(self, cache_key: str):
253260 raise StopIteration
254261 else :
255262 raise e
263+ except DeserializeException as e :
264+ raise ValueError (f"Failed to deserialize sample: { e } " )
256265
257266 def __next__ (self ):
258267 self ._complete_last_indices ()
0 commit comments