@@ -66,7 +66,7 @@ class AISBatchLoader:
6666 Shar lazy-pointer (``shar_ptr``) sources transparently fall back to
6767 per-object byte-range ``get_reader(byte_range=…)`` calls when the installed
6868 AIStore SDK / cluster doesn't support byte-range entries in
69- ``BatchRequest.add()`` (or when ``force_individual=True``). The MOSS
69+ MOSS GetBatch requests (or when ``force_individual=True``). The MOSS
7070 GetBatch path is preferred when available; the byte-range fallback exists
7171 so non-gzipped lhotse-shar cuts on AIS work even on older deployments.
7272 """
@@ -121,11 +121,11 @@ def _moss_attrs(info: Any) -> tuple[str, str, str, Optional[str]]:
121121 Normalise an AIStore batch request/result entry into ``(bck, provider,
122122 obj_name, archpath)``.
123123
124- Verified empirically against ``aistore==1.23.0``:
125- * ``BatchRequest .requests_list`` items are ``aistore.sdk.batch.types.MossIn``
124+ Verified empirically against ``aistore==1.23.0`` and ``1.25.0`` :
125+ * ``Batch .requests_list`` items are ``aistore.sdk.batch.types.MossIn``
126126 with the original short attribute names (``bck``, ``provider``,
127127 ``obj_name``, ``archpath``).
128- * ``BatchRequest .get()`` yields ``(MossOut, content)`` tuples and
128+ * ``Batch .get()`` yields ``(MossOut, content)`` tuples and
129129 ``MossOut`` carries different attribute names. To stay robust
130130 against further SDK churn, this helper falls back through every
131131 naming convention we've seen in the wild
@@ -159,7 +159,8 @@ def _get_object_from_moss_in(self, moss_in: Any) -> bytes:
159159 Fetch a single object from AIStore using the ObjectNames request info.
160160
161161 This method is used as a fallback when batch operations fail or return empty content.
162- It handles archive extraction if an archpath is specified.
162+ It handles archive extraction if an archpath is specified, and preserves
163+ ``start`` / ``length`` byte-range requests used by Shar lazy pointers.
163164
164165 Args:
165166 moss_in: AIStore ObjectNames request — accepts both ``MossIn`` (from
@@ -173,21 +174,51 @@ def _get_object_from_moss_in(self, moss_in: Any) -> bytes:
173174 Raises:
174175 Exception: If the object cannot be fetched from AIStore.
175176 """
176- from aistore .sdk .archive_config import ArchiveConfig
177-
178177 bck , provider , obj_name , archpath = self ._moss_attrs (moss_in )
178+ start , length = self ._moss_range (moss_in )
179+
180+ obj = self .client .bucket (bck_name = bck , provider = provider ).object (obj_name )
181+
182+ if start is not None or length is not None :
183+ if archpath :
184+ raise AISBatchLoaderError (
185+ "Cannot fall back to direct GET for a request that combines "
186+ f"byte range and archive extraction: { obj_name } /{ archpath } "
187+ )
188+ if start is None or length is None :
189+ raise AISBatchLoaderError (
190+ f"Invalid byte-range request for { obj_name } : "
191+ f"start={ start !r} , length={ length !r} "
192+ )
193+ if length <= 0 :
194+ return b""
195+ end_inclusive = start + length - 1
196+ return obj .get_reader (
197+ byte_range = f"bytes={ start } -{ end_inclusive } "
198+ ).read_all ()
199+
200+ from aistore .sdk .archive_config import ArchiveConfig
179201
180202 config = None
181203 if archpath :
182204 config = ArchiveConfig (archpath = archpath )
183205
184- reader = (
185- self .client .bucket (bck_name = bck , provider = provider )
186- .object (obj_name )
187- .get_reader (archive_config = config )
188- )
206+ reader = obj .get_reader (archive_config = config )
189207 return reader .read_all ()
190208
209+ @staticmethod
210+ def _moss_range (info : Any ) -> tuple [Optional [int ], Optional [int ]]:
211+ """Return ``(start, length)`` byte-range fields from a MOSS request."""
212+ from numbers import Integral
213+
214+ def _int_or_none (name : str ) -> Optional [int ]:
215+ value = getattr (info , name , None )
216+ if isinstance (value , Integral ) and not isinstance (value , bool ):
217+ return int (value )
218+ return None
219+
220+ return _int_or_none ("start" ), _int_or_none ("length" )
221+
191222 def __call__ (self , cuts : CutSet ) -> CutSet :
192223 """
193224 Fetch all data referenced by a CutSet in one AIStore batch operation.
@@ -229,8 +260,8 @@ def __call__(self, cuts: CutSet) -> CutSet:
229260
230261 # Decide once per call whether shar_ptr entries can go through MOSS
231262 # GetBatch byte-range adds. If ``force_individual`` is on, or the SDK
232- # rejects byte-range BatchRequest.add() calls , route shar_ptr through
233- # the per-object byte-range fallback collected in ``shar_ptr_fallback``.
263+ # predates byte-range MOSS support , route shar_ptr through the per-object
264+ # byte-range fallback collected in ``shar_ptr_fallback``.
234265 shar_ptr_uses_batch = (
235266 not self .force_individual
236267 ) and self ._aistore_byte_range_supported ()
@@ -434,13 +465,20 @@ def _individual_get():
434465
435466 # Retry with direct API call if content is empty (from timeout or actual empty response)
436467 if content == b"" :
437- bck_ , provider_ , obj_name_ , archpath_ = self ._moss_attrs (info )
468+ direct_info = (
469+ saved_requests_list [request_idx ]
470+ if request_idx < len (saved_requests_list )
471+ else info
472+ )
473+ bck_ , provider_ , obj_name_ , archpath_ = self ._moss_attrs (
474+ direct_info
475+ )
438476 logger .warning (
439477 f"Object { obj_name_ } /{ archpath_ } from bucket { provider_ } ://{ bck_ } "
440478 f"returned empty content. Retrying with direct AIStore API call."
441479 )
442480 try :
443- content = self ._get_object_from_moss_in (info )
481+ content = self ._get_object_from_moss_in (direct_info )
444482 except Exception as ex :
445483 logger .error (
446484 f"Failed to fetch object { obj_name_ } from bucket "
@@ -478,8 +516,8 @@ def _cuts_have_ais_data(cuts: CutSet) -> bool:
478516 """Return True iff any manifest in ``cuts`` is served from AIStore.
479517
480518 Mirrors the detection conditions in :meth:`_collect_manifest_urls` but
481- without touching ``self.client`` or a ``BatchRequest `` — used to short-
482- circuit :meth:`__call__` when the CutSet has no AIS-backed data, so
519+ without touching ``self.client`` or a ``Batch `` — used to short-circuit
520+ :meth:`__call__` when the CutSet has no AIS-backed data, so
483521 loaders constructed in environments where AIStore isn't configured
484522 still pass cuts through unchanged.
485523 """
@@ -608,32 +646,50 @@ def _collect_manifest_urls(
608646 @lru_cache (maxsize = 1 )
609647 def _aistore_byte_range_supported () -> bool :
610648 """
611- Detect whether the installed aistore SDK accepts byte-range fetch in
612- :meth:`aistore.sdk.batch.batch.BatchRequest.add` *without raising*.
613-
614- Probe: instantiate a ``BatchRequest`` and call ``add(start=0,
615- length=0)`` against a sentinel object. If the SDK validates byte-range
616- usage eagerly with ``NotImplementedError`` (current behaviour, see
617- ``aistore/sdk/batch/batch.py``), this fails locally before any IO.
618- Cached for the process lifetime.
649+ Detect whether the installed aistore SDK/cluster generation supports
650+ byte-range MOSS entries.
651+
652+ ``aistore==1.25.0`` removed the older ``BatchRequest`` class and still
653+ has ``Batch.add(..., start=, length=)`` guarded by ``NotImplementedError``.
654+ The supported API is the lower-level MOSS request schema:
655+ ``Batch.requests_list`` exposes ``MossReq.moss_in`` and ``MossIn``
656+ serializes ``start`` / ``length`` in the GetBatch JSON body. Older SDKs
657+ had partial client-side fields before server support existed, so keep a
658+ conservative version gate and schema check here.
619659 """
620660 try :
621- from aistore .sdk .batch .batch import BatchRequest
661+ import re
662+
663+ import aistore
664+ from aistore .sdk .batch .batch import Batch
665+ from aistore .sdk .batch .types import MossIn , MossReq
622666 except Exception :
623667 return False
624- try :
625- req = BatchRequest ()
626- req .add (object (), start = 0 , length = 0 )
627- except NotImplementedError :
628- return False
629- except TypeError :
630- # ``start`` / ``length`` not in the signature on older SDKs.
668+
669+ m = re .match (r"^(\d+)\.(\d+)\.(\d+)" , getattr (aistore , "__version__" , "" ))
670+ if m is None or tuple (map (int , m .groups ())) < (1 , 25 , 0 ):
631671 return False
672+
673+ try :
674+ descriptor = vars (Batch ).get ("requests_list" )
675+ if not isinstance (descriptor , property ):
676+ return False
677+ if "moss_in" not in MossReq .model_fields :
678+ return False
679+ if not {"start" , "length" }.issubset (MossIn .model_fields ):
680+ return False
681+ probe = MossIn .model_construct (
682+ obj_name = "__lhotse_probe__.tar" ,
683+ bck = "__lhotse_probe__" ,
684+ provider = "ais" ,
685+ start = 0 ,
686+ length = 1 ,
687+ )
688+ dumped = probe .model_dump (by_alias = True , exclude_defaults = True )
632689 except Exception :
633- # Any other exception (e.g. invalid object stub) means the SDK
634- # got past byte-range validation, so the feature is supported.
635- return True
636- return True
690+ return False
691+
692+ return dumped .get ("start" ) == 0 and dumped .get ("length" ) == 1
637693
638694 def _add_shar_ptr_to_batch (
639695 self ,
@@ -648,8 +704,8 @@ def _add_shar_ptr_to_batch(
648704 Schedule a Shar lazy pointer fetch.
649705
650706 When ``shar_ptr_uses_batch`` is True the request is added to the MOSS
651- ``BatchRequest `` via direct ``MossIn.model_construct`` append (see
652- :meth:`_append_moss_in` for why we bypass ``BatchRequest .add``).
707+ ``Batch `` via direct ``MossIn.model_construct`` append (see
708+ :meth:`_append_moss_in` for why we bypass ``Batch .add``).
653709 Otherwise the ``(manifest_idx, bck, provider, obj_name, offset, length)``
654710 tuple is appended to ``shar_ptr_fallback`` so :meth:`__call__` can
655711 drain it via per-object byte-range gets.
@@ -713,11 +769,11 @@ def _append_moss_in(
713769 length : Optional [int ] = None ,
714770 ) -> None :
715771 """Append one MossIn entry to the batch request, bypassing the SDK's
716- ``BatchRequest .add(bucket.object(obj_name), ...)`` path.
772+ ``Batch .add(bucket.object(obj_name), ...)`` path.
717773
718774 Why this bypass exists
719775 ----------------------
720- ``BatchRequest .add`` builds a fresh ``Bucket`` + ``BucketDetails``
776+ ``Batch .add`` builds a fresh ``Bucket`` + ``BucketDetails``
721777 (Pydantic v2) + ``Object`` + ``MossIn`` (Pydantic v2 with field
722778 aliases) per call. With ~45 manifests per minibatch in a Granary
723779 blend, profiling (nsys 2026-05-15, NVTX scope ``ais.collect_urls``)
@@ -739,7 +795,7 @@ def _append_moss_in(
739795 round-trips ``model_construct`` vs the validating constructor
740796 and asserts ``model_dump`` equality.
741797 - ``batch.request.moss_in`` is a non-public attribute. Stable
742- through 1.20.0 → 1.23 .0; bumping the SDK major version requires
798+ through 1.20.0 → 1.25 .0; bumping the SDK major version requires
743799 re-verifying that the field still exists with the same shape.
744800 """
745801 # Local imports kept local: aistore is an optional dependency and
0 commit comments