Skip to content

Commit a4f915a

Browse files
authored
Add class methods wrapper (#331)
* Add class methods wrapper
* fix local tests
* Don't override store module from rust
* exceptions module
* Update imports
* fix store wrapper imports
* Re-export _store for correct type inference
* Instantiate subclasses in from_url
* Ensure from_url instantiates subclasses
* Ensure correct error raised
* Convert tests to use store api
* lint
* Add obspec validation tests
* use published obspec
* hyperlinks
1 parent 152955a commit a4f915a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+1157
-286
lines changed

docs/api/list.md

-3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,3 @@
33
::: obstore.list
44
::: obstore.list_with_delimiter
55
::: obstore.list_with_delimiter_async
6-
::: obstore.ListResult
7-
::: obstore.ListStream
8-
::: obstore.ListChunkType

docs/api/store/aws.md

+3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# AWS S3
22

33
::: obstore.store.S3Store
4+
options:
5+
inherited_members: true
6+
show_bases: false
47
::: obstore.store.S3Config
58
options:
69
show_if_no_docstring: true

docs/api/store/azure.md

+3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# Microsoft Azure
22

33
::: obstore.store.AzureStore
4+
options:
5+
inherited_members: true
6+
show_bases: false
47
::: obstore.store.AzureConfig
58
options:
69
show_if_no_docstring: true

docs/api/store/gcs.md

+3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# Google Cloud Storage
22

33
::: obstore.store.GCSStore
4+
options:
5+
inherited_members: true
6+
show_bases: false
47
::: obstore.store.GCSConfig
58
options:
69
show_if_no_docstring: true

docs/api/store/http.md

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
# HTTP
22

33
::: obstore.store.HTTPStore
4+
options:
5+
inherited_members: true
6+
show_bases: false

docs/api/store/local.md

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
# Local
22

33
::: obstore.store.LocalStore
4+
options:
5+
inherited_members: true
6+
show_bases: false

docs/api/store/memory.md

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
# Memory
22

33
::: obstore.store.MemoryStore
4+
options:
5+
inherited_members: true
6+
show_bases: false

docs/cookbook.md

+13
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,19 @@ store = ... # store of your choice
3939
# Get a stream of Arrow RecordBatches of metadata
4040
list_stream = obs.list(store, prefix="data", return_arrow=True)
4141
for record_batch in list_stream:
42+
# Perform zero-copy conversion to your arrow-backed library of choice
43+
#
44+
# To pyarrow:
45+
# pyarrow.record_batch(record_batch)
46+
#
47+
# To polars:
48+
# polars.DataFrame(record_batch)
49+
#
50+
# To pandas (with Arrow-backed data-types):
51+
# pyarrow.record_batch(record_batch).to_pandas(types_mapper=pd.ArrowDtype)
52+
#
53+
# To arro3:
54+
# arro3.core.RecordBatch(record_batch)
4255
print(record_batch.num_rows)
4356
```
4457

obstore/python/obstore/__init__.py

+45-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,50 @@
11
from typing import TYPE_CHECKING
22

3-
from ._obstore import *
4-
from ._obstore import ___version
3+
from . import store
4+
from ._obstore import (
5+
Bytes,
6+
___version,
7+
copy,
8+
copy_async,
9+
delete,
10+
delete_async,
11+
get,
12+
get_async,
13+
get_range,
14+
get_range_async,
15+
get_ranges,
16+
get_ranges_async,
17+
head,
18+
head_async,
19+
list, # noqa: A004
20+
list_with_delimiter,
21+
list_with_delimiter_async,
22+
open_reader,
23+
open_reader_async,
24+
open_writer,
25+
open_writer_async,
26+
put,
27+
put_async,
28+
rename,
29+
rename_async,
30+
sign,
31+
sign_async,
32+
)
533

634
if TYPE_CHECKING:
7-
from . import exceptions, store
8-
35+
from . import _store, exceptions
36+
from ._obstore import (
37+
HTTP_METHOD,
38+
AsyncReadableFile,
39+
AsyncWritableFile,
40+
Bytes,
41+
BytesStream,
42+
GetResult,
43+
ListChunkType,
44+
ListResult,
45+
ListStream,
46+
ReadableFile,
47+
SignCapableStore,
48+
WritableFile,
49+
)
950
__version__: str = ___version()

obstore/python/obstore/_buffered.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ from obspec._attributes import Attributes
77

88
from ._bytes import Bytes
99
from ._list import ObjectMeta
10-
from .store import ObjectStore
10+
from ._store import ObjectStore
1111

1212
if sys.version_info >= (3, 12):
1313
from collections.abc import Buffer

obstore/python/obstore/_copy.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from .store import ObjectStore
1+
from ._store import ObjectStore
22

33
def copy(store: ObjectStore, from_: str, to: str, *, overwrite: bool = True) -> None:
44
"""Copy an object from one path to another in the same object store.

obstore/python/obstore/_delete.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from collections.abc import Sequence
22

3-
from .store import ObjectStore
3+
from ._store import ObjectStore
44

55
def delete(store: ObjectStore, paths: str | Sequence[str]) -> None:
66
"""Delete the object at the specified location(s).

obstore/python/obstore/_get.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ from obspec._get import GetOptions
66

77
from ._bytes import Bytes
88
from ._list import ObjectMeta
9-
from .store import ObjectStore
9+
from ._store import ObjectStore
1010

1111
class GetResult:
1212
"""Result for a get request.

obstore/python/obstore/_list.pyi

+7-72
Original file line numberDiff line numberDiff line change
@@ -1,77 +1,12 @@
1-
# ruff: noqa: UP006
2-
# ruff: noqa: UP035
3-
# Use `list` instead of `List` for type annotation
4-
# `typing.List` is deprecated, use `list` instead
5-
# ruff: noqa: A001
6-
# Variable `list` is shadowing a Python builtinRuff
1+
# ruff: noqa: A001, UP006, UP035
72

8-
from typing import Generic, List, Literal, Self, TypedDict, TypeVar, overload
3+
from typing import List, Literal, overload
94

105
from arro3.core import RecordBatch, Table
6+
from obspec._list import ListResult, ListStream
117
from obspec._meta import ObjectMeta
128

13-
from .store import ObjectStore
14-
15-
ListChunkType = TypeVar("ListChunkType", List[ObjectMeta], RecordBatch, Table) # noqa: PYI001
16-
"""The data structure used for holding list results.
17-
18-
By default, listing APIs return a `list` of [`ObjectMeta`][obspec.ObjectMeta]. However
19-
for improved performance when listing large buckets, you can pass `return_arrow=True`.
20-
Then an Arrow `RecordBatch` will be returned instead.
21-
22-
This implements [`obspec.ListChunkType_co`][], but is redefined here to specialize the
23-
exact instance of the Arrow return type, given that in the obstore implementation, an
24-
[`arro3.core.RecordBatch`][] or [`arro3.core.Table`][] will always be returned.
25-
"""
26-
27-
class ListResult(TypedDict, Generic[ListChunkType]):
28-
"""Result of a list call.
29-
30-
Includes objects, prefixes (directories) and a token for the next set of results.
31-
Individual result sets may be limited to 1,000 objects based on the underlying
32-
object storage's limitations.
33-
34-
This implements [`obspec.ListResult`][].
35-
"""
36-
37-
common_prefixes: List[str]
38-
"""Prefixes that are common (like directories)"""
39-
40-
objects: ListChunkType
41-
"""Object metadata for the listing"""
42-
43-
class ListStream(Generic[ListChunkType]):
44-
"""A stream of [ObjectMeta][obspec.ObjectMeta] that can be polled in a sync or
45-
async fashion.
46-
47-
This implements [`obspec.ListStream`][].
48-
""" # noqa: D205
49-
50-
def __aiter__(self) -> Self:
51-
"""Return `Self` as an async iterator."""
52-
53-
def __iter__(self) -> Self:
54-
"""Return `Self` as an async iterator."""
55-
56-
async def collect_async(self) -> ListChunkType:
57-
"""Collect all remaining ObjectMeta objects in the stream.
58-
59-
This ignores the `chunk_size` parameter from the `list` call and collects all
60-
remaining data into a single chunk.
61-
"""
62-
63-
def collect(self) -> ListChunkType:
64-
"""Collect all remaining ObjectMeta objects in the stream.
65-
66-
This ignores the `chunk_size` parameter from the `list` call and collects all
67-
remaining data into a single chunk.
68-
"""
69-
70-
async def __anext__(self) -> ListChunkType:
71-
"""Return the next chunk of ObjectMeta in the stream."""
72-
73-
def __next__(self) -> ListChunkType:
74-
"""Return the next chunk of ObjectMeta in the stream."""
9+
from ._store import ObjectStore
7510

7611
@overload
7712
def list(
@@ -163,7 +98,7 @@ def list(
16398
!!! note
16499
There is no async version of this method, because `list` is not async under the
165100
hood, rather it only instantiates a stream, which can be polled in synchronous
166-
or asynchronous fashion. See [`ListStream`][obstore.ListStream].
101+
or asynchronous fashion. See [`ListStream`][obspec.ListStream].
167102
168103
Args:
169104
store: The ObjectStore instance to use.
@@ -174,8 +109,8 @@ def list(
174109
chunk_size: The number of items to collect per chunk in the returned
175110
(async) iterator. All chunks except for the last one will have this many
176111
items. This is ignored in the
177-
[`collect`][obstore.ListStream.collect] and
178-
[`collect_async`][obstore.ListStream.collect_async] methods of
112+
[`collect`][obspec.ListStream.collect] and
113+
[`collect_async`][obspec.ListStream.collect_async] methods of
179114
`ListStream`.
180115
return_arrow: If `True`, return each batch of list items as an Arrow
181116
`RecordBatch`, not as a list of Python `dict`s. Arrow removes serialization

obstore/python/obstore/_obstore.pyi

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from . import _store as _store
12
from ._buffered import AsyncReadableFile as AsyncReadableFile
23
from ._buffered import AsyncWritableFile as AsyncWritableFile
34
from ._buffered import ReadableFile as ReadableFile
@@ -21,16 +22,14 @@ from ._get import get_ranges as get_ranges
2122
from ._get import get_ranges_async as get_ranges_async
2223
from ._head import head as head
2324
from ._head import head_async as head_async
24-
from ._list import ListChunkType as ListChunkType
25-
from ._list import ListResult as ListResult
26-
from ._list import ListStream as ListStream
2725
from ._list import list as list # noqa: A004
2826
from ._list import list_with_delimiter as list_with_delimiter
2927
from ._list import list_with_delimiter_async as list_with_delimiter_async
3028
from ._put import put as put
3129
from ._put import put_async as put_async
3230
from ._rename import rename as rename
3331
from ._rename import rename_async as rename_async
32+
from ._scheme import parse_scheme as parse_scheme
3433
from ._sign import HTTP_METHOD as HTTP_METHOD
3534
from ._sign import SignCapableStore as SignCapableStore
3635
from ._sign import sign as sign

obstore/python/obstore/_rename.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from .store import ObjectStore
1+
from ._store import ObjectStore
22

33
def rename(store: ObjectStore, from_: str, to: str, *, overwrite: bool = True) -> None:
44
"""Move an object from one path to another in the same object store.

obstore/python/obstore/_scheme.pyi

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from typing import Literal
2+
3+
def parse_scheme(
4+
url: str,
5+
) -> Literal["s3", "gcs", "http", "local", "memory", "azure"]: ...

obstore/python/obstore/store/__init__.pyi renamed to obstore/python/obstore/_store/__init__.pyi

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# TODO: move to reusable types package
22
from collections.abc import Callable
33
from pathlib import Path
4-
from typing import Any, TypeAlias, Unpack, overload
4+
from typing import Any, Self, TypeAlias, Unpack, overload
55

66
from ._aws import S3Config as S3Config
77
from ._aws import S3Credential as S3Credential
@@ -154,7 +154,7 @@ class LocalStore:
154154
*,
155155
automatic_cleanup: bool = False,
156156
mkdir: bool = False,
157-
) -> LocalStore:
157+
) -> Self:
158158
"""Construct a new LocalStore from a `file://` URL.
159159
160160
**Examples:**

obstore/python/obstore/store/_aws.pyi renamed to obstore/python/obstore/_store/_aws.pyi

+11-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
from collections.abc import Coroutine
22
from datetime import datetime
3-
from typing import Any, Literal, NotRequired, Protocol, TypeAlias, TypedDict, Unpack
3+
from typing import (
4+
Any,
5+
Literal,
6+
NotRequired,
7+
Protocol,
8+
Self,
9+
TypeAlias,
10+
TypedDict,
11+
Unpack,
12+
)
413

514
from ._client import ClientConfig
615
from ._retry import RetryConfig
@@ -501,7 +510,7 @@ class S3Store:
501510
retry_config: RetryConfig | None = None,
502511
credential_provider: S3CredentialProvider | None = None,
503512
**kwargs: Unpack[S3Config],
504-
) -> S3Store:
513+
) -> Self:
505514
"""Parse available connection info from a well-known storage URL.
506515
507516
The supported url schemes are:

obstore/python/obstore/store/_azure.pyi renamed to obstore/python/obstore/_store/_azure.pyi

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from collections.abc import Coroutine
22
from datetime import datetime
3-
from typing import Any, Protocol, TypeAlias, TypedDict, Unpack
3+
from typing import Any, Protocol, Self, TypeAlias, TypedDict, Unpack
44

55
from ._client import ClientConfig
66
from ._retry import RetryConfig
@@ -353,7 +353,7 @@ class AzureStore:
353353
retry_config: RetryConfig | None = None,
354354
credential_provider: AzureCredentialProvider | None = None,
355355
**kwargs: Unpack[AzureConfig],
356-
) -> AzureStore:
356+
) -> Self:
357357
"""Construct a new AzureStore with values populated from a well-known storage URL.
358358
359359
The supported url schemes are:

obstore/python/obstore/store/_gcs.pyi renamed to obstore/python/obstore/_store/_gcs.pyi

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from collections.abc import Coroutine
22
from datetime import datetime
3-
from typing import Any, Protocol, TypedDict, Unpack
3+
from typing import Any, Protocol, Self, TypedDict, Unpack
44

55
from ._client import ClientConfig
66
from ._retry import RetryConfig
@@ -164,7 +164,7 @@ class GCSStore:
164164
retry_config: RetryConfig | None = None,
165165
credential_provider: GCSCredentialProvider | None = None,
166166
**kwargs: Unpack[GCSConfig],
167-
) -> GCSStore:
167+
) -> Self:
168168
"""Construct a new GCSStore with values populated from a well-known storage URL.
169169
170170
The supported url schemes are:

0 commit comments

Comments
 (0)