Skip to content

Commit b5b801f

Browse files
authored
Add Obstore-backed remote file type and facade (#16)
* feat: add obstore remote file metadata and facade * feat: refine remote file metadata and model-bound store * build: updated the project dependencies * build: updated the project dependencies * fix: harden remote file cleanup and upload metadata * feat: move remote file ops onto model instances * refactor: rename remote file and storage types * refactor: consolidate file support into types.file * refactor: split file module into file storage and helpers * refactor: replace remote file mapper hooks with field handles * refactor: use walrus operator in file storage guards * refactor: split remote metadata from file handle api * refactor: split remote file handle and rename file extra * chore: deduplicate all dependency group extras * refactor: require primary key mixin for remote files * refactor: rename file metadata api and centralize utc helpers * refactor: freeze remote file handle and rename file modules * refactor: move obstore ops onto remotefile * refactor: remove unused remote metadata fields * refactor: remove deleted metadata status * refactor: move file lifecycle listeners to singleton * test: reorganize file type tests and add integration coverage * test: add async deferred delete lifecycle coverage * fix: restore deferred putresult contract for file uploads * fix: require sqlalchemy session for remote file mutations * fix: harden deferred file metadata persistence consistency * chore: address pr comments
1 parent fb46a7d commit b5b801f

22 files changed

+3569
-91
lines changed

examples/__init__.py

Whitespace-only changes.

examples/file.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
from __future__ import annotations
2+
3+
import asyncio
4+
5+
from sqlalchemy import create_engine
6+
from sqlalchemy.orm import Mapped, Session, mapped_column
7+
8+
from brussels.base import DataclassBase
9+
from brussels.mixins import PrimaryKeyMixin
10+
11+
try:
12+
from obstore.store import MemoryStore # ty: ignore[unresolved-import]
13+
14+
from brussels.types.file import RemoteFile, RemoteMetadata, RemoteStorage
15+
except ImportError as exc:
16+
msg = "This example requires optional dependencies. Install with: pip install 'brussels[file]'"
17+
raise SystemExit(msg) from exc
18+
19+
20+
class Document(DataclassBase, PrimaryKeyMixin):
21+
__tablename__ = "documents"
22+
23+
file: Mapped[RemoteMetadata | None] = mapped_column(
24+
RemoteStorage(
25+
store=MemoryStore(),
26+
),
27+
nullable=True,
28+
default=None,
29+
)
30+
31+
32+
async def main() -> None:
33+
engine = create_engine("sqlite:///:memory:")
34+
DataclassBase.metadata.create_all(engine)
35+
36+
with Session(engine) as session:
37+
doc = Document()
38+
session.add(doc)
39+
session.flush() # ensure doc.id exists before upload
40+
41+
remote_file = RemoteFile.from_metadata(doc, Document.file)
42+
await remote_file.put_async(
43+
b"hello world",
44+
content_type="text/plain",
45+
)
46+
if remote_file.metadata is None or remote_file.metadata.key != f"{doc.id}/file":
47+
msg = "Unexpected key generated for uploaded file."
48+
raise RuntimeError(msg)
49+
50+
content = await remote_file.get_async()
51+
if bytes(content.bytes()) != b"hello world":
52+
msg = "Unexpected content returned from remote file download."
53+
raise RuntimeError(msg)
54+
55+
await remote_file.delete_async()
56+
session.commit()
57+
if doc.file is not None:
58+
msg = "File metadata should be cleared after delete."
59+
raise RuntimeError(msg)
60+
61+
62+
if __name__ == "__main__":
63+
asyncio.run(main())

pyproject.toml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ encrypted = [
1818
alembic = [
1919
"alembic-postgresql-enum",
2020
]
21+
file = [
22+
"obstore",
23+
"pydantic>=2,<3",
24+
]
2125

2226
[build-system]
2327
requires = ["uv_build>=0.10.0,<0.11.0"]
@@ -38,8 +42,7 @@ dev = [
3842
"ty>=0.0.1a27",
3943
]
4044
all = [
41-
"alembic-postgresql-enum",
42-
"cryptography>=46.0.4",
45+
"brussels[alembic,encrypted,file]",
4346
]
4447

4548
[tool.uv.build-backend]
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from __future__ import annotations
2+
3+
from datetime import UTC, datetime, timedelta, timezone
4+
5+
import pytest
6+
7+
from brussels.utils import now, utc
8+
9+
10+
def test_utc_converts_aware_datetime_to_utc() -> None:
11+
value = datetime(2024, 1, 1, 12, 0, tzinfo=timezone(timedelta(hours=5)))
12+
13+
result = utc(value)
14+
15+
assert result.tzinfo is UTC
16+
assert result == datetime(2024, 1, 1, 7, 0, tzinfo=UTC)
17+
18+
19+
def test_utc_raises_for_naive_datetime_by_default() -> None:
20+
value = datetime(2024, 1, 1, 12, 0, tzinfo=UTC).replace(tzinfo=None)
21+
22+
with pytest.raises(ValueError, match="timezone-aware"):
23+
utc(value)
24+
25+
26+
def test_utc_coerces_naive_datetime_when_configured() -> None:
27+
value = datetime(2024, 1, 1, 12, 0, tzinfo=UTC).replace(tzinfo=None)
28+
29+
result = utc(value, raise_on_naive=False)
30+
31+
assert result.tzinfo is UTC
32+
assert result == datetime(2024, 1, 1, 12, 0, tzinfo=UTC)
33+
34+
35+
def test_now_returns_utc_aware_datetime() -> None:
36+
result = now()
37+
38+
assert result.tzinfo is UTC
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING
4+
5+
import pytest
6+
from sqlalchemy import Engine, create_engine
7+
8+
from brussels.base import DataclassBase
9+
10+
try:
11+
import brussels.types.file # noqa: F401
12+
except ImportError:
13+
pytest.skip("files optional dependencies not installed", allow_module_level=True)
14+
15+
if TYPE_CHECKING:
16+
from collections.abc import Iterator
17+
18+
19+
class FakeStoreOps:
20+
def __init__(self) -> None:
21+
self.calls: list[tuple[str, tuple[object, ...], dict[str, object]]] = []
22+
self.put_error: Exception | None = None
23+
self.delete_error: Exception | None = None
24+
self.put_response: object = {
25+
"size_bytes": 5,
26+
"content_type": "text/plain",
27+
"etag": "etag-123",
28+
"checksum": "sum-123",
29+
"version": "v1",
30+
}
31+
32+
def _record(self, name: str, args: tuple[object, ...], kwargs: dict[str, object]) -> None:
33+
self.calls.append((name, args, kwargs))
34+
35+
def put(
36+
self,
37+
path: str,
38+
file: object,
39+
*,
40+
attributes: object | None = None,
41+
tags: dict[str, str] | None = None,
42+
mode: object | None = None,
43+
use_multipart: bool | None = None,
44+
chunk_size: int = 5 * 1024 * 1024,
45+
max_concurrency: int = 12,
46+
) -> object:
47+
self._record(
48+
"put",
49+
(path, file),
50+
{
51+
"attributes": attributes,
52+
"tags": tags,
53+
"mode": mode,
54+
"use_multipart": use_multipart,
55+
"chunk_size": chunk_size,
56+
"max_concurrency": max_concurrency,
57+
},
58+
)
59+
if self.put_error is not None:
60+
raise self.put_error
61+
return self.put_response
62+
63+
async def put_async(
64+
self,
65+
path: str,
66+
file: object,
67+
*,
68+
attributes: object | None = None,
69+
tags: dict[str, str] | None = None,
70+
mode: object | None = None,
71+
use_multipart: bool | None = None,
72+
chunk_size: int = 5 * 1024 * 1024,
73+
max_concurrency: int = 12,
74+
) -> object:
75+
self._record(
76+
"put_async",
77+
(path, file),
78+
{
79+
"attributes": attributes,
80+
"tags": tags,
81+
"mode": mode,
82+
"use_multipart": use_multipart,
83+
"chunk_size": chunk_size,
84+
"max_concurrency": max_concurrency,
85+
},
86+
)
87+
if self.put_error is not None:
88+
raise self.put_error
89+
return self.put_response
90+
91+
def get(self, path: str, *, options: object | None = None) -> object:
92+
self._record("get", (path,), {"options": options})
93+
return b"downloaded-sync"
94+
95+
async def get_async(self, path: str, *, options: object | None = None) -> object:
96+
self._record("get_async", (path,), {"options": options})
97+
return b"downloaded-async"
98+
99+
def get_range(self, path: str, *, start: int, end: int | None = None, length: int | None = None) -> object:
100+
self._record("get_range", (path,), {"start": start, "end": end, "length": length})
101+
return b"range-sync"
102+
103+
async def get_range_async(
104+
self,
105+
path: str,
106+
*,
107+
start: int,
108+
end: int | None = None,
109+
length: int | None = None,
110+
) -> object:
111+
self._record("get_range_async", (path,), {"start": start, "end": end, "length": length})
112+
return b"range-async"
113+
114+
def delete(self, paths: str | tuple[str, ...] | list[str]) -> None:
115+
self._record("delete", (paths,), {})
116+
if self.delete_error is not None:
117+
raise self.delete_error
118+
119+
async def delete_async(self, paths: str | tuple[str, ...] | list[str]) -> None:
120+
self._record("delete_async", (paths,), {})
121+
if self.delete_error is not None:
122+
raise self.delete_error
123+
124+
125+
@pytest.fixture
126+
def engine() -> Iterator[Engine]:
127+
engine = create_engine("sqlite:///:memory:")
128+
DataclassBase.metadata.create_all(engine)
129+
try:
130+
yield engine
131+
finally:
132+
engine.dispose()

0 commit comments

Comments
 (0)