Skip to content

Commit a0be3c8

Browse files
authored
readonly_session("main", as_of=...) to open a branch at a timestamp (#768)
1 parent 474e004 commit a0be3c8

File tree

5 files changed

+88
-15
lines changed

5 files changed

+88
-15
lines changed

icechunk-python/python/icechunk/_icechunk_python.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -983,6 +983,7 @@ class PyRepository:
983983
*,
984984
tag: str | None = None,
985985
snapshot_id: str | None = None,
986+
as_of: datetime.datetime | None = None,
986987
) -> PySession: ...
987988
def writable_session(self, branch: str) -> PySession: ...
988989
def expire_snapshots(

icechunk-python/python/icechunk/repository.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,7 @@ def readonly_session(
437437
*,
438438
tag: str | None = None,
439439
snapshot_id: str | None = None,
440+
as_of: datetime.datetime | None = None,
440441
) -> Session:
441442
"""
442443
Create a read-only session.
@@ -453,6 +454,9 @@ def readonly_session(
453454
If provided, the tag to create the session on.
454455
snapshot_id : str, optional
455456
If provided, the snapshot ID to create the session on.
457+
as_of : datetime.datetime, optional
458+
When combined with the branch argument, it will open the session at the last
459+
snapshot that is at or before this datetime.
456460
457461
Returns
458462
-------
@@ -465,7 +469,7 @@ def readonly_session(
465469
"""
466470
return Session(
467471
self._repository.readonly_session(
468-
branch=branch, tag=tag, snapshot_id=snapshot_id
472+
branch=branch, tag=tag, snapshot_id=snapshot_id, as_of=as_of
469473
)
470474
)
471475

icechunk-python/src/repository.rs

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,7 @@ impl PyRepository {
517517
let repo = Arc::clone(&self.0);
518518
// This function calls block_on, so we need to allow other Python threads to make progress
519519
py.allow_threads(move || {
520-
let version = args_to_version_info(branch, tag, snapshot_id)?;
520+
let version = args_to_version_info(branch, tag, snapshot_id, None)?;
521521
let ancestry = pyo3_async_runtimes::tokio::get_runtime()
522522
.block_on(async move { repo.ancestry_arc(&version).await })
523523
.map_err(PyIcechunkStoreError::RepositoryError)?
@@ -701,8 +701,8 @@ impl PyRepository {
701701
to_tag: Option<String>,
702702
to_snapshot_id: Option<String>,
703703
) -> PyResult<PyDiff> {
704-
let from = args_to_version_info(from_branch, from_tag, from_snapshot_id)?;
705-
let to = args_to_version_info(to_branch, to_tag, to_snapshot_id)?;
704+
let from = args_to_version_info(from_branch, from_tag, from_snapshot_id, None)?;
705+
let to = args_to_version_info(to_branch, to_tag, to_snapshot_id, None)?;
706706

707707
// This function calls block_on, so we need to allow other Python threads to make progress
708708
py.allow_threads(move || {
@@ -717,17 +717,18 @@ impl PyRepository {
717717
})
718718
}
719719

720-
#[pyo3(signature = (*, branch = None, tag = None, snapshot_id = None))]
720+
#[pyo3(signature = (*, branch = None, tag = None, snapshot_id = None, as_of = None))]
721721
pub fn readonly_session(
722722
&self,
723723
py: Python<'_>,
724724
branch: Option<String>,
725725
tag: Option<String>,
726726
snapshot_id: Option<String>,
727+
as_of: Option<DateTime<Utc>>,
727728
) -> PyResult<PySession> {
728729
// This function calls block_on, so we need to allow other Python threads to make progress
729730
py.allow_threads(move || {
730-
let version = args_to_version_info(branch, tag, snapshot_id)?;
731+
let version = args_to_version_info(branch, tag, snapshot_id, as_of)?;
731732
let session =
732733
pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
733734
self.0
@@ -841,6 +842,7 @@ fn args_to_version_info(
841842
branch: Option<String>,
842843
tag: Option<String>,
843844
snapshot: Option<String>,
845+
as_of: Option<DateTime<Utc>>,
844846
) -> PyResult<VersionInfo> {
845847
let n = [&branch, &tag, &snapshot].iter().filter(|r| !r.is_none()).count();
846848
if n > 1 {
@@ -849,8 +851,18 @@ fn args_to_version_info(
849851
));
850852
}
851853

852-
if let Some(branch_name) = branch {
853-
Ok(VersionInfo::BranchTipRef(branch_name))
854+
if as_of.is_some() && branch.is_none() {
855+
return Err(PyValueError::new_err(
856+
"as_of argument must be provided together with a branch name",
857+
));
858+
}
859+
860+
if let Some(branch) = branch {
861+
if let Some(at) = as_of {
862+
Ok(VersionInfo::AsOf { branch, at })
863+
} else {
864+
Ok(VersionInfo::BranchTipRef(branch))
865+
}
854866
} else if let Some(tag_name) = tag {
855867
Ok(VersionInfo::TagRef(tag_name))
856868
} else if let Some(snapshot_id) = snapshot {

icechunk-python/tests/test_timetravel.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,3 +258,38 @@ async def test_tag_delete() -> None:
258258

259259
with pytest.raises(ValueError):
260260
repo.create_tag("tag", snap)
261+
262+
263+
async def test_session_with_as_of() -> None:
264+
repo = ic.Repository.create(
265+
storage=ic.in_memory_storage(),
266+
)
267+
268+
session = repo.writable_session("main")
269+
store = session.store
270+
271+
times = []
272+
group = zarr.group(store=store, overwrite=True)
273+
sid = session.commit("root")
274+
times.append(next(repo.ancestry(snapshot_id=sid)).written_at)
275+
276+
for i in range(5):
277+
session = repo.writable_session("main")
278+
store = session.store
279+
group = zarr.open_group(store=store)
280+
group.create_group(f"child {i}")
281+
sid = session.commit(f"child {i}")
282+
times.append(next(repo.ancestry(snapshot_id=sid)).written_at)
283+
284+
ancestry = list(p for p in repo.ancestry(branch="main"))
285+
assert len(ancestry) == 7 # initial + root + 5 children
286+
287+
store = repo.readonly_session("main", as_of=times[-1]).store
288+
group = zarr.open_group(store=store, mode="r")
289+
290+
for i, time in enumerate(times):
291+
store = repo.readonly_session("main", as_of=time).store
292+
group = zarr.open_group(store=store, mode="r")
293+
expected_children = {f"child {j}" for j in range(i)}
294+
actual_children = {g[0] for g in group.members()}
295+
assert expected_children == actual_children

icechunk/src/repository.rs

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ use std::{
55
sync::Arc,
66
};
77

8+
use async_recursion::async_recursion;
89
use bytes::Bytes;
10+
use chrono::{DateTime, Utc};
911
use err_into::ErrorInto as _;
1012
use futures::{
1113
stream::{FuturesOrdered, FuturesUnordered},
@@ -37,15 +39,13 @@ use crate::{
3739
Storage, StorageError,
3840
};
3941

40-
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
42+
#[derive(Debug, Clone, PartialEq, Eq)]
4143
#[non_exhaustive]
4244
pub enum VersionInfo {
43-
#[serde(rename = "snapshot_id")]
4445
SnapshotId(SnapshotId),
45-
#[serde(rename = "tag")]
4646
TagRef(String),
47-
#[serde(rename = "branch")]
4847
BranchTipRef(String),
48+
AsOf { branch: String, at: DateTime<Utc> },
4949
}
5050

5151
#[derive(Debug, Error)]
@@ -60,6 +60,8 @@ pub enum RepositoryErrorKind {
6060

6161
#[error("snapshot not found: `{id}`")]
6262
SnapshotNotFound { id: SnapshotId },
63+
#[error("branch {branch} does not have a snapshots before or at {at}")]
64+
InvalidAsOfSpec { branch: String, at: DateTime<Utc> },
6365
#[error("invalid snapshot id: `{0}`")]
6466
InvalidSnapshotId(String),
6567
#[error("tag error: `{0}`")]
@@ -404,11 +406,12 @@ impl Repository {
404406
}
405407

406408
/// Returns the sequence of parents of the snapshot pointed to by the given version
409+
#[async_recursion(?Send)]
407410
#[instrument(skip(self))]
408-
pub async fn ancestry(
409-
&self,
411+
pub async fn ancestry<'a>(
412+
&'a self,
410413
version: &VersionInfo,
411-
) -> RepositoryResult<impl Stream<Item = RepositoryResult<SnapshotInfo>> + '_> {
414+
) -> RepositoryResult<impl Stream<Item = RepositoryResult<SnapshotInfo>> + 'a> {
412415
let snapshot_id = self.resolve_version(version).await?;
413416
self.snapshot_ancestry(&snapshot_id).await
414417
}
@@ -572,6 +575,24 @@ impl Repository {
572575
.await?;
573576
Ok(ref_data.snapshot)
574577
}
578+
VersionInfo::AsOf { branch, at } => {
579+
let tip = VersionInfo::BranchTipRef(branch.clone());
580+
let snap = self
581+
.ancestry(&tip)
582+
.await?
583+
.try_skip_while(|parent| ready(Ok(&parent.flushed_at > at)))
584+
.take(1)
585+
.try_collect::<Vec<_>>()
586+
.await?;
587+
match snap.into_iter().next() {
588+
Some(snap) => Ok(snap.id),
589+
None => Err(RepositoryErrorKind::InvalidAsOfSpec {
590+
branch: branch.clone(),
591+
at: *at,
592+
}
593+
.into()),
594+
}
595+
}
575596
}
576597
}
577598

0 commit comments

Comments
 (0)