Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions icechunk-python/python/icechunk/_icechunk_python.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1465,6 +1465,10 @@ class PyRepository:
max_compressed_manifest_mem_bytes: int = 512 * 1024 * 1024,
max_concurrent_manifest_fetches: int = 500,
) -> int: ...
def inspect_snapshot(self, snapshot_id: str, *, pretty: bool = True) -> str: ...
async def inspect_snapshot_async(
self, snapshot_id: str, *, pretty: bool = True
) -> str: ...

class PySession:
@classmethod
Expand Down
8 changes: 8 additions & 0 deletions icechunk-python/python/icechunk/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -1389,3 +1389,11 @@ async def total_chunks_storage_async(
max_compressed_manifest_mem_bytes=max_compressed_manifest_mem_bytes,
max_concurrent_manifest_fetches=max_concurrent_manifest_fetches,
)

def inspect_snapshot(self, snapshot_id: str, *, pretty: bool = True) -> str:
return self._repository.inspect_snapshot(snapshot_id, pretty=pretty)

async def inspect_snapshot_async(
self, snapshot_id: str, *, pretty: bool = True
) -> str:
return await self._repository.inspect_snapshot_async(snapshot_id, pretty=pretty)
39 changes: 38 additions & 1 deletion icechunk-python/src/repository.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@ use icechunk::{
snapshot::{ManifestFileInfo, SnapshotInfo, SnapshotProperties},
transaction_log::Diff,
},
inspect::snapshot_json,
ops::{
gc::{ExpiredRefAction, GCConfig, GCSummary, expire, garbage_collect},
manifests::rewrite_manifests,
stats::repo_chunks_storage,
},
repository::{RepositoryErrorKind, VersionInfo},
repository::{RepositoryError, RepositoryErrorKind, VersionInfo},
};
use pyo3::{
IntoPyObjectExt,
Expand Down Expand Up @@ -1623,6 +1624,42 @@ impl PyRepository {
Ok(result)
})
}

#[pyo3(signature = (snapshot_id, *, pretty = true))]
fn inspect_snapshot(&self, snapshot_id: String, pretty: bool) -> PyResult<String> {
let result = pyo3_async_runtimes::tokio::get_runtime()
.block_on(async move {
let lock = self.0.read().await;
let snap = SnapshotId::try_from(snapshot_id.as_str())
.map_err(|e| RepositoryErrorKind::Other(e.to_string()))?;
let res = snapshot_json(lock.asset_manager(), &snap, pretty).await?;
Ok(res)
})
.map_err(PyIcechunkStoreError::RepositoryError)?;
Ok(result)
}

#[pyo3(signature = (snapshot_id, *, pretty = true))]
fn inspect_snapshot_async<'py>(
&self,
py: Python<'py>,
snapshot_id: String,
pretty: bool,
) -> PyResult<Bound<'py, PyAny>> {
let repository = self.0.clone();
pyo3_async_runtimes::tokio::future_into_py(py, async move {
let lock = repository.read().await;
let snap = SnapshotId::try_from(snapshot_id.as_str())
.map_err(|e| {
RepositoryError::from(RepositoryErrorKind::Other(e.to_string()))
})
.map_err(PyIcechunkStoreError::RepositoryError)?;
let res = snapshot_json(lock.asset_manager(), &snap, pretty)
.await
.map_err(PyIcechunkStoreError::RepositoryError)?;
Ok(res)
})
}
}

fn map_credentials(
Expand Down
35 changes: 35 additions & 0 deletions icechunk-python/tests/test_inspect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import json

import icechunk as ic


def test_inspect_snapshot() -> None:
repo = ic.Repository.open(
storage=ic.local_filesystem_storage("./tests/data/split-repo")
)
snap = next(repo.ancestry(branch="main")).id
pretty_str = repo.inspect_snapshot(snap, pretty=True)
non_pretty_str = repo.inspect_snapshot(snap, pretty=False)

pretty = json.loads(pretty_str)
non_pretty = json.loads(non_pretty_str)

assert pretty["id"] == snap
assert pretty_str != non_pretty_str
assert pretty == non_pretty


async def test_inspect_snapshot_async() -> None:
repo = await ic.Repository.open_async(
storage=ic.local_filesystem_storage("./tests/data/split-repo")
)
snap = next(repo.ancestry(branch="main")).id
pretty_str = await repo.inspect_snapshot_async(snap, pretty=True)
non_pretty_str = await repo.inspect_snapshot_async(snap, pretty=False)

pretty = json.loads(pretty_str)
non_pretty = json.loads(non_pretty_str)

assert pretty["id"] == snap
assert pretty_str != non_pretty_str
assert pretty == non_pretty
159 changes: 159 additions & 0 deletions icechunk/src/inspect.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
use chrono::{DateTime, Utc};
use itertools::Itertools;
use serde::{Deserialize, Serialize};

use crate::{
asset_manager::AssetManager,
format::{
SnapshotId,
manifest::ManifestRef,
snapshot::{
ManifestFileInfo, NodeData, NodeSnapshot, NodeType, SnapshotProperties,
},
},
repository::{RepositoryErrorKind, RepositoryResult},
};

#[derive(Debug, Serialize, Deserialize)]
struct ManifestFileInfoInspect {
id: String,
size_bytes: u64,
num_chunk_refs: u32,
}

impl From<ManifestFileInfo> for ManifestFileInfoInspect {
fn from(value: ManifestFileInfo) -> Self {
Self {
id: value.id.to_string(),
size_bytes: value.size_bytes,
num_chunk_refs: value.num_chunk_refs,
}
}
}

#[derive(Debug, Serialize, Deserialize)]
struct ManifestRefInspect {
id: String,
extents: Vec<(u32, u32)>,
}

impl From<ManifestRef> for ManifestRefInspect {
fn from(value: ManifestRef) -> Self {
Self {
id: value.object_id.to_string(),
extents: value.extents.iter().map(|r| (r.start, r.end)).collect(),
}
}
}

#[derive(Debug, Serialize, Deserialize)]
struct NodeSnapshotInspect {
id: String,
path: String,
node_type: String,
#[serde(skip_serializing_if = "Option::is_none")]
manifest_refs: Option<Vec<ManifestRefInspect>>,
}

impl From<NodeSnapshot> for NodeSnapshotInspect {
fn from(value: NodeSnapshot) -> Self {
Self {
id: value.id.to_string(),
path: value.path.to_string(),
node_type: match value.node_type() {
NodeType::Group => "group".to_string(),
NodeType::Array => "array".to_string(),
},
manifest_refs: match value.node_data {
NodeData::Array { manifests, .. } => {
let ms = manifests.into_iter().map(|m| m.into()).collect();
Some(ms)
}
NodeData::Group => None,
},
}
}
}

#[derive(Debug, Serialize, Deserialize)]
struct SnapshotInfoInspect {
// TODO: add fields
//path: String,
//size_bytes: u64,
id: String,
#[serde(skip_serializing_if = "Option::is_none")]
parent_id: Option<String>,
flushed_at: DateTime<Utc>,
commit_message: String,
metadata: SnapshotProperties,

manifests: Vec<ManifestFileInfoInspect>,
nodes: Vec<NodeSnapshotInspect>,
}

async fn inspect_snapshot(
asset_manager: &AssetManager,
id: &SnapshotId,
) -> RepositoryResult<SnapshotInfoInspect> {
let snap = asset_manager.fetch_snapshot(id).await?;
let res = SnapshotInfoInspect {
id: snap.id().to_string(),
parent_id: snap.parent_id().map(|p| p.to_string()),
flushed_at: snap.flushed_at()?,
commit_message: snap.message(),
metadata: snap.metadata()?,
manifests: snap.manifest_files().map(|f| f.into()).collect(),
nodes: snap.iter().map_ok(|n| n.into()).try_collect()?,
};

Ok(res)
}

pub async fn snapshot_json(
asset_manager: &AssetManager,
id: &SnapshotId,
pretty: bool,
) -> RepositoryResult<String> {
let info = inspect_snapshot(asset_manager, id).await?;
let res = if pretty {
serde_json::to_string_pretty(&info)
} else {
serde_json::to_string(&info)
}
.map_err(|e| RepositoryErrorKind::Other(e.to_string()))?;
Ok(res)
}

#[cfg(test)]
#[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
use crate::{ObjectStorage, Repository, repository::VersionInfo};
use futures::{StreamExt, TryStreamExt};
use std::{path::PathBuf, sync::Arc};

#[icechunk_macros::tokio_test]
async fn test_print_snapshot() -> Result<(), Box<dyn std::error::Error>> {
let st = Arc::new(
ObjectStorage::new_local_filesystem(&PathBuf::from(
"../icechunk-python/tests/data/split-repo",
))
.await?,
);
let repo = Repository::open(None, st, Default::default()).await?;
let snap_id = repo
.ancestry(&VersionInfo::BranchTipRef("main".to_string()))
.await?
.boxed()
.try_next()
.await?
.unwrap()
.id;

let json = snapshot_json(repo.asset_manager(), &snap_id, true).await?;
let info: SnapshotInfoInspect = serde_json::from_str(json.as_str())?;
assert!(info.id == snap_id.to_string());

Ok(())
}
}
1 change: 1 addition & 0 deletions icechunk/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pub mod config;
pub mod conflicts;
pub mod error;
pub mod format;
pub mod inspect;
pub mod ops;
pub mod refs;
pub mod repository;
Expand Down
Loading