Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions icechunk-python/python/icechunk/_icechunk_python.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -1465,6 +1465,7 @@ class PyRepository:
max_compressed_manifest_mem_bytes: int = 512 * 1024 * 1024,
max_concurrent_manifest_fetches: int = 500,
) -> int: ...
def inspect_snapshot(self, snapshot_id: str, *, pretty: bool = True) -> str: ...

class PySession:
@classmethod
Expand Down
3 changes: 3 additions & 0 deletions icechunk-python/python/icechunk/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -1389,3 +1389,6 @@ async def total_chunks_storage_async(
max_compressed_manifest_mem_bytes=max_compressed_manifest_mem_bytes,
max_concurrent_manifest_fetches=max_concurrent_manifest_fetches,
)

def inspect_snapshot(self, snapshot_id: str, *, pretty: bool = True) -> str:
return self._repository.inspect_snapshot(snapshot_id, pretty=pretty)
14 changes: 14 additions & 0 deletions icechunk-python/src/repository.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use icechunk::{
snapshot::{ManifestFileInfo, SnapshotInfo, SnapshotProperties},
transaction_log::Diff,
},
inspect::snapshot_json,
ops::{
gc::{ExpiredRefAction, GCConfig, GCSummary, expire, garbage_collect},
manifests::rewrite_manifests,
Expand Down Expand Up @@ -1623,6 +1624,19 @@ impl PyRepository {
Ok(result)
})
}

#[pyo3(signature = (snapshot_id, *, pretty = true))]
fn inspect_snapshot(&self, snapshot_id: String, pretty: bool) -> PyResult<String> {
let result = pyo3_async_runtimes::tokio::get_runtime().block_on(async move {
let lock = self.0.read().await;
let snap = snapshot_id.try_into().map_err(PyValueError::new_err)?;
let res = snapshot_json(lock.asset_manager(), &snap, pretty)
.await
.map_err(|e| PyValueError::new_err(e.to_string()))?;
Ok::<_, PyErr>(res)
})?;
Ok(result)
}
}

fn map_credentials(
Expand Down
19 changes: 19 additions & 0 deletions icechunk-python/tests/test_inspect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import json

import icechunk as ic


def test_inspect_snapshot() -> None:
repo = ic.Repository.open(
storage=ic.local_filesystem_storage("./tests/data/split-repo")
)
snap = next(repo.ancestry(branch="main")).id
pretty_str = repo.inspect_snapshot(snap, pretty=True)
non_pretty_str = repo.inspect_snapshot(snap, pretty=False)

pretty = json.loads(pretty_str)
non_pretty = json.loads(non_pretty_str)

assert pretty["id"] == snap
assert pretty_str != non_pretty_str
assert pretty == non_pretty
157 changes: 157 additions & 0 deletions icechunk/src/inspect.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
use chrono::{DateTime, Utc};
use itertools::Itertools;
use serde::{Deserialize, Serialize};

use crate::{
asset_manager::AssetManager,
format::{
SnapshotId,
manifest::ManifestRef,
snapshot::{
ManifestFileInfo, NodeData, NodeSnapshot, NodeType, SnapshotProperties,
},
},
};

#[derive(Debug, Serialize, Deserialize)]
struct ManifestFileInfoInspect {
id: String,
size_bytes: u64,
num_chunk_refs: u32,
}

impl From<ManifestFileInfo> for ManifestFileInfoInspect {
fn from(value: ManifestFileInfo) -> Self {
Self {
id: value.id.to_string(),
size_bytes: value.size_bytes,
num_chunk_refs: value.num_chunk_refs,
}
}
}

#[derive(Debug, Serialize, Deserialize)]
struct ManifestRefInspect {
id: String,
extents: Vec<(u32, u32)>,
}

impl From<ManifestRef> for ManifestRefInspect {
fn from(value: ManifestRef) -> Self {
Self {
id: value.object_id.to_string(),
extents: value.extents.iter().map(|r| (r.start, r.end)).collect(),
}
}
}

#[derive(Debug, Serialize, Deserialize)]
struct NodeSnapshotInspect {
id: String,
path: String,
node_type: String,
#[serde(skip_serializing_if = "Option::is_none")]
manifest_refs: Option<Vec<ManifestRefInspect>>,
}

impl From<NodeSnapshot> for NodeSnapshotInspect {
fn from(value: NodeSnapshot) -> Self {
Self {
id: value.id.to_string(),
path: value.path.to_string(),
node_type: match value.node_type() {
NodeType::Group => "group".to_string(),
NodeType::Array => "array".to_string(),
},
manifest_refs: match value.node_data {
NodeData::Array { manifests, .. } => {
let ms = manifests.into_iter().map(|m| m.into()).collect();
Some(ms)
}
NodeData::Group => None,
},
}
}
}

#[derive(Debug, Serialize, Deserialize)]
struct SnapshotInfoInspect {
// TODO: add fields
//path: String,
//size_bytes: u64,
id: String,
#[serde(skip_serializing_if = "Option::is_none")]
parent_id: Option<String>,
flushed_at: DateTime<Utc>,
commit_message: String,
metadata: SnapshotProperties,

manifests: Vec<ManifestFileInfoInspect>,
nodes: Vec<NodeSnapshotInspect>,
}

async fn inspect_snapshot(
asset_manager: &AssetManager,
id: &SnapshotId,
) -> Result<SnapshotInfoInspect, Box<dyn std::error::Error>> {
let snap = asset_manager.fetch_snapshot(id).await?;
let res = SnapshotInfoInspect {
id: snap.id().to_string(),
parent_id: snap.parent_id().map(|p| p.to_string()),
flushed_at: snap.flushed_at()?,
commit_message: snap.message(),
metadata: snap.metadata()?,
manifests: snap.manifest_files().map(|f| f.into()).collect(),
nodes: snap.iter().map_ok(|n| n.into()).try_collect()?,
};

Ok(res)
}

pub async fn snapshot_json(
asset_manager: &AssetManager,
id: &SnapshotId,
pretty: bool,
) -> Result<String, Box<dyn std::error::Error>> {
let info = inspect_snapshot(asset_manager, id).await?;
let res = if pretty {
serde_json::to_string_pretty(&info)
} else {
serde_json::to_string(&info)
}?;
Ok(res)
}

#[cfg(test)]
#[allow(clippy::panic, clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
use crate::{ObjectStorage, Repository, repository::VersionInfo};
use futures::{StreamExt, TryStreamExt};
use std::{path::PathBuf, sync::Arc};

#[icechunk_macros::tokio_test]
async fn test_print_snapshot() -> Result<(), Box<dyn std::error::Error>> {
let st = Arc::new(
ObjectStorage::new_local_filesystem(&PathBuf::from(
"../icechunk-python/tests/data/split-repo",
))
.await?,
);
let repo = Repository::open(None, st, Default::default()).await?;
let snap_id = repo
.ancestry(&VersionInfo::BranchTipRef("main".to_string()))
.await?
.boxed()
.try_next()
.await?
.unwrap()
.id;

let json = snapshot_json(repo.asset_manager(), &snap_id, true).await?;
let info: SnapshotInfoInspect = serde_json::from_str(json.as_str())?;
assert!(info.id == snap_id.to_string());

Ok(())
}
}
1 change: 1 addition & 0 deletions icechunk/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pub mod config;
pub mod conflicts;
pub mod error;
pub mod format;
pub mod inspect;
pub mod ops;
pub mod refs;
pub mod repository;
Expand Down
Loading