Skip to content

Commit 5e201c7

Browse files
authored
feat: add lance_dataset_versions for listing dataset version history (#17)
## Summary - Adds `lance_dataset_versions` — returns an opaque `LanceVersions` snapshot - Accessors: `lance_versions_count`, `lance_versions_id_at`, `lance_versions_timestamp_ms_at`, `lance_versions_close` - C++: new `lance::VersionInfo` struct and a `lance::Dataset::versions()` member returning `std::vector<VersionInfo>` ## Motivation C/C++ callers can read the current version and the latest version today, but there's no way to list the full history. This covers the read side of version management and is a prerequisite for restore (#12). ## Notes - Handle pattern matches `LanceScanner` / `LanceBatch` (opaque handle + index-based accessors). - Each entry carries the monotonic version id and a Unix epoch millisecond timestamp. - Per-version metadata and tags are out of scope here — they're separate features. ## Test plan - `cargo test` — 7 new tests: single-version, multi-version ordering, NULL on each entrypoint, out-of-range index (boundary + far), close-null safety - `cargo clippy --all-targets -- -D warnings` and `cargo fmt --check` clean - `cargo test --test compile_and_run_test -- --ignored` — C and C++ smoke tests iterate the snapshot Closes #11.
1 parent d721501 commit 5e201c7

7 files changed

Lines changed: 362 additions & 1 deletion

File tree

include/lance.h

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,10 @@ void lance_free_string(const char* s);
9191

9292
/* ─── Opaque handles ─── */
9393

94-
typedef struct LanceDataset LanceDataset;
94+
typedef struct LanceDataset LanceDataset;
9595
typedef struct LanceScanner LanceScanner;
9696
typedef struct LanceBatch LanceBatch;
97+
typedef struct LanceVersions LanceVersions;
9798

9899
/* ─── Dataset lifecycle ─── */
99100

@@ -125,6 +126,35 @@ uint64_t lance_dataset_count_rows(const LanceDataset* dataset);
125126
/** Return the latest version ID (I/O). Returns 0 on error. */
126127
uint64_t lance_dataset_latest_version(const LanceDataset* dataset);
127128

129+
/* ─── Version history ─── */
130+
131+
/**
132+
* Snapshot the dataset's version history. Caller frees the returned handle
133+
* with lance_versions_close().
134+
* @return handle on success, or NULL on error
135+
*/
136+
LanceVersions* lance_dataset_versions(const LanceDataset* dataset);
137+
138+
/** Number of versions in the snapshot. Returns 0 on error (NULL handle) — check lance_last_error_code(). */
139+
uint64_t lance_versions_count(const LanceVersions* versions);
140+
141+
/**
142+
* Monotonic version id at `index` (0 <= index < count).
143+
* Returns 0 on error (NULL handle or out-of-range index) — check
144+
* lance_last_error_code().
145+
*/
146+
uint64_t lance_versions_id_at(const LanceVersions* versions, size_t index);
147+
148+
/**
149+
* Version timestamp at `index`, as Unix epoch milliseconds.
150+
* Returns 0 on error (NULL handle or out-of-range index) — check
151+
* lance_last_error_code().
152+
*/
153+
int64_t lance_versions_timestamp_ms_at(const LanceVersions* versions, size_t index);
154+
155+
/** Close and free a versions handle. Safe to call with NULL. */
156+
void lance_versions_close(LanceVersions* versions);
157+
128158
/**
129159
* Export the dataset schema via Arrow C Data Interface.
130160
* @param out Pointer to caller-allocated ArrowSchema struct

include/lance.hpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,16 @@ class Handle {
8484

8585
class Scanner;
8686

87+
// ─── Version history ─────────────────────────────────────────────────────────
88+
89+
/// One entry of a dataset's version history.
struct VersionInfo {
    /// Monotonic manifest version; mirrors the upstream Version::version.
    uint64_t id;
    /// Time of the version, expressed as Unix epoch milliseconds.
    int64_t timestamp_ms;
};
96+
8797
// ─── Dataset ─────────────────────────────────────────────────────────────────
8898

8999
class Dataset {
@@ -131,6 +141,27 @@ class Dataset {
131141
return v;
132142
}
133143

144+
/// Snapshot the dataset's version history, ordered by version id.
145+
/// Throws lance::Error on failure.
146+
std::vector<VersionInfo> versions() const {
147+
auto* raw = lance_dataset_versions(handle_.get());
148+
if (!raw) check_error();
149+
Handle<LanceVersions, lance_versions_close> snap(raw);
150+
151+
uint64_t n = lance_versions_count(snap.get());
152+
std::vector<VersionInfo> out;
153+
out.reserve(static_cast<size_t>(n));
154+
for (uint64_t i = 0; i < n; i++) {
155+
VersionInfo info;
156+
info.id = lance_versions_id_at(snap.get(), static_cast<size_t>(i));
157+
info.timestamp_ms =
158+
lance_versions_timestamp_ms_at(snap.get(), static_cast<size_t>(i));
159+
if (lance_last_error_code() != LANCE_OK) check_error();
160+
out.push_back(info);
161+
}
162+
return out;
163+
}
164+
134165
/// Export the schema as an Arrow C Data Interface struct.
135166
void schema(ArrowSchema* out) const {
136167
if (lance_dataset_schema(handle_.get(), out) != 0) {

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ mod fragment_writer;
2323
mod helpers;
2424
pub mod runtime;
2525
mod scanner;
26+
mod versions;
2627

2728
// Re-export all extern "C" symbols so they appear in the cdylib.
2829
pub use batch::*;
@@ -32,3 +33,4 @@ pub use error::{
3233
};
3334
pub use fragment_writer::*;
3435
pub use scanner::*;
36+
pub use versions::*;

src/versions.rs

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright The Lance Authors
3+
4+
//! Versions C API: list all versions of a Lance dataset.
5+
//!
6+
//! `lance_dataset_versions` returns an opaque `LanceVersions` snapshot;
7+
//! accessors read entries by index, and `lance_versions_close` frees it.
8+
9+
use lance_core::Result;
10+
11+
use crate::dataset::LanceDataset;
12+
use crate::error::{LanceErrorCode, clear_last_error, ffi_try, set_last_error};
13+
use crate::runtime::block_on;
14+
15+
/// Opaque snapshot of a dataset's version history.
///
/// Built by `lance_dataset_versions` and released with
/// `lance_versions_close`; entries are read through the index-based accessors.
#[derive(Debug)]
pub struct LanceVersions {
    /// One entry per version, in the order the dataset listed them.
    entries: Vec<VersionEntry>,
}

/// A single version record copied out of the dataset's version list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct VersionEntry {
    /// Version id, taken from the upstream `version` field.
    id: u64,
    /// Version timestamp as Unix epoch milliseconds.
    timestamp_ms: i64,
}
25+
26+
/// Return a snapshot of the dataset's version list. The caller frees the
27+
/// returned handle with `lance_versions_close`. Returns NULL on error.
28+
#[unsafe(no_mangle)]
29+
pub unsafe extern "C" fn lance_dataset_versions(
30+
dataset: *const LanceDataset,
31+
) -> *mut LanceVersions {
32+
ffi_try!(unsafe { versions_inner(dataset) }, null)
33+
}
34+
35+
unsafe fn versions_inner(dataset: *const LanceDataset) -> Result<*mut LanceVersions> {
36+
if dataset.is_null() {
37+
return Err(lance_core::Error::InvalidInput {
38+
source: "dataset must not be NULL".into(),
39+
location: snafu::location!(),
40+
});
41+
}
42+
let ds = unsafe { &*dataset };
43+
let versions = block_on(ds.inner.versions())?;
44+
let entries = versions
45+
.into_iter()
46+
.map(|v| VersionEntry {
47+
id: v.version,
48+
timestamp_ms: v.timestamp.timestamp_millis(),
49+
})
50+
.collect();
51+
Ok(Box::into_raw(Box::new(LanceVersions { entries })))
52+
}
53+
54+
/// Return the number of versions. Returns 0 on error (NULL handle).
55+
#[unsafe(no_mangle)]
56+
pub unsafe extern "C" fn lance_versions_count(versions: *const LanceVersions) -> u64 {
57+
if versions.is_null() {
58+
set_last_error(LanceErrorCode::InvalidArgument, "versions is NULL");
59+
return 0;
60+
}
61+
let v = unsafe { &*versions };
62+
clear_last_error();
63+
v.entries.len() as u64
64+
}
65+
66+
/// Return the monotonic version id at `index` (0 <= index < count).
67+
/// Returns 0 and sets the thread-local error on NULL or out-of-range input.
68+
#[unsafe(no_mangle)]
69+
pub unsafe extern "C" fn lance_versions_id_at(versions: *const LanceVersions, index: usize) -> u64 {
70+
unsafe { entry_at(versions, index) }
71+
.map(|e| e.id)
72+
.unwrap_or(0)
73+
}
74+
75+
/// Return the Unix epoch millisecond timestamp at `index`.
76+
/// Returns 0 and sets the thread-local error on NULL or out-of-range input.
77+
#[unsafe(no_mangle)]
78+
pub unsafe extern "C" fn lance_versions_timestamp_ms_at(
79+
versions: *const LanceVersions,
80+
index: usize,
81+
) -> i64 {
82+
unsafe { entry_at(versions, index) }
83+
.map(|e| e.timestamp_ms)
84+
.unwrap_or(0)
85+
}
86+
87+
/// Close and free a versions handle. Safe to call with NULL.
88+
#[unsafe(no_mangle)]
89+
pub unsafe extern "C" fn lance_versions_close(versions: *mut LanceVersions) {
90+
if !versions.is_null() {
91+
unsafe {
92+
let _ = Box::from_raw(versions);
93+
}
94+
}
95+
}
96+
97+
// ---------------------------------------------------------------------------
98+
// Internal helpers
99+
// ---------------------------------------------------------------------------
100+
101+
/// Copy the entry at `index` out of the versions handle. Sets the thread-local
102+
/// error and returns `None` on NULL handle or out-of-range index.
103+
unsafe fn entry_at(versions: *const LanceVersions, index: usize) -> Option<VersionEntry> {
104+
if versions.is_null() {
105+
set_last_error(LanceErrorCode::InvalidArgument, "versions is NULL");
106+
return None;
107+
}
108+
let v = unsafe { &*versions };
109+
match v.entries.get(index).copied() {
110+
Some(e) => {
111+
clear_last_error();
112+
Some(e)
113+
}
114+
None => {
115+
set_last_error(
116+
LanceErrorCode::InvalidArgument,
117+
format!(
118+
"version index {} out of range; count = {}",
119+
index,
120+
v.entries.len()
121+
),
122+
);
123+
None
124+
}
125+
}
126+
}

tests/c_api_test.rs

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1655,3 +1655,131 @@ fn test_robotics_e2e_write_then_finalize() {
16551655

16561656
unsafe { lance_dataset_close(ds) };
16571657
}
1658+
1659+
// ---------------------------------------------------------------------------
1660+
// Version history (lance_dataset_versions)
1661+
// ---------------------------------------------------------------------------
1662+
1663+
/// Helper: open an existing dataset and append a batch, creating a new version.
1664+
fn append_batch(uri: &str, schema: Arc<Schema>, batch: RecordBatch) {
1665+
lance_c::runtime::block_on(async {
1666+
let mut ds = Dataset::open(uri).await.unwrap();
1667+
ds.append(
1668+
arrow::record_batch::RecordBatchIterator::new(vec![Ok(batch)], schema),
1669+
None,
1670+
)
1671+
.await
1672+
.unwrap();
1673+
});
1674+
}
1675+
1676+
/// A freshly created dataset exposes exactly one version, with id 1 and a
/// populated timestamp.
#[test]
fn test_dataset_versions_single_version() {
    let (_tmp, uri) = create_test_dataset();
    let c_uri = c_str(&uri);
    let dataset = unsafe { lance_dataset_open(c_uri.as_ptr(), ptr::null(), 0) };

    let snapshot = unsafe { lance_dataset_versions(dataset) };
    assert!(!snapshot.is_null());

    let count = unsafe { lance_versions_count(snapshot) };
    assert_eq!(count, 1);
    let first_id = unsafe { lance_versions_id_at(snapshot, 0) };
    assert_eq!(first_id, 1);
    let first_ts = unsafe { lance_versions_timestamp_ms_at(snapshot, 0) };
    assert!(first_ts > 0);

    unsafe { lance_versions_close(snapshot) };
    unsafe { lance_dataset_close(dataset) };
}
1691+
1692+
/// After one append the snapshot lists two versions, in id order, with
/// non-decreasing timestamps.
#[test]
fn test_dataset_versions_multiple_versions() {
    let (_tmp, uri) = create_test_dataset();

    // Append a second batch so the dataset gains a second version.
    let fields = vec![
        Field::new("id", DataType::Int32, false),
        Field::new("name", DataType::Utf8, true),
    ];
    let schema = Arc::new(Schema::new(fields));
    let id_column = Int32Array::from(vec![6, 7]);
    let name_column = StringArray::from(vec!["frank", "grace"]);
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![Arc::new(id_column), Arc::new(name_column)],
    )
    .unwrap();
    append_batch(&uri, schema, batch);

    let c_uri = c_str(&uri);
    let dataset = unsafe { lance_dataset_open(c_uri.as_ptr(), ptr::null(), 0) };
    let snapshot = unsafe { lance_dataset_versions(dataset) };

    assert_eq!(unsafe { lance_versions_count(snapshot) }, 2);

    let [first_id, second_id] = [0, 1].map(|i| unsafe { lance_versions_id_at(snapshot, i) });
    assert_eq!(first_id, 1);
    assert_eq!(second_id, 2);

    let [first_ts, second_ts] =
        [0, 1].map(|i| unsafe { lance_versions_timestamp_ms_at(snapshot, i) });
    assert!(first_ts > 0, "timestamps should be populated");
    assert!(
        second_ts >= first_ts,
        "timestamps should be monotonic by version order"
    );

    unsafe { lance_versions_close(snapshot) };
    unsafe { lance_dataset_close(dataset) };
}
1732+
1733+
/// A NULL dataset pointer yields a NULL snapshot and `InvalidArgument`.
#[test]
fn test_dataset_versions_null_dataset() {
    let snapshot = unsafe { lance_dataset_versions(ptr::null()) };
    assert!(snapshot.is_null());

    let code = lance_last_error_code();
    assert_eq!(code, LanceErrorCode::InvalidArgument);
}
1739+
1740+
/// Counting through a NULL handle returns 0 and reports `InvalidArgument`.
#[test]
fn test_versions_count_null_handle() {
    let count = unsafe { lance_versions_count(ptr::null()) };
    assert_eq!(count, 0);

    let code = lance_last_error_code();
    assert_eq!(code, LanceErrorCode::InvalidArgument);
}
1746+
1747+
/// Out-of-range indices return 0 from both accessors and set
/// `InvalidArgument`.
#[test]
fn test_versions_index_out_of_range() {
    let (_tmp, uri) = create_test_dataset();
    let c_uri = c_str(&uri);
    let ds = unsafe { lance_dataset_open(c_uri.as_ptr(), ptr::null(), 0) };
    assert!(!ds.is_null(), "dataset should open");
    let vs = unsafe { lance_dataset_versions(ds) };
    // A NULL snapshot would also produce 0 + InvalidArgument from every
    // accessor, so without this guard the test could pass without ever
    // reaching the range check.
    assert!(!vs.is_null(), "versions snapshot should be created");

    // Count is 1 for a freshly-created dataset. Exercise the exact boundary
    // (index == count), a clearly-out-of-range index, and the largest index.
    let count = unsafe { lance_versions_count(vs) };
    assert_eq!(count, 1, "a freshly-created dataset has exactly one version");

    for index in [count as usize, 5, usize::MAX] {
        let id = unsafe { lance_versions_id_at(vs, index) };
        assert_eq!(id, 0, "id at out-of-range index {index}");
        assert_eq!(lance_last_error_code(), LanceErrorCode::InvalidArgument);

        let ts = unsafe { lance_versions_timestamp_ms_at(vs, index) };
        assert_eq!(ts, 0, "timestamp at out-of-range index {index}");
        assert_eq!(lance_last_error_code(), LanceErrorCode::InvalidArgument);
    }

    unsafe { lance_versions_close(vs) };
    unsafe { lance_dataset_close(ds) };
}
1770+
1771+
/// Both index accessors return 0 and report `InvalidArgument` when given a
/// NULL handle.
#[test]
fn test_versions_accessors_null_handle() {
    let null_handle: *const LanceVersions = ptr::null();

    let id = unsafe { lance_versions_id_at(null_handle, 0) };
    assert_eq!(id, 0);
    assert_eq!(lance_last_error_code(), LanceErrorCode::InvalidArgument);

    let ts = unsafe { lance_versions_timestamp_ms_at(null_handle, 0) };
    assert_eq!(ts, 0);
    assert_eq!(lance_last_error_code(), LanceErrorCode::InvalidArgument);
}
1781+
1782+
/// Closing a NULL handle must be a no-op rather than a crash.
#[test]
fn test_versions_close_null_is_safe() {
    let null_handle: *mut LanceVersions = ptr::null_mut();
    unsafe { lance_versions_close(null_handle) };
}

0 commit comments

Comments
 (0)