Skip to content

Commit 8c45fd4

Browse files
umartinLiam Brannigan
authored and
Liam Brannigan
committed
feat: set column metadata from python
Signed-off-by: Martin Andersson <[email protected]> Signed-off-by: Liam Brannigan <[email protected]>
1 parent ccfa999 commit 8c45fd4

File tree

4 files changed

+90
-8
lines changed

4 files changed

+90
-8
lines changed

python/deltalake/_internal.pyi

+7
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,13 @@ class RawDeltaTable:
225225
allow_out_of_range: bool = False,
226226
) -> pyarrow.RecordBatchReader: ...
227227
def transaction_versions(self) -> Dict[str, Transaction]: ...
228+
def set_column_metadata(
229+
self,
230+
column: str,
231+
metadata: dict[str, str],
232+
commit_properties: Optional[CommitProperties],
233+
post_commithook_properties: Optional[PostCommitHookProperties],
234+
) -> None: ...
228235
def __datafusion_table_provider__(self) -> Any: ...
229236

230237
def rust_core_version() -> str: ...

python/deltalake/table.py

+22
Original file line numberDiff line numberDiff line change
@@ -2167,6 +2167,28 @@ def set_table_properties(
21672167
commit_properties,
21682168
)
21692169

2170+
def set_column_metadata(
2171+
self,
2172+
column: str,
2173+
metadata: dict[str, str],
2174+
commit_properties: Optional[CommitProperties] = None,
2175+
post_commithook_properties: Optional[PostCommitHookProperties] = None,
2176+
) -> None:
2177+
"""
2178+
Update a field's metadata in a schema. If the metadata key does not exist, the entry is inserted.
2179+
2180+
If the column name doesn't exist in the schema - an error is raised.
2181+
2182+
:param column: name of the column to update metadata for.
2183+
:param metadata: the metadata to be added or modified on the column.
2184+
:param commit_properties: properties of the transaction commit. If None, default values are used.
2185+
:param post_commithook_properties: properties for the post commit hook. If None, default values are used.
2186+
:return:
2187+
"""
2188+
self.table._table.set_column_metadata(
2189+
column, metadata, commit_properties, post_commithook_properties
2190+
)
2191+
21702192

21712193
class TableOptimizer:
21722194
"""API for various table optimization commands."""

python/src/lib.rs

+46-8
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use arrow::pyarrow::PyArrowType;
1818
use chrono::{DateTime, Duration, FixedOffset, Utc};
1919
use datafusion_ffi::table_provider::FFI_TableProvider;
2020
use delta_kernel::expressions::Scalar;
21-
use delta_kernel::schema::StructField;
21+
use delta_kernel::schema::{MetadataValue, StructField};
2222
use deltalake::arrow::compute::concat_batches;
2323
use deltalake::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
2424
use deltalake::arrow::record_batch::{RecordBatch, RecordBatchIterator};
@@ -63,13 +63,6 @@ use error::DeltaError;
6363
use futures::future::join_all;
6464
use tracing::log::*;
6565

66-
use pyo3::exceptions::{PyRuntimeError, PyValueError};
67-
use pyo3::prelude::*;
68-
use pyo3::pybacked::PyBackedStr;
69-
use pyo3::types::{PyCapsule, PyDict, PyFrozenSet};
70-
use serde_json::{Map, Value};
71-
use uuid::Uuid;
72-
7366
use crate::error::DeltaProtocolError;
7467
use crate::error::PythonError;
7568
use crate::features::TableFeatures;
@@ -78,6 +71,14 @@ use crate::merge::PyMergeBuilder;
7871
use crate::query::PyQueryBuilder;
7972
use crate::schema::{schema_to_pyobject, Field};
8073
use crate::utils::rt;
74+
use deltalake::operations::update_field_metadata::UpdateFieldMetadataBuilder;
75+
use deltalake::protocol::DeltaOperation::UpdateFieldMetadata;
76+
use pyo3::exceptions::{PyRuntimeError, PyValueError};
77+
use pyo3::prelude::*;
78+
use pyo3::pybacked::PyBackedStr;
79+
use pyo3::types::{PyCapsule, PyDict, PyFrozenSet};
80+
use serde_json::{Map, Value};
81+
use uuid::Uuid;
8182

8283
#[cfg(all(target_family = "unix", not(target_os = "emscripten")))]
8384
use jemallocator::Jemalloc;
@@ -1521,6 +1522,43 @@ impl RawDeltaTable {
15211522
}
15221523
}
15231524

1525+
#[pyo3(signature = (field_name, metadata, commit_properties=None, post_commithook_properties=None))]
1526+
pub fn set_column_metadata(
1527+
&self,
1528+
py: Python,
1529+
field_name: &str,
1530+
metadata: HashMap<String, String>,
1531+
commit_properties: Option<PyCommitProperties>,
1532+
post_commithook_properties: Option<PyPostCommitHookProperties>,
1533+
) -> PyResult<()> {
1534+
let table = py.allow_threads(|| {
1535+
let mut cmd = UpdateFieldMetadataBuilder::new(self.log_store()?, self.cloned_state()?);
1536+
1537+
cmd = cmd.with_field_name(field_name).with_metadata(
1538+
metadata
1539+
.iter()
1540+
.map(|(k, v)| (k.clone(), MetadataValue::String(v.clone())))
1541+
.collect(),
1542+
);
1543+
1544+
if let Some(commit_properties) =
1545+
maybe_create_commit_properties(commit_properties, post_commithook_properties)
1546+
{
1547+
cmd = cmd.with_commit_properties(commit_properties)
1548+
}
1549+
1550+
if self.log_store()?.name() == "LakeFSLogStore" {
1551+
cmd = cmd.with_custom_execute_handler(Arc::new(LakeFSCustomExecuteHandler {}))
1552+
}
1553+
1554+
rt().block_on(cmd.into_future())
1555+
.map_err(PythonError::from)
1556+
.map_err(PyErr::from)
1557+
})?;
1558+
self.set_state(table.state)?;
1559+
Ok(())
1560+
}
1561+
15241562
fn __datafusion_table_provider__<'py>(
15251563
&self,
15261564
py: Python<'py>,

python/tests/test_alter.py

+15
Original file line numberDiff line numberDiff line change
@@ -457,3 +457,18 @@ def test_add_feautres(existing_sample_table: DeltaTable):
457457
"v2Checkpoint",
458458
]
459459
) # type: ignore
460+
461+
462+
def test_set_column_metadata(tmp_path: pathlib.Path, sample_table: pa.Table):
463+
write_deltalake(tmp_path, sample_table)
464+
465+
dt = DeltaTable(tmp_path)
466+
467+
dt.alter.set_column_metadata("price", {"comment": "my comment"})
468+
469+
fields_by_name = {field.name: field for field in dt.schema().fields}
470+
assert fields_by_name["price"].metadata == {"comment": "my comment"}
471+
472+
with pytest.raises(DeltaError):
473+
# Can't set metadata for non existing column.
474+
dt.alter.set_column_metadata("non_existing_column", {"comment": "my comment"})

0 commit comments

Comments
 (0)