-
Couldn't load subscription status.
- Fork 537
Closed
Labels
bug (Something isn't working), on-hold (Issues and Pull Requests that are on hold for some reason)
Description
Environment
Delta-rs version:
0.21
Bug
I have an existing Delta Lake table that was created with Spark. Now I am trying to write/merge into it with the new `deltalake` (delta-rs) Python library.
It fails with "Generic error: Error partitioning record batch: Missing partition column: failed to parse"
from deltalake import DeltaTable, write_deltalake
import polars as pl
import pyarrow as pa
from datetime import datetime
from decimal import Decimal
from datetime import date
# Location of the Delta table used by this reproduction.
DATABASE_NAME = "TEST_DB"

# Two sample rows; both share the same `date` value, which is the
# partition column of the table created below.
data = {
    "timestamp": [
        datetime(2024, 11, 25, 9, 44, 46, 660000),
        datetime(2024, 11, 25, 9, 47, 4, 240000),
    ],
    "date": [date(2024, 11, 25)] * 2,
    "value": [Decimal("823.0")] * 2,
}
df = pl.DataFrame(data)

# Simplified Arrow schema: one (name, type) pair per column.
schema_fields = [
    ("timestamp", pa.timestamp("us")),
    ("date", pa.date32()),
    ("value", pa.decimal128(6, 1)),
]
schema = pa.schema(schema_fields)

# Create a new Delta table partitioned on `date`.
dt = DeltaTable.create(
    DATABASE_NAME,
    schema=schema,
    partition_by=["date"],
)

# Initial write: append the sample rows to the freshly created table.
write_deltalake(dt, df.to_pandas(), mode="append")

# Re-open the table and show its schema followed by its content.
dt_read = DeltaTable(DATABASE_NAME)
print("Schema:", dt_read.schema(), sep="\n")

df_read = dt_read.to_pandas()
print("\nContent:", df_read, sep="\n")

# Merge the same rows back in: rows matching on timestamp and value are
# updated, all remaining source rows are inserted.
merger = dt.merge(
    source=df.to_pandas(),
    predicate="target.timestamp = source.timestamp AND target.value = source.value",
    source_alias="source",
    target_alias="target",
)
merger = merger.when_matched_update_all()
merger = merger.when_not_matched_insert_all()
merger.execute()
Metadata
Metadata
Assignees
Labels
bug (Something isn't working), on-hold (Issues and Pull Requests that are on hold for some reason)