We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 5a98b1e · commit 2817166 · Copy full SHA for 2817166
python/raydp/spark/dataset.py
@@ -74,7 +74,12 @@ def _fetch_arrow_table_from_executor(executor_actor_name: str,
74
executor_actor.getRDDPartition.remote(
75
rdd_id, partition_id, schema_json, driver_agent_url))
76
reader = pa.ipc.open_stream(pa.BufferReader(ipc_bytes))
77
- return reader.read_all()
+ table = reader.read_all()
78
+ # Spark's Arrow conversion may attach schema metadata. Ray Data metadata extraction
79
+ # can be sensitive to unexpected schema metadata in some Ray/PyArrow combinations.
80
+ # Strip schema metadata to make blocks more portable/deterministic.
81
+ table = table.replace_schema_metadata()
82
+ return table
83
84
85
class RecordPiece:
0 commit comments