Skip to content

Commit 2d95832

Browse files
Subham Singhal
authored and committed
Adds support for parquet field id
1 parent 53b0ffb commit 2d95832

9 files changed

Lines changed: 1039 additions & 28 deletions

File tree

datafusion/common/src/config.rs

Lines changed: 5 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -751,6 +751,11 @@ config_namespace! {
751751
/// parquet reader setting. 0 means no caching.
752752
pub max_predicate_cache_size: Option<usize>, default = None
753753

754+
/// (reading) If true, use Parquet field IDs for column resolution instead of
755+
/// column names. This enables schema evolution with renamed/reordered columns.
756+
/// When field IDs are unavailable, falls back to name-based matching.
757+
pub field_id_read_enabled: bool, default = false
758+
754759
// The following options affect writing to parquet files
755760
// and map to parquet::file::properties::WriterProperties
756761

@@ -1142,12 +1147,6 @@ config_namespace! {
11421147
///
11431148
/// Default: true
11441149
pub enable_sort_pushdown: bool, default = true
1145-
1146-
/// When set to true, the optimizer will extract leaf expressions
1147-
/// (such as `get_field`) from filter/sort/join nodes into projections
1148-
/// closer to the leaf table scans, and push those projections down
1149-
/// towards the leaf nodes.
1150-
pub enable_leaf_expression_pushdown: bool, default = true
11511150
}
11521151
}
11531152

@@ -3071,22 +3070,6 @@ config_namespace! {
30713070
/// If not specified, the default level for the compression algorithm is used.
30723071
pub compression_level: Option<u32>, default = None
30733072
pub schema_infer_max_rec: Option<usize>, default = None
3074-
/// The JSON format to use when reading files.
3075-
///
3076-
/// When `true` (default), expects newline-delimited JSON (NDJSON):
3077-
/// ```text
3078-
/// {"key1": 1, "key2": "val"}
3079-
/// {"key1": 2, "key2": "vals"}
3080-
/// ```
3081-
///
3082-
/// When `false`, expects JSON array format:
3083-
/// ```text
3084-
/// [
3085-
/// {"key1": 1, "key2": "val"},
3086-
/// {"key1": 2, "key2": "vals"}
3087-
/// ]
3088-
/// ```
3089-
pub newline_delimited: bool, default = true
30903073
}
30913074
}
30923075

datafusion/common/src/file_options/parquet_writer.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ impl ParquetOptions {
209209
coerce_int96: _, // not used for writer props
210210
skip_arrow_metadata: _,
211211
max_predicate_cache_size: _,
212+
field_id_read_enabled: _, // not used for writer props
212213
} = self;
213214

214215
let mut builder = WriterProperties::builder()

0 commit comments

Comments
 (0)