@@ -19,10 +19,11 @@ import qualified Data.Text as T
1919import Data.Text.Encoding
2020import Data.Time
2121import Data.Time.Clock.POSIX (posixSecondsToUTCTime )
22+ import qualified Data.Vector as V
2223import DataFrame.Errors (DataFrameException (ColumnsNotFoundException ))
2324import DataFrame.Internal.Binary (littleEndianWord32 )
2425import qualified DataFrame.Internal.Column as DI
25- import DataFrame.Internal.DataFrame (DataFrame )
26+ import DataFrame.Internal.DataFrame (DataFrame , columns )
2627import DataFrame.Internal.Expression (Expr , getColumns )
2728import qualified DataFrame.Operations.Core as DI
2829import DataFrame.Operations.Merge ()
@@ -75,6 +76,8 @@ data ParquetReadOptions = ParquetReadOptions
7576 -- ^ Optional row filter expression applied before projection.
7677 , rowRange :: Maybe (Int , Int )
7778 -- ^ Optional row slice @(start, end)@ with start-inclusive/end-exclusive semantics.
79+ , safeColumns :: Bool
80+ -- ^ When @True@, every column is promoted to @OptionalColumn@ after the read, regardless of its nullability in the schema.
7881 }
7982 deriving (Eq , Show )
8083
@@ -87,6 +90,7 @@ ParquetReadOptions
8790 { selectedColumns = Nothing
8891 , predicate = Nothing
8992 , rowRange = Nothing
93+ , safeColumns = False
9094 }
9195@
9296-}
@@ -96,6 +100,7 @@ defaultParquetReadOptions =
96100 { selectedColumns = Nothing
97101 , predicate = Nothing
98102 , rowRange = Nothing
103+ , safeColumns = False
99104 }
100105
101106-- Public API --------------------------------------------------------------
@@ -349,9 +354,15 @@ applyPredicate :: ParquetReadOptions -> DataFrame -> DataFrame
-- Filter the frame with the optional row predicate from the read options.
-- When 'predicate' is 'Nothing' the frame is returned unchanged; otherwise
-- the expression and the frame are handed to @DS.filterWhere@.
349354applyPredicate opts df =
350355 maybe df (`DS.filterWhere` df) (predicate opts)
351356
-- | Apply the 'safeColumns' read option.
--
-- When the flag is set, every column of the frame is passed through
-- @DI.ensureOptional@ and the frame is rebuilt with the mapped column
-- vector; when it is not set, the frame is returned untouched.
--
-- NOTE(review): @DI.ensureOptional@ is defined outside this view; it is
-- presumably a no-op on columns that are already optional -- confirm.
357+ applySafeRead :: ParquetReadOptions -> DataFrame -> DataFrame
358+ applySafeRead opts df
359+ | safeColumns opts = df{columns = V. map DI. ensureOptional (columns df)}
360+ | otherwise = df
361+
-- | Apply all read options to a loaded frame.
--
-- The steps are composed with @(.)@, so they run bottom to top:
-- 'applyPredicate' first, then 'applySelectedColumns', then 'applyRowRange',
-- and finally 'applySafeRead' (the step added by this change; it takes over
-- the head of the pipeline from 'applyRowRange').
--
-- NOTE(review): filtering happens before column selection, which presumably
-- lets a predicate reference columns that are not in the selection --
-- confirm against 'applySelectedColumns'.
352362applyReadOptions :: ParquetReadOptions -> DataFrame -> DataFrame
353363applyReadOptions opts =
354- applyRowRange opts
364+ applySafeRead opts
365+ . applyRowRange opts
355366 . applySelectedColumns opts
356367 . applyPredicate opts
357368
0 commit comments