Skip to content

Commit 4240449

Browse files
authored
fix joins for missing key columns (#187)
1 parent 44f05d8 commit 4240449

File tree

16 files changed

+287
-126
lines changed

16 files changed

+287
-126
lines changed

src/DataFrame/Errors.hs

Lines changed: 40 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ import qualified Data.Vector.Unboxed as VU
1111

1212
import Control.Exception
1313
import Data.Array
14+
import qualified Data.List as L
1415
import Data.Typeable (Typeable)
1516
import DataFrame.Display.Terminal.Colours
1617
import Type.Reflection (TypeRep)
@@ -29,7 +30,7 @@ data DataFrameException where
2930
TypeErrorContext a b ->
3031
DataFrameException
3132
AggregatedAndNonAggregatedException :: T.Text -> T.Text -> DataFrameException
32-
ColumnNotFoundException :: T.Text -> T.Text -> [T.Text] -> DataFrameException
33+
ColumnsNotFoundException :: [T.Text] -> T.Text -> [T.Text] -> DataFrameException
3334
EmptyDataSetException :: T.Text -> DataFrameException
3435
InternalException :: T.Text -> DataFrameException
3536
NonColumnReferenceException :: T.Text -> DataFrameException
@@ -51,7 +52,7 @@ instance Show DataFrameException where
5152
(errorColumnName context)
5253
(callingFunctionName context)
5354
errorString
54-
show (ColumnNotFoundException columnName callPoint availableColumns) = columnNotFound columnName callPoint availableColumns
55+
show (ColumnsNotFoundException columnNames callPoint availableColumns) = columnsNotFound columnNames callPoint availableColumns
5556
show (EmptyDataSetException callPoint) = emptyDataSetError callPoint
5657
show (WrongQuantileNumberException q) = wrongQuantileNumberError q
5758
show (WrongQuantileIndexException qs q) = wrongQuantileIndexError qs q
@@ -65,15 +66,46 @@ instance Show DataFrameException where
6566
++ T.unpack expr2
6667

6768
columnNotFound :: T.Text -> T.Text -> [T.Text] -> String
68-
columnNotFound name callPoint columns =
69+
columnNotFound missingColumn = columnsNotFound [missingColumn]
70+
71+
columnsNotFound :: [T.Text] -> T.Text -> [T.Text] -> String
72+
columnsNotFound missingColumns callPoint availableColumns =
6973
red "\n\n[ERROR] "
70-
++ "Column not found: "
71-
++ T.unpack name
74+
++ missingColumnsLabel missingColumns
75+
++ ": "
76+
++ T.unpack (T.intercalate ", " missingColumns)
7277
++ " for operation "
7378
++ T.unpack callPoint
74-
++ "\n\tDid you mean "
75-
++ T.unpack (guessColumnName name columns)
76-
++ "?\n\n"
79+
++ formatSuggestions missingColumns availableColumns
80+
++ "\n\n"
81+
where
82+
missingColumnsLabel [_] = "Column not found"
83+
missingColumnsLabel _ = "Columns not found"
84+
85+
formatSuggestions [missingColumn] columns =
86+
case guessColumnName missingColumn columns of
87+
"" -> ""
88+
guessed ->
89+
"\n\tDid you mean "
90+
++ T.unpack guessed
91+
++ "?"
92+
formatSuggestions names columns =
93+
case traverse (`suggestColumnName` columns) names of
94+
Just guessedColumns
95+
| not (null guessedColumns) ->
96+
"\n\tDid you mean "
97+
++ formatColumnSuggestions guessedColumns
98+
++ "?"
99+
_ -> ""
100+
101+
suggestColumnName missingColumn columns = case guessColumnName missingColumn columns of
102+
"" -> Nothing
103+
guessed -> Just guessed
104+
105+
formatColumnSuggestions guessedColumns =
106+
"["
107+
++ L.intercalate ", " (map (show . T.unpack) guessedColumns)
108+
++ "]"
77109

78110
typeMismatchError :: String -> String -> String
79111
typeMismatchError givenType expectedType =

src/DataFrame/IO/Parquet.hs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ import qualified Data.Text as T
1919
import Data.Text.Encoding
2020
import Data.Time
2121
import Data.Time.Clock.POSIX (posixSecondsToUTCTime)
22-
import DataFrame.Errors (DataFrameException (ColumnNotFoundException))
22+
import DataFrame.Errors (DataFrameException (ColumnsNotFoundException))
2323
import DataFrame.Internal.Binary (littleEndianWord32)
2424
import qualified DataFrame.Internal.Column as DI
2525
import DataFrame.Internal.DataFrame (DataFrame)
@@ -179,8 +179,8 @@ _readParquetWithOpts extraConfig opts path = withFileBufferedOrSeekable extraCon
179179
in unless
180180
(L.null missing)
181181
( throw
182-
( ColumnNotFoundException
183-
(T.pack $ show missing)
182+
( ColumnsNotFoundException
183+
missing
184184
"readParquetWithOpts"
185185
availableSelectedColumns
186186
)

src/DataFrame/Internal/DataFrame.hs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -161,16 +161,16 @@ getColumn name df
161161

162162
{- | Retrieves a column by name from the dataframe, throwing an exception if not found.
163163
164-
This is an unsafe version of 'getColumn' that throws 'ColumnNotFoundException'
164+
This is an unsafe version of 'getColumn' that throws 'ColumnsNotFoundException'
165165
if the column does not exist. Use this when you are certain the column exists.
166166
167167
==== __Throws__
168168
169-
* 'ColumnNotFoundException' - if the column with the given name does not exist
169+
* 'ColumnsNotFoundException' - if the column with the given name does not exist
170170
-}
171171
unsafeGetColumn :: T.Text -> DataFrame -> Column
172172
unsafeGetColumn name df = case getColumn name df of
173-
Nothing -> throw $ ColumnNotFoundException name "" (M.keys $ columnIndices df)
173+
Nothing -> throw $ ColumnsNotFoundException [name] "" (M.keys $ columnIndices df)
174174
Just col -> col
175175

176176
{- | Checks if the dataframe is empty (has no columns).

src/DataFrame/Internal/Interpreter.hs

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -482,7 +482,7 @@ eval _ (Lit v) = Right (Scalar v)
482482
eval (FlatCtx df) (Col name) =
483483
case getColumn name df of
484484
Nothing ->
485-
Left $ ColumnNotFoundException name "" (M.keys $ columnIndices df)
485+
Left $ ColumnsNotFoundException [name] "" (M.keys $ columnIndices df)
486486
Just c
487487
| hasElemType @a c -> Right (Flat c)
488488
| otherwise ->
@@ -500,8 +500,8 @@ eval (GroupCtx gdf) (Col name) =
500500
case getColumn name (fullDataframe gdf) of
501501
Nothing ->
502502
Left $
503-
ColumnNotFoundException
504-
name
503+
ColumnsNotFoundException
504+
[name]
505505
""
506506
(M.keys $ columnIndices $ fullDataframe gdf)
507507
Just c
@@ -524,14 +524,14 @@ eval (FlatCtx df) (CastWith name _tag onResult) =
524524
case getColumn name df of
525525
Nothing ->
526526
Left $
527-
ColumnNotFoundException name "" (M.keys $ columnIndices df)
527+
ColumnsNotFoundException [name] "" (M.keys $ columnIndices df)
528528
Just c -> Flat <$> promoteColumnWith onResult c
529529
eval (GroupCtx gdf) (CastWith name _tag onResult) =
530530
case getColumn name (fullDataframe gdf) of
531531
Nothing ->
532532
Left $
533-
ColumnNotFoundException
534-
name
533+
ColumnsNotFoundException
534+
[name]
535535
""
536536
(M.keys $ columnIndices $ fullDataframe gdf)
537537
Just c -> do
@@ -579,8 +579,8 @@ eval (GroupCtx gdf) expr@(Agg (FoldAgg _ (Just seed) (f :: a -> b -> a)) (Col na
579579
case getColumn name (fullDataframe gdf) of
580580
Nothing ->
581581
Left $
582-
ColumnNotFoundException
583-
name
582+
ColumnsNotFoundException
583+
[name]
584584
""
585585
(M.keys $ columnIndices $ fullDataframe gdf)
586586
Just col ->
@@ -599,8 +599,8 @@ eval (GroupCtx gdf) expr@(Agg (FoldAgg _ Nothing (f :: a -> b -> a)) (Col name :
599599
case getColumn name (fullDataframe gdf) of
600600
Nothing ->
601601
Left $
602-
ColumnNotFoundException
603-
name
602+
ColumnsNotFoundException
603+
[name]
604604
""
605605
(M.keys $ columnIndices $ fullDataframe gdf)
606606
Just col ->
@@ -617,8 +617,8 @@ eval
617617
case getColumn name (fullDataframe gdf) of
618618
Nothing ->
619619
Left $
620-
ColumnNotFoundException
621-
name
620+
ColumnsNotFoundException
621+
[name]
622622
""
623623
(M.keys $ columnIndices $ fullDataframe gdf)
624624
Just col ->

src/DataFrame/Internal/Row.hs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -169,8 +169,8 @@ mkRowFromArgs names df i = V.map get (V.fromList names)
169169
get name = case getColumn name df of
170170
Nothing ->
171171
throw $
172-
ColumnNotFoundException
173-
name
172+
ColumnsNotFoundException
173+
[name]
174174
"[INTERNAL] mkRowFromArgs"
175175
(M.keys $ columnIndices df)
176176
Just (BoxedColumn column) -> toAny (column V.! i)
@@ -203,7 +203,7 @@ mkRowRep df names i = V.generate (L.length names) (\index -> get (names' V.! ind
203203
Just e -> toAny e
204204
Nothing -> throwError name
205205
Nothing ->
206-
throw $ ColumnNotFoundException name "mkRowRep" (M.keys $ columnIndices df)
206+
throw $ ColumnsNotFoundException [name] "mkRowRep" (M.keys $ columnIndices df)
207207

208208
sortedIndexes' :: [Bool] -> V.Vector Row -> VU.Vector Int
209209
sortedIndexes' flipCompare rows = runST $ do

src/DataFrame/Operations/Aggregation.hs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,8 +49,8 @@ groupBy ::
4949
groupBy names df
5050
| any (`notElem` columnNames df) names =
5151
throw $
52-
ColumnNotFoundException
53-
(T.pack $ show $ names L.\\ columnNames df)
52+
ColumnsNotFoundException
53+
(names L.\\ columnNames df)
5454
"groupBy"
5555
(columnNames df)
5656
| nRows df == 0 =

src/DataFrame/Operations/Core.hs

Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -394,7 +394,7 @@ cloneColumn original new df
394394
| null df = throw (EmptyDataSetException "cloneColumn")
395395
| otherwise = fromMaybe
396396
( throw $
397-
ColumnNotFoundException original "cloneColumn" (M.keys $ columnIndices df)
397+
ColumnsNotFoundException [original] "cloneColumn" (M.keys $ columnIndices df)
398398
)
399399
$ do
400400
column <- getColumn original df
@@ -485,7 +485,7 @@ renameSafe ::
485485
renameSafe orig new df
486486
| null df = throw (EmptyDataSetException "rename")
487487
| otherwise = fromMaybe
488-
(Left $ ColumnNotFoundException orig "rename" (M.keys $ columnIndices df))
488+
(Left $ ColumnsNotFoundException [orig] "rename" (M.keys $ columnIndices df))
489489
$ do
490490
columnIndex <- M.lookup orig (columnIndices df)
491491
let origRemoved = M.delete orig (columnIndices df)
@@ -859,7 +859,8 @@ columnAsVector expr df
859859
(Col name) -> case getColumn name df of
860860
Just col -> toVector col
861861
Nothing ->
862-
Left $ ColumnNotFoundException name "columnAsVector" (M.keys $ columnIndices df)
862+
Left $
863+
ColumnsNotFoundException [name] "columnAsVector" (M.keys $ columnIndices df)
863864
_ -> case interpret df expr of
864865
Left e -> throw e
865866
Right (TColumn col) -> toVector col
@@ -876,7 +877,7 @@ columnAsIntVector (Col name) df = case getColumn name df of
876877
Just col -> toIntVector col
877878
Nothing ->
878879
Left $
879-
ColumnNotFoundException name "columnAsIntVector" (M.keys $ columnIndices df)
880+
ColumnsNotFoundException [name] "columnAsIntVector" (M.keys $ columnIndices df)
880881
columnAsIntVector expr df = case interpret df expr of
881882
Left e -> throw e
882883
Right (TColumn col) -> toIntVector col
@@ -893,7 +894,10 @@ columnAsDoubleVector (Col name) df = case getColumn name df of
893894
Just col -> toDoubleVector col
894895
Nothing ->
895896
Left $
896-
ColumnNotFoundException name "columnAsDoubleVector" (M.keys $ columnIndices df)
897+
ColumnsNotFoundException
898+
[name]
899+
"columnAsDoubleVector"
900+
(M.keys $ columnIndices df)
897901
columnAsDoubleVector expr df = case interpret df expr of
898902
Left e -> throw e
899903
Right (TColumn col) -> toDoubleVector col
@@ -910,7 +914,10 @@ columnAsFloatVector (Col name) df = case getColumn name df of
910914
Just col -> toFloatVector col
911915
Nothing ->
912916
Left $
913-
ColumnNotFoundException name "columnAsFloatVector" (M.keys $ columnIndices df)
917+
ColumnsNotFoundException
918+
[name]
919+
"columnAsFloatVector"
920+
(M.keys $ columnIndices df)
914921
columnAsFloatVector expr df = case interpret df expr of
915922
Left e -> throw e
916923
Right (TColumn col) -> toFloatVector col
@@ -923,7 +930,10 @@ columnAsUnboxedVector (Col name) df = case getColumn name df of
923930
Just col -> toUnboxedVector col
924931
Nothing ->
925932
Left $
926-
ColumnNotFoundException name "columnAsFloatVector" (M.keys $ columnIndices df)
933+
ColumnsNotFoundException
934+
[name]
935+
"columnAsFloatVector"
936+
(M.keys $ columnIndices df)
927937
columnAsUnboxedVector expr df = case interpret df expr of
928938
Left e -> throw e
929939
Right (TColumn col) -> toUnboxedVector col

0 commit comments

Comments
 (0)