@@ -30,6 +30,7 @@ import Data.DataFrame.Internal.Column (Column(..), freezeColumn', writeColumn, c
3030import Data.DataFrame.Internal.DataFrame (DataFrame (.. ))
3131import Data.DataFrame.Internal.Parsing
3232import Data.DataFrame.Operations.Typing
33+ import Data.Foldable (fold )
3334import Data.Function (on )
3435import Data.IORef
3536import Data.Maybe
@@ -184,27 +185,24 @@ field c =
184185 <?> " field"
185186{-# INLINE field #-}
186187
-- | The set of characters that end an unquoted CSV field for the given
-- separator: the separator itself, line feed, carriage return and the
-- double quote.
unquotedTerminators :: Char -> S.Set Char
unquotedTerminators sep = S.insert sep lineBreaksAndQuote
  where
    -- Separator-independent part of the set.
    lineBreaksAndQuote = S.fromList "\n\r\""
190+
-- | Parse an unquoted CSV field: the run of characters up to, but not
-- including, the next character in 'unquotedTerminators' for @sep@.
unquotedField :: Char -> Parser T.Text
unquotedField sep = takeWhile isFieldChar <?> "unquoted field"
  where
    stops = unquotedTerminators sep
    -- A character belongs to the field as long as it is not a terminator.
    isFieldChar ch = ch `S.notMember` stops
{-# INLINE unquotedField #-}
192196
-- | Parse a double-quoted CSV field and return its contents without the
-- surrounding quotes.
--
-- Between the opening and closing quote, any sequence of the following
-- chunks is accepted and concatenated:
--
-- * a non-empty run of characters other than a double quote or a backslash;
-- * a backslash escape: a backslash followed by a backslash or a double
--   quote, yielding the escaped character;
-- * a doubled double quote (the RFC 4180 escape), yielding one double quote.
--
-- Accepting the doubled-quote form keeps files produced by standard CSV
-- writers parseable alongside backslash-escaped input.
quotedField :: Parser T.Text
quotedField = char '"' *> contents <* char '"' <?> "quoted field"
  where
    contents = fold <$> many (plain <|> escaped <|> doubled)
    plain = takeWhile1 (notInClass "\"\\")
    escaped = T.singleton <$> (char '\\' *> (char '\\' <|> char '"'))
    -- Two quotes in a row stand for one literal quote. When the second quote
    -- is missing this alternative fails and the parser backtracks, leaving
    -- the lone quote to be consumed as the closing delimiter.
    doubled = T.singleton <$> (char '"' <* char '"')
{-# INLINE quotedField #-}
209207
210208lineEnd :: Parser ()
@@ -226,7 +224,7 @@ countRows c path = withFile path ReadMode $! go 0 ""
226224 Fail unconsumed ctx er -> do
227225 erpos <- hTell h
228226 fail $ " Failed to parse CSV file around " <> show erpos <> " byte; due: "
229- <> show er <> " ; context: " <> show ctx
227+ <> show er <> " ; context: " <> show ctx <> " " <> show unconsumed
230228 Partial c -> do
231229 fail $ " Partial handler is called; n = " <> show n
232230 Done (unconsumed :: T. Text ) _ ->
0 commit comments