Skip to content

Commit c23ea62

Browse files
committed
feat: Add fromCsv function.
1 parent 718f051 commit c23ea62

File tree

5 files changed

+109
-0
lines changed

5 files changed

+109
-0
lines changed

dataframe.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ test-suite tests
249249
Functions,
250250
GenDataFrame,
251251
Internal.Parsing,
252+
IO.CSV,
252253
IO.JSON,
253254
Operations.Aggregations,
254255
Operations.Apply,

src/DataFrame.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,8 @@ import DataFrame.IO.CSV as CSV (
250250
ReadOptions (..),
251251
TypeSpec (..),
252252
defaultReadOptions,
253+
fromCsv,
254+
fromCsvBytes,
253255
readCsv,
254256
readCsvWithOpts,
255257
readSeparated,

src/DataFrame/IO/CSV.hs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import Data.Csv.Streaming (Records (..))
2727
import qualified Data.Csv.Streaming as CsvStream
2828

2929
import Control.DeepSeq
30+
import Control.Exception (SomeException, catch)
3031
import Control.Monad
3132
import Data.Char
3233
import qualified Data.Csv as Csv
@@ -547,6 +548,17 @@ writeSeparated ::
547548
IO ()
548549
writeSeparated c filepath df = TIO.writeFile filepath (toSeparated c df)
549550

551+
-- | Parse a CSV string into a DataFrame using default options.
--
-- The input 'String' is packed into 'T.Text', UTF-8 encoded, wrapped as a
-- lazy 'BL.ByteString' and passed to 'decodeSeparated' together with
-- 'defaultReadOptions'. A successful decode is returned as 'Right'; any
-- exception raised while running that action is caught and returned as
-- @Left (show e)@.
--
-- NOTE(review): the handler matches 'SomeException', which is the root of
-- the exception hierarchy and therefore also matches asynchronous
-- exceptions. Consider narrowing it once the exceptions thrown by
-- 'decodeSeparated' are known — TODO confirm.
552+
fromCsv :: String -> IO (Either String DataFrame)
553+
fromCsv s = do
554+
let bs = BL.fromStrict (TE.encodeUtf8 (T.pack s))
555+
(Right <$> decodeSeparated defaultReadOptions bs)
556+
`catch` (\(e :: SomeException) -> pure (Left (show e)))
557+
558+
-- | Parse a lazy 'ByteString' containing CSV data into a DataFrame using default options.
--
-- This is 'decodeSeparated' applied to 'defaultReadOptions'. No exception
-- handler is installed here, so any exception raised while decoding
-- propagates to the caller (in contrast to 'fromCsv', whose result type is
-- an 'Either').
559+
fromCsvBytes :: BL.ByteString -> IO DataFrame
560+
fromCsvBytes = decodeSeparated defaultReadOptions
561+
550562
stripQuotes :: T.Text -> T.Text
551563
stripQuotes txt =
552564
case T.uncons txt of

tests/IO/CSV.hs

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
{-# LANGUAGE OverloadedStrings #-}
2+
{-# LANGUAGE TypeApplications #-}
3+
4+
module IO.CSV where
5+
6+
import qualified Data.ByteString.Lazy as BL
7+
import qualified Data.Text as T
8+
import qualified Data.Text.Encoding as TE
9+
import qualified DataFrame as D
10+
import qualified DataFrame.Internal.Column as DI
11+
import DataFrame.Internal.DataFrame (DataFrame (..), toCsv)
12+
import DataFrame.IO.CSV (fromCsv, fromCsvBytes)
13+
import qualified DataFrame.Operations.Core as D
14+
import Test.HUnit (
15+
Test (TestCase, TestLabel),
16+
assertEqual,
17+
assertFailure,
18+
)
19+
20+
-- | Happy path: parse a simple CSV string with Int and Text columns.
21+
fromCsvHappyPath :: Test
22+
fromCsvHappyPath = TestLabel "fromCsv_happy_path" $ TestCase $ do
23+
result <- fromCsv "name,age\nAlice,30\nBob,25\nCharlie,35\n"
24+
case result of
25+
Left err -> assertFailure $ "Unexpected Left: " ++ err
26+
Right df -> do
27+
assertEqual "rows" 3 (D.nRows df)
28+
assertEqual "columns" 2 (D.nColumns df)
29+
30+
-- | Empty input should return a Left.
--
-- Any 'Left' is accepted: the error message itself is not inspected, so
-- this test only pins that 'fromCsv' reports a failure for @""@ rather than
-- producing a DataFrame.
31+
fromCsvEmpty :: Test
32+
fromCsvEmpty = TestLabel "fromCsv_empty" $ TestCase $ do
33+
result <- fromCsv ""
34+
case result of
35+
Left _ -> return ()
36+
Right _ -> assertFailure "Expected Left for empty input"
37+
38+
-- | fromCsvBytes happy path.
39+
fromCsvBytesHappyPath :: Test
40+
fromCsvBytesHappyPath = TestLabel "fromCsvBytes_happy_path" $ TestCase $ do
41+
let bs = BL.fromStrict (TE.encodeUtf8 "x,y\n1,2\n3,4\n")
42+
df <- fromCsvBytes bs
43+
assertEqual "rows" 2 (D.nRows df)
44+
assertEqual "columns" 2 (D.nColumns df)
45+
46+
-- | Round trip: toCsv then fromCsv preserves data.
47+
fromCsvRoundTrip :: Test
48+
fromCsvRoundTrip = TestLabel "fromCsv_roundTrip" $ TestCase $ do
49+
let df =
50+
D.fromNamedColumns
51+
[ ("a", DI.fromList @Int [1, 2, 3])
52+
, ("b", DI.fromList @T.Text ["hello", "world", "test"])
53+
]
54+
let csvString = T.unpack (toCsv df)
55+
result <- fromCsv csvString
56+
case result of
57+
Left err -> assertFailure $ "Unexpected Left: " ++ err
58+
Right df' -> do
59+
assertEqual "round trip dimensions" (dataframeDimensions df) (dataframeDimensions df')
60+
assertEqual "round trip data" df df'
61+
62+
-- | Round trip via fromCsvBytes; only the dimensions are compared, not the data.
63+
fromCsvBytesRoundTrip :: Test
64+
fromCsvBytesRoundTrip = TestLabel "fromCsvBytes_roundTrip" $ TestCase $ do
65+
let df =
66+
D.fromNamedColumns
67+
[ ("x", DI.fromList @Int [10, 20])
68+
, ("y", DI.fromList @Double [1.5, 2.5])
69+
]
70+
let bs = BL.fromStrict (TE.encodeUtf8 (toCsv df))
71+
df' <- fromCsvBytes bs
72+
assertEqual "round trip dimensions" (dataframeDimensions df) (dataframeDimensions df')
73+
74+
-- | Single column CSV.
75+
fromCsvSingleColumn :: Test
76+
fromCsvSingleColumn = TestLabel "fromCsv_single_column" $ TestCase $ do
77+
result <- fromCsv "id\n10\n20\n30\n"
78+
case result of
79+
Left err -> assertFailure $ "Unexpected Left: " ++ err
80+
Right df -> do
81+
assertEqual "rows" 3 (D.nRows df)
82+
assertEqual "columns" 1 (D.nColumns df)
83+
84+
tests :: [Test]
85+
tests =
86+
[ fromCsvHappyPath
87+
, fromCsvEmpty
88+
, fromCsvBytesHappyPath
89+
, fromCsvRoundTrip
90+
, fromCsvBytesRoundTrip
91+
, fromCsvSingleColumn
92+
]

tests/Main.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import Test.QuickCheck
1111

1212
import qualified DecisionTree
1313
import qualified Functions
14+
import qualified IO.CSV
1415
import qualified IO.JSON
1516
import qualified Internal.Parsing
1617
import qualified LazyParquet
@@ -62,6 +63,7 @@ tests =
6263
++ Operations.Take.tests
6364
++ Operations.Typing.tests
6465
++ Functions.tests
66+
++ IO.CSV.tests
6567
++ IO.JSON.tests
6668
++ Parquet.tests
6769
++ LazyParquet.tests

0 commit comments

Comments
 (0)