Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 58 additions & 40 deletions scls-util/src/Cardano/SCLS/Util/Tool.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE RecordWildCards #-}

-- | File manipulation utilities for SCLS files.
module Cardano.SCLS.Util.Tool (splitFile, mergeFiles, extract, ExtractOptions (..)) where

import Cardano.SCLS.Internal.Reader
Expand All @@ -13,7 +14,7 @@ import Cardano.Types.Namespace (Namespace (..))
import Cardano.Types.Namespace qualified as Namespace
import Cardano.Types.Network (NetworkId (Mainnet))
import Cardano.Types.SlotNo (SlotNo (SlotNo))
import Control.Exception (SomeException, catch)
import Control.Exception (SomeException, bracket, catch)
import Control.Monad (foldM)
import Data.Function ((&))
import Data.Map.Strict qualified as Map
Expand All @@ -24,6 +25,10 @@ import System.Directory (createDirectoryIfMissing)
import System.FilePath ((</>))
import System.IO

{- | Split a single SCLS file into multiple files by namespace.
Takes a source SCLS file and an output directory, and creates separate files
for each namespace found in the source file.
-}
splitFile :: FilePath -> FilePath -> IO Result
splitFile sourceFile outputDir = do
putStrLn $ "Splitting file: " ++ sourceFile
Expand Down Expand Up @@ -54,6 +59,11 @@ splitFile sourceFile outputDir = do
putStrLn $ "Error: " ++ show e
pure OtherError

{- | Merge multiple SCLS files into a single output file.

Takes a list of input files and combines their namespace data into a single
output file.
-}
mergeFiles :: FilePath -> [FilePath] -> IO Result
mergeFiles _ [] = do
putStrLn "No source files provided for merging"
Expand All @@ -66,25 +76,21 @@ mergeFiles outputFile sourceFiles = do

putStrLn $ "Found " ++ show (Map.size nsToFiles) ++ " unique namespace(s)"

let stream =
S.each (Map.toList nsToFiles)
& S.mapM_ \(ns, files) -> do
S.each files
& S.mapM
( \file -> do
s <- withNamespacedData @RawBytes file ns $ \s ->
-- eagerly load each stream to avoid issues with file handles
-- FIXME: use a different data structure like Vector
-- FIXME: concerns about loading entire namespace data into memory
S.toList_ s
pure (ns S.:> S.each s)
)

serialize
outputFile
Mainnet
(SlotNo 1)
(defaultSerializationPlan & addChunks stream)
withNamespaceHandles nsToFiles $ \nsHandles -> do
let stream =
S.each nsHandles
& S.mapM_ \(ns, handles) -> do
S.each handles
& S.map
( \handle ->
(ns S.:> namespacedData @RawBytes handle ns)
)

serialize
outputFile
Mainnet
(SlotNo 1)
(defaultSerializationPlan & addChunks stream)

putStrLn "Merge complete"
pure Ok
Expand All @@ -106,11 +112,29 @@ mergeFiles outputFile sourceFiles = do
)
mempty
files
-- Open file handles for each namespace's files, execute the given action,
-- and ensure all handles are closed afterwards.
withNamespaceHandles :: Map.Map Namespace [FilePath] -> ([(Namespace, [Handle])] -> IO a) -> IO a
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only problem with this approach is that we do open all the handles simultaneously it means that there is an upper bound limit on the number of the namespaces we can handle.

I think it's ok but worth adding a comment

withNamespaceHandles nsToFiles =
bracket
( Map.foldrWithKey
( \ns files acc -> do
handles <- mapM (\file -> openFile file ReadMode) files
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will leak handles in case if an exceptional will arrive in the middle when we opened some handles. There are two ways to resolve that:

  1. build nester brackets with using foldr so every bracket will be closed
  2. use bracketOnError to allocate reasources and close if any error happened (keeping only a single bracket).

(:) (ns, handles) <$> acc
)
(pure [])
nsToFiles
)
(mapM_ (mapM_ hClose . snd))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe that Handle uses MVar inside so hClose may be interruptible it means that if asynchronous exception arrives we may close only some of the handles. Having something like hClose x1 finally hClose x2 ... is cleaner way to do it (or mask uninterruptible (I prefer not doing that))


data ExtractOptions = ExtractOptions
{ extractNamespaces :: Maybe [Namespace]
}

{- | Extract specific data from an SCLS file into a new file.
Takes a source SCLS file, an output file, and extraction options specifying
which data to extract.
-}
extract :: FilePath -> FilePath -> ExtractOptions -> IO Result
extract sourceFile outputFile ExtractOptions{..} = do
putStrLn $ "Extracting from file: " ++ sourceFile
Expand All @@ -119,26 +143,20 @@ extract sourceFile outputFile ExtractOptions{..} = do
do
Hdr{..} <- withHeader sourceFile pure

let chunks =
case extractNamespaces of
Nothing -> S.each []
Just nsList ->
S.each nsList
& S.mapM
( \ns -> do
s <- withNamespacedData @RawBytes sourceFile ns $ \s ->
-- eagerly load each stream to avoid issues with file handles
-- FIXME: use a different data structure like Vector
-- FIXME: concerns about loading entire namespace data into memory
S.toList_ s
pure (ns S.:> S.each s)
)

serialize
outputFile
networkId
slotNo
(defaultSerializationPlan & addChunks chunks)
withBinaryFile sourceFile ReadMode \handle -> do
let chunks =
case extractNamespaces of
Nothing -> S.each []
Just nsList ->
S.each nsList
& S.map
(\ns -> (ns S.:> namespacedData @RawBytes handle ns))

serialize
outputFile
networkId
slotNo
(defaultSerializationPlan & addChunks chunks)

pure Ok
\(e :: SomeException) -> do
Expand Down
Loading