diff --git a/scls-util/src/Cardano/SCLS/Util/Tool.hs b/scls-util/src/Cardano/SCLS/Util/Tool.hs index c0a4d1a..73783bc 100644 --- a/scls-util/src/Cardano/SCLS/Util/Tool.hs +++ b/scls-util/src/Cardano/SCLS/Util/Tool.hs @@ -1,6 +1,7 @@ {-# LANGUAGE BlockArguments #-} {-# LANGUAGE RecordWildCards #-} +-- | File manipulation utilities for SCLS files. module Cardano.SCLS.Util.Tool (splitFile, mergeFiles, extract, ExtractOptions (..)) where import Cardano.SCLS.Internal.Reader @@ -13,7 +14,7 @@ import Cardano.Types.Namespace (Namespace (..)) import Cardano.Types.Namespace qualified as Namespace import Cardano.Types.Network (NetworkId (Mainnet)) import Cardano.Types.SlotNo (SlotNo (SlotNo)) -import Control.Exception (SomeException, catch) +import Control.Exception (SomeException, bracket, catch) import Control.Monad (foldM) import Data.Function ((&)) import Data.Map.Strict qualified as Map @@ -24,6 +25,10 @@ import System.Directory (createDirectoryIfMissing) import System.FilePath (()) import System.IO +{- | Split a single SCLS file into multiple files by namespace. +Takes a source SCLS file and an output directory, and creates separate files +for each namespace found in the source file. +-} splitFile :: FilePath -> FilePath -> IO Result splitFile sourceFile outputDir = do putStrLn $ "Splitting file: " ++ sourceFile @@ -54,6 +59,11 @@ splitFile sourceFile outputDir = do putStrLn $ "Error: " ++ show e pure OtherError +{- | Merge multiple SCLS files into a single output file. + +Takes a list of input files and combines their namespace data into a single +output file. +-} mergeFiles :: FilePath -> [FilePath] -> IO Result mergeFiles _ [] = do putStrLn "No source files provided for merging" @@ -66,25 +76,21 @@ mergeFiles outputFile sourceFiles = do putStrLn $ "Found " ++ show (Map.size nsToFiles) ++ " unique namespace(s)" - let stream = - S.each (Map.toList nsToFiles) - & S.mapM_ \(ns, files) -> do - S.each files - & S.mapM - ( \file -> do - s <- withNamespacedData @RawBytes file ns $ \s -> - -- eagerly load each stream to avoid issues with file handles - -- FIXME: use a different data structure like Vector - -- FIXME: concerns about loading entire namespace data into memory - S.toList_ s - pure (ns S.:> S.each s) - ) - - serialize - outputFile - Mainnet - (SlotNo 1) - (defaultSerializationPlan & addChunks stream) + withNamespaceHandles nsToFiles $ \nsHandles -> do + let stream = + S.each nsHandles + & S.mapM_ \(ns, handles) -> do + S.each handles + & S.map + ( \handle -> + (ns S.:> namespacedData @RawBytes handle ns) + ) + + serialize + outputFile + Mainnet + (SlotNo 1) + (defaultSerializationPlan & addChunks stream) putStrLn "Merge complete" pure Ok @@ -106,11 +112,29 @@ mergeFiles outputFile sourceFiles = do ) mempty files + -- Open file handles for each namespace's files, execute the given action, + -- and ensure all handles are closed afterwards. + withNamespaceHandles :: Map.Map Namespace [FilePath] -> ([(Namespace, [Handle])] -> IO a) -> IO a + withNamespaceHandles nsToFiles = + bracket + ( Map.foldrWithKey + ( \ns files acc -> do + handles <- mapM (\file -> openFile file ReadMode) files + (:) (ns, handles) <$> acc + ) + (pure []) + nsToFiles + ) + (mapM_ (mapM_ hClose . snd)) data ExtractOptions = ExtractOptions { extractNamespaces :: Maybe [Namespace] } +{- | Extract specific data from an SCLS file into a new file. +Takes a source SCLS file, an output file, and extraction options specifying +which data to extract. +-} extract :: FilePath -> FilePath -> ExtractOptions -> IO Result extract sourceFile outputFile ExtractOptions{..} = do putStrLn $ "Extracting from file: " ++ sourceFile @@ -119,26 +143,20 @@ extract sourceFile outputFile ExtractOptions{..} = do do Hdr{..} <- withHeader sourceFile pure - let chunks = - case extractNamespaces of - Nothing -> S.each [] - Just nsList -> - S.each nsList - & S.mapM - ( \ns -> do - s <- withNamespacedData @RawBytes sourceFile ns $ \s -> - -- eagerly load each stream to avoid issues with file handles - -- FIXME: use a different data structure like Vector - -- FIXME: concerns about loading entire namespace data into memory - S.toList_ s - pure (ns S.:> S.each s) - ) - - serialize - outputFile - networkId - slotNo - (defaultSerializationPlan & addChunks chunks) + withBinaryFile sourceFile ReadMode \handle -> do + let chunks = + case extractNamespaces of + Nothing -> S.each [] + Just nsList -> + S.each nsList + & S.map + (\ns -> (ns S.:> namespacedData @RawBytes handle ns)) + + serialize + outputFile + networkId + slotNo + (defaultSerializationPlan & addChunks chunks) pure Ok \(e :: SomeException) -> do