Skip to content
Open
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 57 additions & 40 deletions scls-util/src/Cardano/SCLS/Util/Tool.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE RecordWildCards #-}

-- | File manipulation utilities for SCLS files.
module Cardano.SCLS.Util.Tool (splitFile, mergeFiles, extract, ExtractOptions (..)) where

import Cardano.SCLS.Internal.Reader
Expand All @@ -13,9 +14,10 @@ import Cardano.Types.Namespace (Namespace (..))
import Cardano.Types.Namespace qualified as Namespace
import Cardano.Types.Network (NetworkId (Mainnet))
import Cardano.Types.SlotNo (SlotNo (SlotNo))
import Control.Exception (SomeException, catch)
import Control.Exception (SomeException, bracket, catch)
import Control.Monad (foldM)
import Data.Function ((&))
import Data.Map (Map)
import Data.Map.Strict qualified as Map
import Data.MemPack.Extra
import Streaming qualified as S
Expand All @@ -24,6 +26,10 @@ import System.Directory (createDirectoryIfMissing)
import System.FilePath ((</>))
import System.IO

{- | Split a single SCLS file into multiple files by namespace.
Takes a source SCLS file and an output directory, and creates separate files
for each namespace found in the source file.
-}
splitFile :: FilePath -> FilePath -> IO Result
splitFile sourceFile outputDir = do
putStrLn $ "Splitting file: " ++ sourceFile
Expand Down Expand Up @@ -54,6 +60,11 @@ splitFile sourceFile outputDir = do
putStrLn $ "Error: " ++ show e
pure OtherError

{- | Merge multiple SCLS files into a single output file.

Takes a list of input files and combines their namespace data into a single
output file.
-}
mergeFiles :: FilePath -> [FilePath] -> IO Result
mergeFiles _ [] = do
putStrLn "No source files provided for merging"
Expand All @@ -66,25 +77,21 @@ mergeFiles outputFile sourceFiles = do

putStrLn $ "Found " ++ show (Map.size nsToFiles) ++ " unique namespace(s)"

let stream =
S.each (Map.toList nsToFiles)
& S.mapM_ \(ns, files) -> do
S.each files
& S.mapM
( \file -> do
s <- withNamespacedData @RawBytes file ns $ \s ->
-- eagerly load each stream to avoid issues with file handles
-- FIXME: use a different data structure like Vector
-- FIXME: concerns about loading entire namespace data into memory
S.toList_ s
pure (ns S.:> S.each s)
)

serialize
outputFile
Mainnet
(SlotNo 1)
(defaultSerializationPlan & addChunks stream)
withNamespaceHandles nsToFiles $ \nsHandles -> do
let stream =
S.each nsHandles
& S.mapM_ \(ns, handles) -> do
S.each handles
& S.map
( \handle ->
(ns S.:> namespacedData @RawBytes handle ns)
)

serialize
outputFile
Mainnet
(SlotNo 1)
(defaultSerializationPlan & addChunks stream)

putStrLn "Merge complete"
pure Ok
Expand All @@ -106,11 +113,27 @@ mergeFiles outputFile sourceFiles = do
)
mempty
files
withNamespaceHandles :: Map Namespace [FilePath] -> ([(Namespace, [Handle])] -> IO a) -> IO a
withNamespaceHandles nsToFiles =
bracket
( Map.foldrWithKey
( \ns files acc -> do
handles <- mapM (\file -> openFile file ReadMode) files
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will leak handles in case if an exceptional will arrive in the middle when we opened some handles. There are two ways to resolve that:

  1. build nester brackets with using foldr so every bracket will be closed
  2. use bracketOnError to allocate reasources and close if any error happened (keeping only a single bracket).

(:) (ns, handles) <$> acc
)
(pure [])
nsToFiles
)
(mapM_ (mapM_ hClose . snd))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe that Handle uses MVar inside so hClose may be interruptible it means that if asynchronous exception arrives we may close only some of the handles. Having something like hClose x1 finally hClose x2 ... is cleaner way to do it (or mask uninterruptible (I prefer not doing that))


data ExtractOptions = ExtractOptions
{ extractNamespaces :: Maybe [Namespace]
}

{- | Extract specific data from an SCLS file into a new file.
Takes a source SCLS file, an output file, and extraction options specifying
which data to extract.
-}
extract :: FilePath -> FilePath -> ExtractOptions -> IO Result
extract sourceFile outputFile ExtractOptions{..} = do
putStrLn $ "Extracting from file: " ++ sourceFile
Expand All @@ -119,26 +142,20 @@ extract sourceFile outputFile ExtractOptions{..} = do
do
Hdr{..} <- withHeader sourceFile pure

let chunks =
case extractNamespaces of
Nothing -> S.each []
Just nsList ->
S.each nsList
& S.mapM
( \ns -> do
s <- withNamespacedData @RawBytes sourceFile ns $ \s ->
-- eagerly load each stream to avoid issues with file handles
-- FIXME: use a different data structure like Vector
-- FIXME: concerns about loading entire namespace data into memory
S.toList_ s
pure (ns S.:> S.each s)
)

serialize
outputFile
networkId
slotNo
(defaultSerializationPlan & addChunks chunks)
withBinaryFile sourceFile ReadMode \handle -> do
let chunks =
case extractNamespaces of
Nothing -> S.each []
Just nsList ->
S.each nsList
& S.map
(\ns -> (ns S.:> namespacedData @RawBytes handle ns))

serialize
outputFile
networkId
slotNo
(defaultSerializationPlan & addChunks chunks)

pure Ok
\(e :: SomeException) -> do
Expand Down
Loading