Problem
When using _arc.TryWriteAsync it goes as follows:
- TryWriteAsync
member this.TryWriteAsync(arcPath) =
this.GetWriteContracts()
|> fullFillContractBatchAsync arcPath
- GetWriteContracts; ends by creating ONE writeContract for each file in filesystem! As we read ALL files in root (even
.git), this can be a LOT of contracts.
//...
// Iterates over filesystem and creates a write contract for every file. If possible, include DTO.
// "No idea why we do this, i think this is done to also write .gitkeep files and folders? Then this should be part of getWriteContracts on the class objects.
// Will keep it for now but this is not DRY" ~Kevin Frey
_fs.Tree.ToFilePaths(true)
|> Array.map (fun fp ->
match Dictionary.tryGet fp filemap with
| Some (dto,wb) -> Contract.createCreate(fp,dto,wb)
| None -> Contract.createCreate(fp, DTOType.PlainText)
)
- fullFillContractBatchAsync -> fulfillWriteContractAsync; Will in addition overwrite existing files with empty files? If this is true, this is a CRITICAL bug
let fulfillWriteContractAsync basePath (c : Contract) =
crossAsync {
try
match c.DTO with
| Some (DTO.Text t) ->
let path = ArcPathHelper.combine basePath c.Path
do! FileSystemHelper.ensureDirectoryOfFileAsync path
do! FileSystemHelper.writeFileTextAsync path t
return Ok (c)
| Some (DTO.Spreadsheet wb) ->
let path = ArcPathHelper.combine basePath c.Path
do! FileSystemHelper.ensureDirectoryOfFileAsync path
do! FileSystemHelper.writeFileXlsxAsync path (wb :?> FsWorkbook)
return Ok (c)
| None ->
let path = ArcPathHelper.combine basePath c.Path
do! FileSystemHelper.ensureDirectoryOfFileAsync path
do! FileSystemHelper.writeFileTextAsync path ""
return Ok (c)
| _ ->
return Error (sprintf "Contract %s is not an ISA contract" c.Path)
with
| e -> return Error (sprintf "Error writing contract %s: %s" c.Path e.Message)
}
|> catchWith (fun e -> Error (sprintf "Error writing contract %s: %s" c.Path e.Message))
@HLWeil I propose not creating a single write contract for each file in filesystem, and instead only creating targeted contracts for ArcFiles.
Hotfix
@Etschbeijer
To avoid this critical issue in Swate, we will create a function we will use instead of WriteAsync.
type ARC with
/// This function is a hotfix for ARCtrl bug #XXXX
/// copy paste from ARCtrl, except for filesystem iteration
member this.GetWriteContractsSwate(?skipUpdateFS : bool) =
if not (defaultArg skipUpdateFS false) then
this.UpdateFileSystem()
//let datamapFile = defaultArg datamapFile false
/// Map containing the fileName and the types for DTOTypes and objects.
let filemap = System.Collections.Generic.Dictionary<string, DTOType*DTO>()
let investigationConverter = ArcInvestigation.toFsWorkbook
filemap.Add (InvestigationFileName, (DTOType.ISA_Investigation, investigationConverter this |> box |> DTO.Spreadsheet))
this.StaticHash <- this.GetLightHashCode()
this.Studies
|> Seq.iter (fun s ->
s.StaticHash <- s.GetLightHashCode()
filemap.Add (
Identifier.Study.fileNameFromIdentifier s.Identifier,
(DTOType.ISA_Study, ArcStudy.toFsWorkbook s |> box |> DTO.Spreadsheet)
)
if s.Datamap.IsSome (*&& datamapFile*) then
let dm = s.Datamap.Value
dm.StaticHash <- dm.GetHashCode()
filemap.Add (
Identifier.Study.datamapFileNameFromIdentifier s.Identifier,
(DTOType.ISA_Datamap, Spreadsheet.Datamap.toFsWorkbook dm |> box |> DTO.Spreadsheet)
)
)
this.Assays
|> Seq.iter (fun a ->
a.StaticHash <- a.GetLightHashCode()
filemap.Add (
Identifier.Assay.fileNameFromIdentifier a.Identifier,
(DTOType.ISA_Assay, ArcAssay.toFsWorkbook a |> box |> DTO.Spreadsheet))
if a.Datamap.IsSome (*&& datamapFile*) then
let dm = a.Datamap.Value
dm.StaticHash <- dm.GetHashCode()
filemap.Add (
Identifier.Assay.datamapFileNameFromIdentifier a.Identifier,
(DTOType.ISA_Datamap, Spreadsheet.Datamap.toFsWorkbook dm |> box |> DTO.Spreadsheet)
)
)
this.Workflows
|> Seq.iter (fun w ->
w.StaticHash <- w.GetLightHashCode()
filemap.Add (
Identifier.Workflow.fileNameFromIdentifier w.Identifier,
(DTOType.ISA_Workflow, ArcWorkflow.toFsWorkbook w |> box |> DTO.Spreadsheet)
)
if w.CWLDescription.IsSome then
failwith "Not implemented yet: CWL description in ARC.GetWriteContracts"
if w.Datamap.IsSome (*&& datamapFile*) then
let dm = w.Datamap.Value
dm.StaticHash <- dm.GetHashCode()
filemap.Add (
Identifier.Workflow.datamapFileNameFromIdentifier w.Identifier,
(DTOType.ISA_Datamap, Spreadsheet.Datamap.toFsWorkbook dm |> box |> DTO.Spreadsheet)
)
)
this.Runs
|> Seq.iter (fun r ->
r.StaticHash <- r.GetLightHashCode()
filemap.Add (
Identifier.Run.fileNameFromIdentifier r.Identifier,
(DTOType.ISA_Run, ArcRun.toFsWorkbook r |> box |> DTO.Spreadsheet)
)
if r.CWLDescription.IsSome then
failwith "Not implemented yet: CWL description in ARC.GetWriteContracts"
if r.CWLInput.Count > 0 then
failwith "Not implemented yet: CWL YAML input in ARC.GetWriteContracts"
if r.Datamap.IsSome (*&& datamapFile*) then
let dm = r.Datamap.Value
dm.StaticHash <- dm.GetHashCode()
filemap.Add (
Identifier.Run.datamapFileNameFromIdentifier r.Identifier,
(DTOType.ISA_Datamap, Spreadsheet.Datamap.toFsWorkbook dm |> box |> DTO.Spreadsheet)
)
)
match this.License with
| Some l ->
match l.Type with
| LicenseContentType.Fulltext ->
l.StaticHash <- l.GetHashCode()
filemap.Add (l.Path, (DTOType.PlainText, DTO.Text l.Content))
| None ->
()
// from here on: pseudocode, goal is to get all dtos from filemap and create contracts only for those!
// Check if this also creates .gitkeep files for assays,studies,runs,workflows. If not these must be added by hand
// Check if it makes sense to use existing function for Assay createWriteContract. Check Contract project for this
[
for KeyValuePair(fp, (dto,wb)) in filemap do
Contract.createCreate(fp,dto,wb))
]
/// This function is a hotfix for ARCtrl bug #XXXX
member this.TryWriteAsyncSwate(arcPath) =
this.GetWriteContracts()
|> fullFillContractBatchAsync arcPath
Problem
When using
_arc.TryWriteAsyncit goes as follows:.git), this can be a LOT of contracts.@HLWeil I propose not creating a single write contract for each file in filesystem, and instead only creating targeted contracts for ArcFiles.
Hotfix
@Etschbeijer
To avoid this critical issue in Swate, we will create a function we will use instead of WriteAsync.