Skip to content

[BUG] _arc.TryWriteAsync overwrites and creates empty files based on filetree #618

@Freymaurer

Description

@Freymaurer

Problem

When using _arc.TryWriteAsync it goes as follows:

  1. TryWriteAsync
    member this.TryWriteAsync(arcPath) =
        this.GetWriteContracts()
        |> fullFillContractBatchAsync arcPath
  1. GetWriteContracts; ends by creating ONE writeContract for each file in filesystem! As we read ALL files in root (even .git), this can be a LOT of contracts.
//...

// Iterates over filesystem and creates a write contract for every file. If possible, include DTO.
        // "No idea why we do this, i think this is done to also write .gitkeep files and folders? Then this should be part of getWriteContracts on the class objects.
        // Will keep it for now but this is not DRY" ~Kevin Frey
        _fs.Tree.ToFilePaths(true)
        |> Array.map (fun fp ->
            match Dictionary.tryGet fp filemap with
            | Some (dto,wb) -> Contract.createCreate(fp,dto,wb)
            | None -> Contract.createCreate(fp, DTOType.PlainText)
        )
  1. fullFillContractBatchAsync -> fulfillWriteContractAsync; Will in addition overwrite existing files with empty files? If this is true, this is a CRITICAL bug
let fulfillWriteContractAsync basePath (c : Contract) =
    crossAsync {
        try 
            match c.DTO with
            | Some (DTO.Text t) ->
                let path = ArcPathHelper.combine basePath c.Path
                do! FileSystemHelper.ensureDirectoryOfFileAsync path
                do! FileSystemHelper.writeFileTextAsync path t
                return Ok (c)
            | Some (DTO.Spreadsheet wb) ->
                let path = ArcPathHelper.combine basePath c.Path
                do! FileSystemHelper.ensureDirectoryOfFileAsync path
                do! FileSystemHelper.writeFileXlsxAsync path (wb :?> FsWorkbook)
                return Ok (c)           
            | None ->
                let path = ArcPathHelper.combine basePath c.Path
                do! FileSystemHelper.ensureDirectoryOfFileAsync path
                do! FileSystemHelper.writeFileTextAsync path ""
                return Ok (c)
            | _ -> 
                return Error (sprintf "Contract %s is not an ISA contract" c.Path)
        with
        | e -> return Error (sprintf "Error writing contract %s: %s" c.Path e.Message)
    }
    |> catchWith (fun e -> Error (sprintf "Error writing contract %s: %s" c.Path e.Message)) 

@HLWeil I propose not creating a single write contract for each file in filesystem, and instead only creating targeted contracts for ArcFiles.


Hotfix

@Etschbeijer

To avoid this critical issue in Swate, we will create a function we will use instead of WriteAsync.

type ARC with

    /// This function is a hotfix for ARCtrl bug #XXXX 
    /// copy paste from ARCtrl, except for filesystem iteration
    member this.GetWriteContractsSwate(?skipUpdateFS : bool) =
        if not (defaultArg skipUpdateFS false) then
            this.UpdateFileSystem()
        //let datamapFile = defaultArg datamapFile false
        /// Map containing the fileName and the types for DTOTypes and objects.
        let filemap = System.Collections.Generic.Dictionary<string, DTOType*DTO>()
        
        let investigationConverter = ArcInvestigation.toFsWorkbook
        filemap.Add (InvestigationFileName, (DTOType.ISA_Investigation, investigationConverter this |> box |> DTO.Spreadsheet))
        this.StaticHash <- this.GetLightHashCode()
        this.Studies
        |> Seq.iter (fun s ->
            s.StaticHash <- s.GetLightHashCode()
            filemap.Add (
                Identifier.Study.fileNameFromIdentifier s.Identifier,
                (DTOType.ISA_Study, ArcStudy.toFsWorkbook s |> box |> DTO.Spreadsheet)
            )
            if s.Datamap.IsSome (*&& datamapFile*) then 
                let dm = s.Datamap.Value
                dm.StaticHash <- dm.GetHashCode()
                filemap.Add (
                    Identifier.Study.datamapFileNameFromIdentifier s.Identifier,
                    (DTOType.ISA_Datamap, Spreadsheet.Datamap.toFsWorkbook dm |> box |> DTO.Spreadsheet)
                )
                
        )
        this.Assays
        |> Seq.iter (fun a ->
            a.StaticHash <- a.GetLightHashCode()
            filemap.Add (
                Identifier.Assay.fileNameFromIdentifier a.Identifier,
                (DTOType.ISA_Assay, ArcAssay.toFsWorkbook a |> box |> DTO.Spreadsheet))     
            if a.Datamap.IsSome (*&& datamapFile*) then 
                let dm = a.Datamap.Value
                dm.StaticHash <- dm.GetHashCode()
                filemap.Add (
                    Identifier.Assay.datamapFileNameFromIdentifier a.Identifier,
                    (DTOType.ISA_Datamap, Spreadsheet.Datamap.toFsWorkbook dm |> box |> DTO.Spreadsheet)
                )
        )
        this.Workflows
        |> Seq.iter (fun w ->
            w.StaticHash <- w.GetLightHashCode()
            filemap.Add (
                Identifier.Workflow.fileNameFromIdentifier w.Identifier,
                (DTOType.ISA_Workflow, ArcWorkflow.toFsWorkbook w |> box |> DTO.Spreadsheet)
            )
            if w.CWLDescription.IsSome then
                failwith "Not implemented yet: CWL description in ARC.GetWriteContracts"
            if w.Datamap.IsSome (*&& datamapFile*) then 
                let dm = w.Datamap.Value
                dm.StaticHash <- dm.GetHashCode()
                filemap.Add (
                    Identifier.Workflow.datamapFileNameFromIdentifier w.Identifier,
                    (DTOType.ISA_Datamap, Spreadsheet.Datamap.toFsWorkbook dm |> box |> DTO.Spreadsheet)
                )
        )
        this.Runs
        |> Seq.iter (fun r ->
            r.StaticHash <- r.GetLightHashCode()
            filemap.Add (
                Identifier.Run.fileNameFromIdentifier r.Identifier,
                (DTOType.ISA_Run, ArcRun.toFsWorkbook r |> box |> DTO.Spreadsheet)
            )
            if r.CWLDescription.IsSome then
                failwith "Not implemented yet: CWL description in ARC.GetWriteContracts"
            if r.CWLInput.Count > 0 then
                failwith "Not implemented yet: CWL YAML input in ARC.GetWriteContracts"
            if r.Datamap.IsSome (*&& datamapFile*) then 
                let dm = r.Datamap.Value
                dm.StaticHash <- dm.GetHashCode()
                filemap.Add (
                    Identifier.Run.datamapFileNameFromIdentifier r.Identifier,
                    (DTOType.ISA_Datamap, Spreadsheet.Datamap.toFsWorkbook dm |> box |> DTO.Spreadsheet)
                )
        )

        match this.License with
        | Some l ->
            match l.Type with
            | LicenseContentType.Fulltext ->
                l.StaticHash <- l.GetHashCode()
                filemap.Add (l.Path, (DTOType.PlainText, DTO.Text l.Content))
        | None ->
            ()

        // from here on: pseudocode, goal is to get all dtos from filemap and create contracts only for those! 
        // Check if this also creates .gitkeep files for assays,studies,runs,workflows. If not these must be added by hand
        // Check if it makes sense to use existing function for Assay createWriteContract. Check Contract project for this

        [
            for KeyValuePair(fp, (dto,wb)) in filemap do
                Contract.createCreate(fp,dto,wb))
        ]

    /// This function is a hotfix for ARCtrl bug #XXXX 
    member this.TryWriteAsyncSwate(arcPath) =
        this.GetWriteContracts()
        |> fullFillContractBatchAsync arcPath

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    Status
    No status

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions