Skip to content

Commit e767799

Browse files
authored
Introduce update argument to collect_results! to sync results files. (#286)
* Introduce `update` argument to `collect_results!` to sync results files. This allows `collect_results!` to synchronize an existing results collection with the files it scans. If it encounters a file with a newer `mtime` than the results collection, it updates the entry. If it has entries for which the files are missing, it deletes those entries. * Run update tests in temporary directory. * Record and check against individual file `mtime`s. * Use field in JLD2 file for mtime info instead of df column. * Be conservative when dealing with old result collections. * Update version number and changelog * correct a typo
1 parent 3ac4e8d commit e767799

File tree

4 files changed

+134
-7
lines changed

4 files changed

+134
-7
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# 2.5.0
2+
* Add an `update` option to `collect_results!` that allows updating an existing results collection when data files have been modified or deleted.
13
# 2.4.1
24
* `savename`'s default options now have `sigdigits = 3` instead of `digits = 3` as stated in the documentation string. This was supposed to happen already since 2.0 but did not because of a bug. (#284)
35
* Any subtypes of `AbstractDict` now work with DrWatson (#283).

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "DrWatson"
22
uuid = "634d3b9d-ee7a-5ddf-bec9-22491ea816e1"
33
repo = "https://github.com/JuliaDynamics/DrWatson.jl.git"
4-
version = "2.4.4"
4+
version = "2.5.0"
55

66
[deps]
77
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"

src/result_collection.jl

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ See also [`collect_results`](@ref).
3939
* `rpath = nothing` : If not `nothing` stores `relpath(file,rpath)` of result-files
4040
in `df`. By default the absolute path is used.
4141
* `verbose = true` : Print (using `@info`) information about the process.
42+
* `update = false` : Update data from modified files and remove entries for deleted
43+
files.
4244
* `white_list` : List of keys to use from result file. By default
4345
uses all keys from all loaded result-files.
4446
* `black_list = [:gitcommit, :gitpatch, :script]`: List of keys not to include from result-file.
@@ -70,20 +72,37 @@ collect_results!(
7072
joinpath(dirname(folder), "results_$(basename(folder)).jld2"),
7173
folder; kwargs...)
7274

75+
struct InvalidResultsCollection <: Exception
76+
msg::AbstractString
77+
end
78+
showerror(io::IO, e::InvalidResultsCollection) = print(io, e.msg)
79+
7380
function collect_results!(filename, folder;
7481
valid_filetypes = [".bson", "jld", ".jld2"],
7582
subfolders = false,
7683
rpath = nothing,
7784
verbose = true,
85+
update = false,
7886
newfile = false, # keyword only for defining collect_results without !
7987
kwargs...)
8088

8189
if newfile || !isfile(filename)
8290
!newfile && verbose && @info "Starting a new result collection..."
8391
df = DataFrames.DataFrame()
92+
mtimes = Dict{String,Float64}()
8493
else
8594
verbose && @info "Loading existing result collection..."
86-
df = wload(filename)["df"]
95+
data = wload(filename)
96+
df = data["df"]
97+
# Check if we have pre-recorded mtimes (if not this could be because of an old results database).
98+
if "mtime" ∈ keys(data)
99+
mtimes = data["mtime"]
100+
else
101+
if update
102+
throw(InvalidResultsCollection("update of existing results collection requested, but no previously recorded modification time found. Likely the existing results collection was produced with an old version of DrWatson. Recomputing the collection solves this problem."))
103+
end
104+
mtimes = nothing
105+
end
87106
end
88107
@info "Scanning folder $folder for result files."
89108

@@ -99,24 +118,66 @@ function collect_results!(filename, folder;
99118
end
100119

101120
n = 0 # new entries added
121+
u = 0 # entries updated
102122
existing_files = "path" in string.(names(df)) ? df[:,:path] : ()
103123
for file ∈ allfiles
104124
is_valid_file(file, valid_filetypes) || continue
105125
# maybe use relative path
106126
file = rpath === nothing ? file : relpath(file, rpath)
127+
mtime_file = mtime(file)
128+
replace_entry = false
107129
#already added?
108-
file ∈ existing_files && continue
130+
if file ∈ existing_files
131+
if !update
132+
continue
133+
end
134+
135+
# Error if file is not in the mtimes database
136+
if file ∉ keys(mtimes)
137+
throw(InvalidResultsCollection("existing results correction is corrupt: no `mtime` entry for file $(file) found."))
138+
end
139+
140+
# Skip if mtime is the same as the one previously recorded
141+
if mtimes[file] == mtime_file
142+
continue
143+
end
144+
145+
replace_entry = true
146+
end
147+
148+
# Now update the mtime of the new or modified file
149+
mtimes[file] = mtime_file
109150

110151
data = rpath === nothing ? wload(file) : wload(joinpath(rpath, file))
111152
df_new = to_data_row(data, file; kwargs...)
112153
#add filename
113154
df_new[!, :path] .= file
114-
155+
if replace_entry
156+
# Delete the row with the old data
157+
delete!(df, findfirst((x)->(x.path == file), eachrow(df)))
158+
u += 1
159+
else
160+
n += 1
161+
end
115162
df = merge_dataframes!(df, df_new)
116-
n += 1
117163
end
118-
verbose && @info "Added $n entries."
119-
!newfile && wsave(filename, Dict("df" => df))
164+
if update
165+
# Delete entries with nonexisting files.
166+
idx = findall((x)->(!isfile(x.path)), eachrow(df))
167+
delete!(df, idx)
168+
verbose && @info "Added $n entries. Updated $u entries. Deleted $(length(idx)) entries."
169+
else
170+
verbose && @info "Added $n entries."
171+
end
172+
if !newfile
173+
data = Dict{String,Any}("df" => df)
174+
# mtimes is only `nothing` if we are working with an older collection
175+
# We want to keep it that way, so do not try to create mtimes entry.
176+
if !isnothing(mtimes)
177+
data["mtime"] = mtimes
178+
end
179+
wsave(filename, data)
180+
end
120181
return df
121182
end
122183

test/update_results_tests.jl

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,70 @@ subfolders = true, special_list=special_list, black_list = black_list)
132132
@test sort(names(cres3)) == sort(names(cres2))
133133
@test size(cres3) == size(cres2)
134134

135+
###############################################################################
136+
# test updating feature #
137+
###############################################################################
138+
139+
@testset "Test updating feature $(mtime_info)" for mtime_info in ["with mtime", "without initial update", "without mtime", "with corrupt mtime"]
140+
# Create a temp directory and run the tests, creating files in that folder
141+
# Julia takes care of removing the folder after the function is done.
142+
mktempdir(datadir()) do folder
143+
# Create three data files with slightly different data
144+
d = Dict("idx" => :keep, "b" => "some_value")
145+
fname_keep = joinpath(folder, savename(d, ending, ignores = ("b",)))
146+
DrWatson.wsave(fname_keep, d)
147+
148+
d = Dict("idx" => :delete, "b" => "some_other_value")
149+
fname_delete = joinpath(folder, savename(d, ending, ignores = ("b",)))
150+
DrWatson.wsave(fname_delete, d)
151+
152+
d = Dict("idx" => :to_modify, "b" => "original_value")
153+
fname_modify = joinpath(folder, savename(d, ending, ignores = ("b",)))
154+
DrWatson.wsave(fname_modify, d)
155+
156+
# Collect our "results"
157+
if mtime_info == "without initial update"
158+
# Test this case: https://github.com/JuliaDynamics/DrWatson.jl/pull/286#pullrequestreview-755999610
159+
cres_before = collect_results!(folder; update = false)
160+
else
161+
cres_before = collect_results!(folder; update = true)
162+
end
163+
164+
if mtime_info == "without mtime"
165+
# Leave out the mtime information to simulate old results collection.
166+
wsave(joinpath(dirname(folder), "results_$(basename(folder)).jld2"), Dict("df" => cres_before))
167+
elseif mtime_info == "with corrupt mtime"
168+
# Corrupt mtime information
169+
wsave(joinpath(dirname(folder), "results_$(basename(folder)).jld2"), Dict("df" => cres_before, "mtime" => Dict{String,Float64}()))
170+
else
171+
# Modify one data file
172+
d = Dict("idx" => :to_modify, "b" => "modified_value")
173+
DrWatson.wsave(fname_modify, d)
174+
175+
# Delete another data file
176+
rm(fname_delete)
177+
end
178+
179+
# Collect the "results" again
180+
if (mtime_info == "without mtime") || (mtime_info == "with corrupt mtime")
181+
@test_throws DrWatson.InvalidResultsCollection collect_results!(folder; update = true)
182+
else
183+
cres_after = collect_results!(folder; update = true)
184+
185+
# Compare the before and after - they should differ
186+
@test cres_before[:,[:idx, :b]] != cres_after[:,[:idx, :b]]
187+
# The unmodified entry should be the same
188+
@test ((:keep ∈ cres_before.idx) && (:keep ∈ cres_after.idx))
189+
# The deleted entry should be gone
190+
@test ((:delete ∈ cres_before.idx) && (:delete ∉ cres_after.idx))
191+
# The modified entry should differ between before and after
192+
@test cres_before.b[cres_before.idx .== :to_modify][1] == "original_value"
193+
@test cres_after.b[cres_after.idx .== :to_modify][1] == "modified_value"
194+
end
195+
end
196+
end
197+
198+
135199
###############################################################################
136200
# Quickactivate macro #
137201
###############################################################################

0 commit comments

Comments
 (0)