Skip to content

Commit 2ebac48

Browse files
authored
Add option to persist handle to NetCDF files (#31)
* add interface for keeping handles open for faster dataset opening * update tests * test on lts instead of 1.9 * Add dependabot * test 1.10 since workflows are too old
1 parent 7773192 commit 2ebac48

File tree

7 files changed

+91
-27
lines changed

7 files changed

+91
-27
lines changed

.github/dependabot.yml

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
2+
version: 2
3+
updates:
4+
- package-ecosystem: "github-actions"
5+
directory: "/" # Location of package manifests
6+
schedule:
7+
interval: "weekly"

.github/workflows/CI.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
fail-fast: false
1414
matrix:
1515
version:
16-
- '1.9'
16+
- '1.10'
1717
- '1'
1818
- 'nightly'
1919
os:

Project.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "YAXArrayBase"
22
uuid = "90b8fcef-0c2d-428d-9c56-5f86629e9d14"
33
authors = ["Fabian Gans <[email protected]>"]
4-
version = "0.7.4"
4+
version = "0.7.5"
55

66
[deps]
77
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"

ext/ArchGDALExt/archgdaldataset.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ function GDALDataset(filename; mode="r")
7171
end
7272
Base.haskey(ds::GDALDataset, k) = in(k, ("X", "Y")) || haskey(ds.bands, k)
7373
#Implement Dataset interface
74-
function YAB.get_var_handle(ds::GDALDataset, name)
74+
function YAB.get_var_handle(ds::GDALDataset, name; persist=true)
7575
if name == "X"
7676
range(ds.trans[1], length = ds.bandsize[1], step = ds.trans[2])
7777
elseif name == "Y"

ext/NetCDFExt.jl

+38-9
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,33 @@ as a data sink:
1515
struct NetCDFDataset
1616
filename::String
1717
mode::UInt16
18+
handle::Base.RefValue{Union{Nothing, NcFile}}
1819
end
19-
NetCDFDataset(filename;mode="r") = mode == "r" ? NetCDFDataset(filename,NC_NOWRITE) : NetCDFDataset(filename,NC_WRITE)
20+
# Convenience constructor: `mode == "r"` selects `NC_NOWRITE`, any other value
# selects `NC_WRITE`. The handle slot starts out empty (`nothing`).
function NetCDFDataset(filename; mode="r")
    handle = Ref{Union{Nothing,NcFile}}(nothing)
    if mode == "r"
        return NetCDFDataset(filename, NC_NOWRITE, handle)
    else
        return NetCDFDataset(filename, NC_WRITE, handle)
    end
end
24+
# Apply `f` to an `NcFile` belonging to `ds`: reuse the handle stored in
# `ds.handle` when one is set, otherwise delegate to `NetCDF.open(f, ds.filename)`.
function dsopen(f, ds::NetCDFDataset)
    nc = ds.handle[]
    return nc === nothing ? NetCDF.open(f, ds.filename) : f(nc)
end
31+
# Run `f(ds)` while an open `NcFile` is stored in `ds.handle`, so that accessors
# called inside `f` can reuse it instead of reopening the file each time.
# Returns whatever `f(ds)` returns.
#
# If `ds` already carries a handle (nested call), `f(ds)` is invoked directly and
# the existing handle is left untouched. Otherwise the file is opened with
# `ds.mode` through the function form of `NetCDF.open`, so the file is closed
# deterministically when `f` returns or throws. The previous implementation only
# dropped the reference and never closed the file explicitly.
# NOTE(review): relies on the function form of `NetCDF.open` closing the file on
# exit and forwarding the `mode` keyword; confirm against the NetCDF.jl version in use.
# Variable handles obtained with `persist=false` inside `f` refer to the open
# file and must not be used after this call returns.
function YAB.open_dataset_handle(f, ds::NetCDFDataset)
    ds.handle[] === nothing || return f(ds)
    return NetCDF.open(ds.filename; mode=ds.mode) do nc
        ds.handle[] = nc
        try
            f(ds)
        finally
            # Always clear the slot so a stale handle is never reused later.
            ds.handle[] = nothing
        end
    end
end
43+
44+
2045

2146
import .NetCDF: AbstractDiskArray, readblock!, writeblock!, haschunks, eachchunk
2247

@@ -49,15 +74,19 @@ YAB.iscompressed(v::NetCDFVariable) = NetCDF.open(v->v.compress > 0, v.filename,
4974

5075
Base.size(v::NetCDFVariable) = v.size
5176

52-
YAB.get_var_dims(ds::NetCDFDataset,name) = NetCDF.open(v->map(i->i.name,v[name].dim),ds.filename)
53-
YAB.get_varnames(ds::NetCDFDataset) = NetCDF.open(v->collect(keys(v.vars)),ds.filename)
54-
YAB.get_var_attrs(ds::NetCDFDataset, name) = NetCDF.open(v->v[name].atts,ds.filename)
55-
YAB.get_global_attrs(ds::NetCDFDataset) = NetCDF.open(nc->nc.gatts, ds.filename)
56-
function Base.getindex(ds::NetCDFDataset, i)
57-
s,et = NetCDF.open(j->(size(j),eltype(j)),ds.filename,i)
58-
NetCDFVariable{et,length(s)}(ds.filename, i, s)
77+
# Names of the dimensions of variable `name`, read through `dsopen`.
function YAB.get_var_dims(ds::NetCDFDataset, name)
    return dsopen(ds) do nc
        map(d -> d.name, nc[name].dim)
    end
end
78+
# Collect the keys of the file's `vars` collection, i.e. the variable names.
function YAB.get_varnames(ds::NetCDFDataset)
    return dsopen(ds) do nc
        collect(keys(nc.vars))
    end
end
79+
# Return the `atts` field of variable `name`, read through `dsopen`.
function YAB.get_var_attrs(ds::NetCDFDataset, name)
    return dsopen(ds) do nc
        nc[name].atts
    end
end
80+
# Return the `gatts` field of the file, read through `dsopen`.
function YAB.get_global_attrs(ds::NetCDFDataset)
    return dsopen(ds) do nc
        nc.gatts
    end
end
81+
# Return a handle to variable `i` of `ds`.
#
# - `persist=false` and `ds` currently holds an open file handle: return the
#   variable object taken directly from that open file (`ds.handle[][i]`).
# - otherwise (`persist=true`, the default, or no open handle): return a
#   `NetCDFVariable` built from the file name, variable name and size.
#
# Size and element type are queried through `dsopen`, like the other accessors,
# so an already open handle is reused instead of opening the file a second time.
# NOTE(review): assumes indexing an `NcFile` by name yields the same variable
# object that `NetCDF.open(filename, name)` would provide; confirm.
function YAB.get_var_handle(ds::NetCDFDataset, i; persist=true)
    nc = ds.handle[]
    if !persist && nc !== nothing
        return nc[i]
    end
    s, et = dsopen(ds) do file
        v = file[i]
        (size(v), eltype(v))
    end
    return NetCDFVariable{et,length(s)}(ds.filename, i, s)
end
60-
Base.haskey(ds::NetCDFDataset,k) = NetCDF.open(nc->haskey(nc.vars,k),ds.filename)
89+
# A key is present when the file's `vars` collection contains it.
function Base.haskey(ds::NetCDFDataset, k)
    return dsopen(ds) do nc
        haskey(nc.vars, k)
    end
end
6190

6291
function YAB.add_var(p::NetCDFDataset, T::Type, varname, s, dimnames, attr;
6392
chunksize=s, compress = -1)

src/datasets/datasetinterface.jl

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#Functions to be implemented for Dataset sources:
22
"Return a DiskArray handle to a dataset"
3-
get_var_handle(ds, name) = ds[name]
3+
function get_var_handle(ds, name; persist=true)
    # Generic fallback: `persist` is accepted for interface compatibility but is
    # not used here; the dataset is simply indexed by `name`.
    return ds[name]
end
44

55
"Return a list of variable names"
66
function get_varnames end
@@ -18,6 +18,11 @@ function get_global_attrs end
1818
"Initialize and return a handle to a new empty dataset"
1919
function create_empty end
2020

21+
"Apply a function `f` on a dataset `ds` while keeping possible file handles open during the operations"
22+
function open_dataset_handle(f, ds)
    # Generic fallback: there is no handle to manage here, so simply apply `f`
    # to the dataset and hand back its result.
    return f(ds)
end
25+
2126
"""
2227
add_var(ds, T, name, s, dimlist, atts)
2328

test/datasets.jl

+37-14
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,29 @@ h = get_var_handle(ds_nc, "tas")
3838
@test all(isapprox.(h[1:2,1:2], [215.893 217.168; 215.805 217.03]))
3939
@test allow_parallel_write(ds_nc) == false
4040
@test allow_missings(ds_nc) == false
41+
#Repeat the same tests while the dataset handle is kept open
42+
ds_nc2 = YAXArrayBase.to_dataset(p2)
43+
YAXArrayBase.open_dataset_handle(ds_nc2) do ds_nc
44+
@test ds_nc.handle[] !== nothing
45+
vn = get_varnames(ds_nc)
46+
@test sort(vn) == ["area", "lat", "lat_bnds", "lon", "lon_bnds", "msk_rgn",
47+
"plev", "pr", "tas", "time", "time_bnds", "ua"]
48+
@test get_var_dims(ds_nc, "tas") == ["lon", "lat", "time"]
49+
@test get_var_dims(ds_nc, "area") == ["lon", "lat"]
50+
@test get_var_dims(ds_nc, "time") == ["time"]
51+
@test get_var_dims(ds_nc, "time_bnds") == ["bnds", "time"]
52+
@test get_var_attrs(ds_nc,"tas")["long_name"] == "air_temperature"
53+
h1 = get_var_handle(ds_nc, "tas",persist=true)
54+
@test !(h1 isa NetCDF.NcVar)
55+
@test !YAXArrayBase.iscompressed(h1)
56+
@test all(isapprox.(h1[1:2,1:2], [215.893 217.168; 215.805 217.03]))
57+
h2 = get_var_handle(ds_nc, "tas",persist=false)
58+
@test h2 isa NetCDF.NcVar
59+
@test !YAXArrayBase.iscompressed(h2)
60+
@test all(isapprox.(h2[1:2,1:2], [215.893 217.168; 215.805 217.03]))
61+
@test allow_parallel_write(ds_nc) == false
62+
@test allow_missings(ds_nc) == false
63+
end
4164
end
4265

4366
@testset "Reading Zarr" begin
@@ -71,22 +94,22 @@ end
7194
@test allow_missings(ds_tif) == true
7295
end
7396
# Write/read round trip for a dataset backend `T`: create an empty dataset at a
# temporary path, add two coordinate variables and one data variable, write
# values into the data variable, then check names, dimensions, attributes and
# values through the dataset interface.
function test_write(T)
    path = tempname()
    ds = create_empty(T, path)
    add_var(ds, 0.5:1:9.5, "lon", ("lon",), Dict("units"=>"degrees_east"))
    add_var(ds, 20:-1.0:1, "lat", ("lat",), Dict("units"=>"degrees_north"))
    tas = add_var(
        ds, Float32, "tas", (10,20), ("lon", "lat"),
        Dict{String,Any}("units"=>"Celsius"),
    )

    values = collect(reshape(1:200, 10, 20))
    tas[:,:] = values

    @test sort(get_varnames(ds)) == ["lat","lon","tas"]
    @test get_var_dims(ds, "tas") == ["lon", "lat"]
    @test get_var_dims(ds, "lon") == ["lon"]
    @test get_var_attrs(ds,"tas")["units"] == "Celsius"
    lon = get_var_handle(ds, "lon")
    @test lon[:] == 0.5:1:9.5
    tas_read = get_var_handle(ds, "tas")
    @test tas_read[1:2,1:2] == [1 11; 2 12]
end
91114

92115
@testset "Writing NetCDF" begin

0 commit comments

Comments
 (0)