Skip to content

Commit 4a551e2

Browse files
committed
Add soft link and external links in JLD2
1 parent f8fefb6 commit 4a551e2

File tree

12 files changed

+732
-196
lines changed

12 files changed

+732
-196
lines changed

CHANGELOG.md

Lines changed: 176 additions & 113 deletions
Large diffs are not rendered by default.

docs/make.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ makedocs(;
2020
"Internals & Design" => "internals.md",
2121
"HDF5 Compatibility" => "hdf5compat.md",
2222
"Advanced Usage" => "advanced.md",
23+
"Dataset Links" => "external_links.md",
2324
"Legacy" => "legacy.md",
2425
"Troubleshooting" => "troubleshooting.md"
2526
],

docs/src/external_links.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Links in JLD2
2+
3+
JLD2 supports three types of links compatible with the HDF5 specification:
4+
5+
- **Hard Links** (default): Direct pointers to objects within the same file
6+
- **Soft Links**: Path-based symbolic links resolved at access time
7+
- **External Links**: References to objects in different files
8+
9+
## Usage
10+
11+
```@example
12+
using JLD2
13+
jldopen("file.jld2", "w") do f
14+
f["data"] = [1, 2, 3, 4, 5]
15+
16+
# Soft link (within same file)
17+
f["data_alias"] = JLD2.Link("/data")
18+
19+
# External link (to different file)
20+
f["remote_data"] = JLD2.Link("/dataset"; file="other.jld2")
21+
end
22+
23+
# Create the external file (this can be done before or after the link to it is written)
24+
jldsave("other.jld2"; dataset="external data")
25+
26+
# Access links transparently
27+
jldopen("file.jld2", "r") do f
28+
f["data_alias"] # Returns [1, 2, 3, 4, 5]
29+
f["remote_data"] # Loads from other.jld2
30+
end
31+
```

src/JLD2.jl

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export jldopen, @load, @save, save_object, load_object, jldsave
1010
export Shuffle, Deflate, ZstdFilter
1111

1212
include("types.jl")
13+
include("links.jl")
1314
include("macros_utils.jl")
1415
include("io/mmapio.jl")
1516
include("io/bufferedio.jl")
@@ -59,13 +60,14 @@ mutable struct Group{T}
5960
next_link_offset::Int64
6061
est_num_entries::Int
6162
est_link_name_len::Int
62-
unwritten_links::OrderedDict{String,RelOffset}
63+
unwritten_links::OrderedDict{String,Link}
6364
unwritten_child_groups::OrderedDict{String,Group{T}}
64-
written_links::OrderedDict{String,RelOffset}
65+
written_links::OrderedDict{String,Link}
6566

6667
Group{T}(f; est_num_entries::Int=4, est_link_name_len::Int=8) where T =
6768
new(f, -1, -1, -1, -1, est_num_entries, est_link_name_len,
68-
OrderedDict{String,RelOffset}(), OrderedDict{String,Group{T}}())
69+
OrderedDict{String,Link}(), OrderedDict{String,Group{T}}(),
70+
OrderedDict{String,Link}())
6971

7072
Group{T}(f, last_chunk_start_offset, continuation_message_goes_here,
7173
last_chunk_checksum_offset, next_link_offset,
@@ -180,7 +182,7 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool,
180182
parallel_read::Bool=false,
181183
plain::Bool=false
182184
) where T<:Union{Type{IOStream},Type{MmapIO}}
183-
185+
184186
mmaparrays && @warn "mmaparrays keyword is currently ignored" maxlog = 1
185187
filters = Filters.normalize_filters(compress)
186188

@@ -263,10 +265,12 @@ function initialize_fileobject!(f::JLDFile)
263265
end
264266
f.root_group = load_group(f, f.root_group_offset)
265267

266-
types_offset = get(f.root_group.written_links, "_types", UNDEFINED_ADDRESS)
267-
if types_offset != UNDEFINED_ADDRESS
268+
# Resolve the "_types" link with lookup_link (rather than lookup_offset) and convert it to an offset via getoffset
269+
types_offset = getoffset(f.root_group, lookup_link(f.root_group, "_types"))
270+
if types_offset !== UNDEFINED_ADDRESS
268271
f.types_group = f.loaded_groups[types_offset] = load_group(f, types_offset)
269-
for (i, offset::RelOffset) in enumerate(values(f.types_group.written_links))
272+
for (i, link) in enumerate(values(f.types_group.written_links))
273+
offset = getoffset(f.types_group, link)
270274
f.datatype_locations[offset] = CommittedDatatype(offset, i)
271275
end
272276
resize!(f.datatypes, length(f.datatype_locations))
@@ -409,6 +413,8 @@ Base.keys(f::JLDFile) = filter!(x->x != "_types", keys(f.root_group))
409413
Base.keytype(f::JLDFile) = String
410414
Base.length(f::Union{JLDFile, Group}) = length(keys(f))
411415

416+
# Looking a link up on a file forwards to the lookup on the file's root group.
function lookup_link(f::JLDFile, name::AbstractString)
    return lookup_link(f.root_group, name)
end
417+
412418
Base.get(default::Function, f::Union{JLDFile, Group}, name::AbstractString) =
413419
haskey(f, name) ? f[name] : default()
414420
Base.get(f::Union{JLDFile, Group}, name::AbstractString, default) =

src/explicit_datasets.jl

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -315,15 +315,15 @@ function get_dataset(g::Group, name::String)
315315
f.n_times_opened == 0 && throw(ArgumentError("file is closed"))
316316

317317
(g, name) = pathize(g, name, false)
318-
roffset = lookup_offset(g, name)
319-
if roffset == UNDEFINED_ADDRESS
320-
if isempty(name)
321-
# this is a group
322-
return get_dataset(f, group_offset(g), g, name)
323-
end
324-
throw(KeyError(name))
318+
319+
if isempty(name)
320+
# this is a group
321+
return get_dataset(f, group_offset(g), g, name)
325322
end
326-
get_dataset(f, roffset, g, name)
323+
324+
link = lookup_link(g, name)
325+
offset = getoffset(g, link)
326+
return get_dataset(f, offset, g, name)
327327
end
328328

329329
function get_dataset(f::JLDFile, offset::RelOffset, g=f.root_group, name="")
@@ -362,8 +362,53 @@ end
362362

363363
# Links
364364
# Size of the link message for a `name => offset` pair; only the link name is
# passed on to jlsizeof.
function message_size(msg::Pair{String, RelOffset})
    return jlsizeof(Val(HmLinkMessage); link_name=msg.first)
end
365+
# Size of the link message for a `name => link` pair; the computation is
# delegated to message_size_for_link.
function message_size(msg::Pair{String, Link})
    return message_size_for_link(msg.first, msg.second)
end
366+
365367
# Write a `name => offset` pair as a link message whose target is the offset.
# The file argument and the trailing positional argument are not used here.
function write_header_message(io, f, msg::Pair{String, RelOffset}, _=nothing)
    return write_header_message(io, Val(HmLinkMessage);
                                link_name=msg.first, target=msg.second)
end
369+
# Write a `name => link` pair by delegating to write_link_message.
# The file argument and the trailing positional argument are not used here.
function write_header_message(io, f, msg::Pair{String, Link}, _=nothing)
    return write_link_message(io, msg.first, msg.second)
end
371+
372+
"""
373+
message_size_for_link(name::String, link::Link) -> Int
374+
375+
Calculate the size of a link message for the given link type.
376+
"""
377+
function message_size_for_link(link_name::String, link::Link)
    # Size of the HmLinkMessage that stores `link` under `link_name`; every
    # branch delegates to jlsizeof with the fields that kind of link uses.
    if is_hard_link(link)
        return jlsizeof(Val(HmLinkMessage); link_name)
    end

    # Flag byte shared by soft and external links; the size_flag term depends on
    # the byte length of the name.
    # NOTE(review): 0x10 and 0x08 are presumably the HDF5 "charset field present"
    # and "link type field present" bits — confirm against HmLinkMessage.
    flags = UInt8(0x10 | 0x08 | size_flag(sizeof(link_name)))

    if is_soft_link(link)
        return jlsizeof(Val(HmLinkMessage); link_name, flags, link_type=UInt8(1),
                        link_info_size=sizeof(link.path), soft_link=UInt8[])
    end

    # Anything that is neither hard nor soft is sized as an external link:
    # three extra bytes plus the byte lengths of the file name and the path.
    info_size = 3 + sizeof(link.external_file) + sizeof(link.path)
    return jlsizeof(Val(HmLinkMessage); link_name, flags, link_type=UInt8(64),
                    link_info_size=info_size, external_link=UInt8[])
end
390+
391+
"""
392+
write_link_message(io, name::String, link::Link)
393+
394+
Write a link message for the given link type to the I/O stream.
395+
"""
396+
function write_link_message(io, link_name::String, link::Link)
    # Writes the HmLinkMessage for `link` under `link_name` to `io` and returns
    # whatever write_header_message returns.
    # Throws ArgumentError when `link` is not a hard, soft or external link.
    if is_hard_link(link)
        return write_header_message(io, Val(HmLinkMessage); link_name, target=link.offset)
    end

    # Flag byte shared by soft and external links; the size_flag term depends on
    # the byte length of the name.
    # NOTE(review): 0x10 and 0x08 are presumably the HDF5 "charset field present"
    # and "link type field present" bits — confirm against HmLinkMessage.
    flags = UInt8(0x10 | 0x08 | size_flag(sizeof(link_name)))

    if is_soft_link(link)
        # Soft link payload: the raw bytes of the target path.
        soft_link = Vector{UInt8}(link.path)
        return write_header_message(io, Val(HmLinkMessage); link_name, flags,
                                    link_type=1, soft_link)
    elseif is_external_link(link)
        # External link payload: one leading zero byte, then the file name and
        # the object path, each terminated by a zero byte (3 extra bytes total).
        external_link = vcat(0x00, Vector{UInt8}(link.external_file), 0x00,
                             Vector{UInt8}(link.path), 0x00)
        return write_header_message(io, Val(HmLinkMessage); link_name, flags,
                                    link_type=64, external_link)
    end

    # Previously this case fell through and wrote nothing, although
    # message_size_for_link reports a non-zero size for such a link. Fail loudly
    # rather than leave the reserved space unwritten.
    throw(ArgumentError(
        "cannot write link \"$link_name\": link is neither hard, soft nor external"))
end
367412

368413
function attach_message(f::JLDFile, offset, messages, wsession=JLDWriteSession();
369414
chunk_start,

0 commit comments

Comments
 (0)