Skip to content

Commit e40abe4

Browse files
authored
improve type stability by avoiding using a Dict{String, Any} as a tree and instead defining proper structs that can be recursive (#188)
1 parent d236fa0 commit e40abe4

File tree

2 files changed

+76
-49
lines changed

2 files changed

+76
-49
lines changed

src/create.jl

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
struct RewriteEntry
2+
hdr::Header
3+
pos::Int64
4+
end
5+
6+
struct RewriteTree
7+
children::Dict{String, Union{RewriteTree, RewriteEntry}}
8+
end
9+
RewriteTree() = RewriteTree(Dict{String, Union{RewriteTree, RewriteEntry}}())
10+
111
function create_tarball(
212
predicate::Function,
313
tar::IO,
@@ -47,42 +57,43 @@ function rewrite_tarball(
4757
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
4858
portable::Bool = false,
4959
)
50-
tree = Dict{String,Any}()
60+
tree = RewriteTree()
5161
read_tarball(predicate, old_tar; buf=buf) do hdr, parts
5262
portable && check_windows_path(hdr.path, parts)
5363
isempty(parts) && return
5464
node = tree
5565
name = pop!(parts)
5666
for part in parts
57-
node′ = get(node, part, nothing)
58-
if !(node′ isa Dict)
59-
node′ = node[part] = Dict{String,Any}()
67+
child = get(node.children, part, nothing)
68+
if !(child isa RewriteTree)
69+
child = node.children[part] = RewriteTree()
6070
end
61-
node = node′
71+
node = child
6272
end
6373
if hdr.type == :hardlink
64-
node′ = tree
74+
linked = tree
6575
for part in split(hdr.link, '/')
66-
node′ = node′[part]
76+
linked = linked.children[part]
6777
end
68-
hdr′ = Header(node′[1], path=hdr.path, mode=hdr.mode)
69-
node[name] = (hdr′, node′[2])
78+
entry = linked::RewriteEntry
79+
hdr′ = Header(entry.hdr, path=hdr.path, mode=hdr.mode)
80+
node.children[name] = RewriteEntry(hdr′, entry.pos)
7081
else
71-
if !(hdr.type == :directory && get(node, name, nothing) isa Dict)
72-
node[name] = (hdr, position(old_tar))
82+
if !(hdr.type == :directory && get(node.children, name, nothing) isa RewriteTree)
83+
node.children[name] = RewriteEntry(hdr, position(old_tar))
7384
end
7485
skip_data(old_tar, hdr.size)
7586
end
7687
end
7788
write_tarball(new_tar, tree, buf=buf) do node, tar_path
78-
if node isa Dict
89+
if node isa RewriteTree
7990
hdr = Header(tar_path, :directory, 0o755, 0, "")
80-
return hdr, node
91+
return hdr, node.children
8192
else
82-
hdr, pos = node
83-
mode = hdr.type == :file && iszero(hdr.mode & 0o100) ? 0o644 : 0o755
84-
hdr′ = Header(hdr; path=tar_path, mode=mode)
85-
data = hdr.type == :directory ? nothing : (old_tar, pos)
93+
entry = node::RewriteEntry
94+
mode = entry.hdr.type == :file && iszero(entry.hdr.mode & 0o100) ? 0o644 : 0o755
95+
hdr′ = Header(entry.hdr; path=tar_path, mode=mode)
96+
data = entry.hdr.type == :directory ? nothing : (old_tar, entry.pos)
8697
return hdr′, data
8798
end
8899
end

src/extract.jl

Lines changed: 48 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
const PathInfo = Union{String, Int64, Symbol}
2+
3+
struct GitLeaf
4+
mode::String
5+
hash::String
6+
end
7+
8+
struct GitTree
9+
children::Dict{String, Union{GitTree, GitLeaf}}
10+
end
11+
GitTree() = GitTree(Dict{String, Union{GitTree, GitLeaf}}())
12+
113
function iterate_headers(
214
callback::Function,
315
tar::IO;
@@ -168,7 +180,7 @@ end
168180

169181
# resolve symlink target or nothing if not valid
170182
function link_target(
171-
paths::Dict{String},
183+
paths::Dict{String, PathInfo},
172184
path::AbstractString,
173185
link::AbstractString,
174186
)
@@ -214,21 +226,21 @@ function git_tree_hash(
214226
buf::Vector{UInt8} = Vector{UInt8}(undef, DEFAULT_BUFFER_SIZE),
215227
) where HashType <: SHA.SHA_CTX
216228
# build tree with leaves for files and symlinks
217-
tree = Dict{String,Any}()
229+
tree = GitTree()
218230
read_tarball(predicate, tar; buf=buf) do hdr, parts
219231
isempty(parts) && return
220232
name = pop!(parts)
221233
node = tree
222234
for part in parts
223-
node′ = get(node, part, nothing)
224-
if !(node′ isa Dict)
225-
node′ = node[part] = Dict{String,Any}()
235+
child = get(node.children, part, nothing)
236+
if !(child isa GitTree)
237+
child = node.children[part] = GitTree()
226238
end
227-
node = node′
239+
node = child
228240
end
229241
if hdr.type == :directory
230-
if !(get(node, name, nothing) isa Dict)
231-
node[name] = Dict{String,Any}()
242+
if !(get(node.children, name, nothing) isa GitTree)
243+
node.children[name] = GitTree()
232244
end
233245
return
234246
elseif hdr.type == :symlink
@@ -238,47 +250,51 @@ function git_tree_hash(
238250
end
239251
elseif hdr.type == :hardlink
240252
mode = iszero(hdr.mode & 0o100) ? "100644" : "100755"
241-
node′ = tree
253+
linked = tree
242254
for part in split(hdr.link, '/')
243-
node′ = node′[part]
255+
linked = linked.children[part]
244256
end
245-
hash = node′[2] # hash of linked file
257+
hash = (linked::GitLeaf).hash
246258
elseif hdr.type == :file
247259
mode = iszero(hdr.mode & 0o100) ? "100644" : "100755"
248260
hash = git_file_hash(tar, hdr.size, HashType, buf=buf)
249261
else
250262
error("unsupported type for git tree hashing: $(hdr.type)")
251263
end
252-
node[name] = (mode, hash)
264+
node.children[name] = GitLeaf(mode, hash)
253265
end
254266

255267
# prune directories that don't contain any files
256268
if skip_empty
257-
prune_empty!(node::Tuple) = true
258-
function prune_empty!(node::Dict)
259-
filter!(node) do (name, child)
260-
prune_empty!(child)
261-
end
262-
return !isempty(node)
263-
end
264269
prune_empty!(tree)
265270
end
266271

267272
# reduce the tree to a single hash value
268-
hash_tree(node::Tuple) = node
269-
function hash_tree(node::Dict)
270-
by((name, child)) = child isa Dict ? "$name/" : name
271-
hash = git_object_hash("tree", HashType) do io
272-
for (name, child) in sort!(collect(node), by=by)
273-
mode, hash = hash_tree(child)
274-
print(io, mode, ' ', name, '\0')
275-
write(io, hex2bytes(hash))
276-
end
277-
end
278-
return "40000", hash
273+
return hash_git_tree(tree, HashType)[end]
274+
end
275+
276+
prune_empty!(node::GitLeaf) = true
277+
function prune_empty!(node::GitTree)
278+
filter!(node.children) do (name, child)
279+
prune_empty!(child)
279280
end
281+
return !isempty(node.children)
282+
end
280283

281-
return hash_tree(tree)[end]
284+
function hash_git_tree(node::GitLeaf, ::Type{HashType}) where HashType <: SHA.SHA_CTX
285+
return (node.mode, node.hash)
286+
end
287+
288+
function hash_git_tree(node::GitTree, ::Type{HashType}) where HashType <: SHA.SHA_CTX
289+
by((name, child)) = child isa GitTree ? "$name/" : name
290+
hash = git_object_hash("tree", HashType) do io
291+
for (name, child) in sort!(collect(node.children), by=by)
292+
mode, hash = hash_git_tree(child, HashType)
293+
print(io, mode, ' ', name, '\0')
294+
write(io, hex2bytes(hash))
295+
end
296+
end
297+
return ("40000", hash)
282298
end
283299

284300
function git_object_hash(
@@ -350,7 +366,7 @@ function read_tarball(
350366
)
351367
write_skeleton_header(skeleton, buf=buf)
352368
# symbols for path types except symlinks store the link
353-
paths = Dict{String,Any}()
369+
paths = Dict{String, PathInfo}()
354370
globals = Dict{String,String}()
355371
while !eof(tar)
356372
hdr = read_header(tar, globals=globals, buf=buf, tee=skeleton)

0 commit comments

Comments
 (0)