Skip to content

Commit d4ac6a5

Browse files
committed
Implement Incremental Path Hashing
- Implement ComputePathHash() and HasPath() using incremental prefix-based hashing.
- Improve mounting performance for archives with many nested directories.
1 parent 258f683 commit d4ac6a5

5 files changed

Lines changed: 317 additions & 166 deletions

File tree

lib/node.cc

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,25 @@
1616

1717
#include "node.h"
1818

19+
#include <algorithm>
1920
#include <cassert>
2021
#include <ctime>
2122
#include <memory>
2223
#include <vector>
2324

25+
#include <boost/container_hash/hash.hpp>
26+
2427
#include <zip.h>
2528

2629
#include "error.h"
2730
#include "extra_field.h"
2831

32+
// Wraps `s` and derives its hash by folding every character of `s`, in order,
// into an accumulator that starts at zero.
HashedStringView::HashedStringView(std::string_view s) : string(s), hash(0) {
  for (auto it = s.begin(); it != s.end(); ++it) {
    boost::hash_combine(hash, *it);
  }
}
37+
2938
std::ostream& operator<<(std::ostream& out, const FileType t) {
3039
switch (t) {
3140
case FileType::BlockDevice:
@@ -244,6 +253,42 @@ std::string Node::GetPath() const {
244253
return path;
245254
}
246255

256+
bool Node::HasPath(std::string_view path) const {
257+
if (!parent) {
258+
return path == "/";
259+
}
260+
261+
if (path.size() != path_length) {
262+
return false;
263+
}
264+
265+
if (!path.ends_with(name)) {
266+
return false;
267+
}
268+
269+
return parent->HasPath(
270+
path.substr(0, path_length - name.size() - (parent->parent ? 1 : 0)));
271+
}
272+
273+
void Node::ComputePathHash() {
274+
path_length = 0;
275+
path_hash = 0;
276+
277+
if (parent) {
278+
path_length = parent->path_length;
279+
path_hash = parent->path_hash;
280+
if (parent->parent) {
281+
++path_length;
282+
boost::hash_combine(path_hash, '/');
283+
}
284+
}
285+
286+
path_length += name.size();
287+
for (const char c : name) {
288+
boost::hash_combine(path_hash, c);
289+
}
290+
}
291+
247292
void Node::AddChild(Node* const child) {
248293
assert(child);
249294
assert(this == child->parent);

lib/node.h

Lines changed: 55 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@
3838

3939
namespace bi = boost::intrusive;
4040

41+
// A string view bundled with a hash value for it, so that the hash can be
// carried around alongside the characters it describes.
struct HashedStringView {
  // The viewed characters. This is a view: it does not own the storage.
  std::string_view string;

  // Hash value associated with `string`.
  size_t hash;

  // Wraps `s` together with a hash `h` supplied by the caller.
  HashedStringView(std::string_view s, size_t h) : string{s}, hash{h} {}

  // Wraps `s` and computes its hash (defined out of line).
  HashedStringView(std::string_view s);
};
48+
4149
enum class FileType : mode_t {
4250
Unknown = 0, // Unknown
4351
BlockDevice = S_IFBLK, // Block-oriented device
@@ -76,8 +84,19 @@ struct Node {
7684
static bool original_permissions;
7785
static ino_t ino_count;
7886

79-
// Reference to the ZIP archive.
80-
zip_t* zip = nullptr;
87+
#ifdef NDEBUG
88+
using LinkMode = bi::link_mode<bi::normal_link>;
89+
#else
90+
using LinkMode = bi::link_mode<bi::safe_link>;
91+
#endif
92+
93+
// --- 16-byte members (Highest alignment) ---
94+
95+
timespec mtime = g_now;
96+
timespec atime = g_now;
97+
timespec ctime = g_now;
98+
99+
// --- 8-byte members (Fixed size) ---
81100

82101
// Index of the entry represented by this node in the ZIP archive, or -1 if it
83102
// is not directly represented in the ZIP archive (like the root directory, or
@@ -86,20 +105,14 @@ struct Node {
86105

87106
// Inode-specific data.
88107
ino_t const ino = ++ino_count;
89-
mutable nlink_t nlink = 1;
90-
mode_t mode = 0;
91-
uid_t uid = g_uid;
92-
gid_t gid = g_gid;
93-
dev_t dev = 0;
94108
zip_uint64_t size = 0;
95-
timespec mtime = g_now;
96-
timespec atime = g_now;
97-
timespec ctime = g_now;
109+
dev_t dev = 0;
98110

99-
// Link target (e.g. for symlinks or hardlinks)
100-
std::string target;
101-
mutable Reader::Ptr cached_reader;
102-
static const blksize_t block_size = 512;
111+
// --- Architecture-dependent members (8 bytes on 64-bit, 4 bytes on 32-bit)
112+
// --- Grouped in even numbers to maintain 8-byte alignment on 32-bit systems.
113+
114+
// Reference to the ZIP archive.
115+
zip_t* zip = nullptr;
103116

104117
// If this Node is a hardlink, this points to the target node.
105118
Node* hardlink_target = nullptr;
@@ -108,23 +121,12 @@ struct Node {
108121
// root directory which has a null parent pointer.
109122
Node* parent = nullptr;
110123

111-
// Name of this node in the context of its parent. This name should be a valid
112-
// and non-empty filename, and it shouldn't contain any '/' separator. The
113-
// only exception is the root directory, which is just named "/".
114-
std::string name;
115-
116-
// Original path as recorded in the ZIP archive. This is used to find hardlink
117-
// targets.
118-
std::string_view original_path;
124+
mutable Reader::Ptr cached_reader;
119125

120-
// Number of entries whose name have initially collided with this node.
121-
int collision_count = 0;
126+
size_t path_length = 0;
127+
size_t path_hash = 0;
122128

123-
#ifdef NDEBUG
124-
using LinkMode = bi::link_mode<bi::normal_link>;
125-
#else
126-
using LinkMode = bi::link_mode<bi::safe_link>;
127-
#endif
129+
// --- Intrusive Hooks (8-24 bytes on 64-bit, 4-12 bytes on 32-bit) ---
128130

129131
// Hook used to index Nodes by parent.
130132
using ByParent = bi::slist_member_hook<LinkMode>;
@@ -141,8 +143,28 @@ struct Node {
141143
Children children;
142144

143145
// Hooks used to index Nodes by full path and by original path.
144-
using ByPath = bi::unordered_set_member_hook<LinkMode, bi::store_hash<true>>;
145-
ByPath by_path, by_original_path;
146+
using ByPath = bi::unordered_set_member_hook<LinkMode, bi::store_hash<false>>;
147+
ByPath by_path;
148+
149+
// --- Remaining members (Strings and 4-byte types) ---
150+
151+
// Name of this node in the context of its parent. This name should be a valid
152+
// and non-empty filename, and it shouldn't contain any '/' separator. The
153+
// only exception is the root directory, which is just named "/".
154+
std::string name;
155+
156+
// Link target (e.g. for symlinks or hardlinks).
157+
std::string target;
158+
159+
uid_t uid = g_uid;
160+
gid_t gid = g_gid;
161+
mode_t mode = 0;
162+
mutable nlink_t nlink = 1;
163+
164+
// Number of entries whose names have initially collided with this node.
165+
int collision_count = 0;
166+
167+
static const blksize_t block_size = 512;
146168

147169
// Methods.
148170
const Node& GetTarget() const {
@@ -158,7 +180,8 @@ struct Node {
158180

159181
// Gets the full absolute path of this node.
160182
std::string GetPath() const;
161-
183+
bool HasPath(std::string_view path) const;
184+
void ComputePathHash();
162185
void AddChild(Node* child);
163186
Node* GetUniqueChildDirectory();
164187

0 commit comments

Comments
 (0)