Skip to content

Commit e0654c9

Browse files
committed
Add support for unhashed directory entries
1 parent 4dec76e commit e0654c9

File tree

6 files changed

+132
-82
lines changed

6 files changed

+132
-82
lines changed

dissect/apfs/apfs.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,8 @@ def __init__(self, fh: BinaryIO):
2424
self.fh.seek(0)
2525

2626
self.sb = NxSuperblock.from_block(self, 0, self.fh.read(c_apfs.NX_DEFAULT_BLOCK_SIZE))
27-
self.sb = sorted(
28-
[self.sb] + [obj for obj in self.sb.checkpoint_objects if isinstance(obj, NxSuperblock)],
29-
key=lambda obj: obj.xid,
30-
)[-1]
27+
self.sbs = [self.sb] + [obj for obj in self.sb.checkpoint_objects if isinstance(obj, NxSuperblock)]
28+
self.sb = sorted(self.sbs, key=lambda obj: obj.xid)[-1]
3129

3230
@property
3331
def block_size(self) -> int:

dissect/apfs/objects/fs.py

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from dissect.apfs.objects.btree import BTree
2020
from dissect.apfs.objects.omap import ObjectMap
2121
from dissect.apfs.stream import DecmpfsStream, FileStream
22-
from dissect.apfs.util import cmp_fs, cmp_fs_dir_hash, parse_fs_object_key
22+
from dissect.apfs.util import cmp_fs, cmp_fs_dir, cmp_fs_dir_hash, parse_fs_object_key
2323

2424
if TYPE_CHECKING:
2525
from collections.abc import Iterator
@@ -647,25 +647,25 @@ def paths(self) -> list[str]:
647647
def get(self, name: str) -> DirectoryEntry:
648648
"""Get a directory entry by name."""
649649
if not self.volume.is_case_insensitive and not self.volume.is_normalization_insensitive:
650-
for entry in self.iterdir():
651-
if entry.name == name:
652-
return entry
653-
654-
# Length is not used in the key comparison
655-
name_hash = (
656-
_hash_filename(name, self.volume.is_case_insensitive) << c_apfs.J_DREC_HASH_SHIFT
657-
) & c_apfs.J_DREC_HASH_MASK
658-
# If the volume is case sensitive, we can use the name in the search key for an exact match
659-
# Otherwise, we set it to None to ignore it in the comparison
660-
name_search = None if self.volume.is_case_insensitive else (name.encode() + b"\x00")
650+
# APFS betas didn't have normalization insensitivity, so when the filesystem is case sensitive
651+
# we can do a simple exact match
652+
key = ((self.oid, c_apfs.APFS_TYPE.DIR_REC.value), name.encode() + b"\x00")
653+
cmp = cmp_fs_dir
654+
else:
655+
# Length is not used in the key comparison
656+
name_hash = (
657+
_hash_filename(name, self.volume.is_case_insensitive) << c_apfs.J_DREC_HASH_SHIFT
658+
) & c_apfs.J_DREC_HASH_MASK
659+
# If the volume is case sensitive, we can use the name in the search key for an exact match
660+
# Otherwise, we set it to None to ignore it in the comparison
661+
name_search = None if self.volume.is_case_insensitive else (name.encode() + b"\x00")
662+
663+
key = ((self.oid, c_apfs.APFS_TYPE.DIR_REC.value), name_hash, name_search)
664+
cmp = cmp_fs_dir_hash
661665

662666
cursor = self.volume.cursor()
663667
try:
664-
cursor.search(
665-
((self.oid, c_apfs.APFS_TYPE.DIR_REC.value), name_hash, name_search),
666-
exact=True,
667-
cmp=cmp_fs_dir_hash,
668-
)
668+
cursor.search(key, exact=True, cmp=cmp)
669669
except KeyError:
670670
raise FileNotFoundError(f"File not found: {name}")
671671

@@ -737,9 +737,7 @@ class DirectoryEntry:
737737

738738
def __init__(self, volume: FS, key: bytes, value: bytes):
739739
self.volume = volume
740-
if self.volume.incompatible_features & (
741-
c_apfs.APFS_INCOMPAT.CASE_INSENSITIVE | c_apfs.APFS_INCOMPAT.NORMALIZATION_INSENSITIVE
742-
):
740+
if self.volume.is_case_insensitive or self.volume.is_normalization_insensitive:
743741
self.key = c_apfs.j_drec_hashed_key(key)
744742
else:
745743
self.key = c_apfs.j_drec_key(key)

dissect/apfs/util.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,24 @@ def cmp_fext(key: tuple[int, int], other: bytes) -> Literal[-1, 0, 1]:
101101
return (logical_addr < other_logical_addr) - (logical_addr > other_logical_addr)
102102

103103

104+
_H = struct.Struct("<H")
105+
106+
107+
def cmp_fs_dir(key: tuple[tuple[int, int], bytes], other: bytes) -> Literal[-1, 0, 1]:
108+
"""Comparison function for unhashed FS directory entries (name compared as raw bytes)."""
109+
# Slightly less readable, but faster than parsing a struct
110+
obj_id_and_type, name = key
111+
112+
# First compare the j_key portion
113+
if (res := cmp_fs(obj_id_and_type, other[:8])) != 0:
114+
return res
115+
116+
# Then compare the name
117+
(other_name_len,) = _H.unpack_from(other, 8)
118+
other_name = other[10 : 10 + (other_name_len)]
119+
return (name < other_name) - (name > other_name)
120+
121+
104122
_I = struct.Struct("<I")
105123
_J_DREC_LEN_MASK = c_apfs.J_DREC_LEN_MASK
106124

@@ -121,8 +139,8 @@ def cmp_fs_dir_hash(key: tuple[tuple[int, int], int, bytes | None], other: bytes
121139
if (res := (name_hash < other_hash) - (name_hash > other_hash)) != 0:
122140
return res
123141

142+
# Special case for searching without a name
124143
if name is None:
125-
# Special case for searching without a name
126144
return 0
127145

128146
# Finally compare the name
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:a03b2ffedb0fed538e920f668520646a9689c8744e0537554c332c2aefdf8231
3+
size 19574
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:799c576e71c02219448ba7e2888eca6b5c426e80603b93d7fd29bd7c808a510a
3+
size 19378

tests/test_apfs.py

Lines changed: 87 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from dissect.apfs.objects.fs import FS
1515

1616

17-
def _assert_apfs_content(volume: FS) -> None:
17+
def _assert_apfs_content(volume: FS, beta: bool) -> None:
1818
# Root directory
1919
node = volume.get("/")
2020
assert node.name == "root"
@@ -61,20 +61,33 @@ def _assert_apfs_content(volume: FS) -> None:
6161
assert node.is_dir()
6262
assert all(
6363
name in sorted(node.listdir())
64-
for name in [
65-
"compressed-lzfse-fork",
66-
"compressed-lzfse-xattr",
67-
"compressed-lzvn-fork",
68-
"compressed-lzvn-xattr",
69-
"compressed-zlib-fork",
70-
"compressed-zlib-xattr",
71-
"fifo",
72-
"file",
73-
"resourcefork",
74-
"xattr-dir",
75-
"xattr-large",
76-
"xattr-small",
77-
]
64+
for name in (
65+
[
66+
"compressed-zlib-fork",
67+
"compressed-zlib-xattr",
68+
"fifo",
69+
"file",
70+
"resourcefork",
71+
"xattr-dir",
72+
"xattr-large",
73+
"xattr-small",
74+
]
75+
if beta
76+
else [
77+
"compressed-lzfse-fork",
78+
"compressed-lzfse-xattr",
79+
"compressed-lzvn-fork",
80+
"compressed-lzvn-xattr",
81+
"compressed-zlib-fork",
82+
"compressed-zlib-xattr",
83+
"fifo",
84+
"file",
85+
"resourcefork",
86+
"xattr-dir",
87+
"xattr-large",
88+
"xattr-small",
89+
]
90+
)
7891
)
7992

8093
# Regular file
@@ -159,7 +172,9 @@ def _assert_apfs_content(volume: FS) -> None:
159172
assert node.is_file()
160173
assert (
161174
hashlib.sha256(node.xattr["xattr-large"].open().read()).hexdigest()
162-
== "a11c957142c3fd8ebf2bee1ed0cf184a246033a3874d060acd28c319b323466e"
175+
== "dd4e6730520932767ec0a9e33fe19c4ce24399d6eba4ff62f13013c9ed30ef87"
176+
if beta
177+
else "a11c957142c3fd8ebf2bee1ed0cf184a246033a3874d060acd28c319b323466e"
163178
)
164179

165180
# Compressed file method 3 (ZLIB-XATTR)
@@ -182,47 +197,48 @@ def _assert_apfs_content(volume: FS) -> None:
182197
== "5f46d97f947137dcf974fc19914c547acd18fcdb25124c846c1100f8b3fbca5f"
183198
)
184199

185-
# Compressed file method 7 (LZVN-XATTR)
186-
node = volume.get("dir/compressed-lzvn-xattr")
187-
assert node.name == "compressed-lzvn-xattr"
188-
assert node.is_file()
189-
assert node.is_compressed()
190-
assert (
191-
node.open().read()
192-
== b"Compressed data in xattr aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n" # noqa: E501
193-
)
194-
195-
# Compressed file method 8 (LZVN-FORK)
196-
node = volume.get("dir/compressed-lzvn-fork")
197-
assert node.name == "compressed-lzvn-fork"
198-
assert node.is_file()
199-
assert node.is_compressed()
200-
assert (
201-
hashlib.sha256(node.open().read()).hexdigest()
202-
== "5f46d97f947137dcf974fc19914c547acd18fcdb25124c846c1100f8b3fbca5f"
203-
)
204-
205-
# Compressed file method 11 (LZFSE-XATTR)
206-
node = volume.get("dir/compressed-lzfse-xattr")
207-
assert node.name == "compressed-lzfse-xattr"
208-
assert node.is_file()
209-
assert node.is_compressed()
210-
assert (
211-
node.open().read()
212-
== b"Compressed data in xattr aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n" # noqa: E501
213-
)
214-
215-
# Compressed file method 12 (LZFSE-FORK)
216-
node = volume.get("dir/compressed-lzfse-fork")
217-
assert node.name == "compressed-lzfse-fork"
218-
assert node.is_file()
219-
assert node.is_compressed()
220-
assert (
221-
hashlib.sha256(node.open().read()).hexdigest()
222-
== "5f46d97f947137dcf974fc19914c547acd18fcdb25124c846c1100f8b3fbca5f"
223-
)
200+
if not beta:
201+
# Compressed file method 7 (LZVN-XATTR)
202+
node = volume.get("dir/compressed-lzvn-xattr")
203+
assert node.name == "compressed-lzvn-xattr"
204+
assert node.is_file()
205+
assert node.is_compressed()
206+
assert (
207+
node.open().read()
208+
== b"Compressed data in xattr aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n" # noqa: E501
209+
)
210+
211+
# Compressed file method 8 (LZVN-FORK)
212+
node = volume.get("dir/compressed-lzvn-fork")
213+
assert node.name == "compressed-lzvn-fork"
214+
assert node.is_file()
215+
assert node.is_compressed()
216+
assert (
217+
hashlib.sha256(node.open().read()).hexdigest()
218+
== "5f46d97f947137dcf974fc19914c547acd18fcdb25124c846c1100f8b3fbca5f"
219+
)
220+
221+
# Compressed file method 11 (LZFSE-XATTR)
222+
node = volume.get("dir/compressed-lzfse-xattr")
223+
assert node.name == "compressed-lzfse-xattr"
224+
assert node.is_file()
225+
assert node.is_compressed()
226+
assert (
227+
node.open().read()
228+
== b"Compressed data in xattr aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n" # noqa: E501
229+
)
230+
231+
# Compressed file method 12 (LZFSE-FORK)
232+
node = volume.get("dir/compressed-lzfse-fork")
233+
assert node.name == "compressed-lzfse-fork"
234+
assert node.is_file()
235+
assert node.is_compressed()
236+
assert (
237+
hashlib.sha256(node.open().read()).hexdigest()
238+
== "5f46d97f947137dcf974fc19914c547acd18fcdb25124c846c1100f8b3fbca5f"
239+
)
224240

225-
if ".HFS+ Private Directory Data\r" not in volume.get("/").listdir():
241+
if ".HFS+ Private Directory Data\r" not in volume.get("/").listdir() and not beta:
226242
# Special files
227243
node = volume.get("dir/blockdev")
228244
assert node.name == "blockdev"
@@ -294,6 +310,20 @@ def _assert_apfs_content(volume: FS) -> None:
294310
"password",
295311
id="jfs-encrypted",
296312
),
313+
pytest.param(
314+
"_data/case_insensitive_beta.bin.gz",
315+
"Case Insensitive (beta)",
316+
c_apfs.APFS_INCOMPAT.CASE_INSENSITIVE,
317+
None,
318+
id="case-insensitive-beta",
319+
),
320+
pytest.param(
321+
"_data/case_sensitive_beta.bin.gz",
322+
"Case Sensitive (beta)",
323+
c_apfs.APFS_INCOMPAT(0),
324+
None,
325+
id="case-sensitive-beta",
326+
),
297327
],
298328
)
299329
def test_apfs(path: str, name: str, features: c_apfs.APFS_INCOMPAT, password: str | None) -> None:
@@ -310,7 +340,7 @@ def test_apfs(path: str, name: str, features: c_apfs.APFS_INCOMPAT, password: st
310340
assert volume.is_encrypted
311341
volume.unlock(password)
312342

313-
_assert_apfs_content(volume)
343+
_assert_apfs_content(volume, "(beta)" in name)
314344

315345

316346
def test_snapshots() -> None:

0 commit comments

Comments
 (0)