Skip to content

Commit cb226d7

Browse files
committed
Source/common/verifyinghasher: address review feedback
Bundles review-driven changes: - VerifyingHasherCore.mm phase 5 (tail) now returns kIoError when pread yields 0 bytes with cursor_ < total. The previous silent break finalized the SHA-256 over only a prefix of the file and returned kOk / kPagesMismatched, violating the "32-byte SHA-256 of the full file" contract. Matches phases 1 and 3's existing premature-EOF handling. VerifyingHasherCoreTest.mm adds a regression test using a local TruncatedMemoryFileReader that claims a Size() larger than its data to simulate mid-verification truncation. - MemoryFileReader and CountingMemoryFileReader now set errno before returning -1 (EINVAL on negative offset, EIO on MemoryFileReader's synthetic ScheduleErrorOnNextPread() path). FileReader.h's interface documents "-1 = error (errno set)"; FdFileReader satisfied this via pread() itself, but the in-memory readers left errno untouched. - CodeSignatureParserTest.mm's ExtractCsBlob fast-fails when the fat scan finds no matching architecture, rather than falling through into mach_header_64 parsing of the fat header bytes (which could worst-case allocate std::vector<uint8_t> for an arbitrary uint32). - install_libclang_fuzzer.sh derives the resource directory via `clang -print-resource-dir` instead of parsing the human-facing `clang --version` output. Also removes a latent dependency on the /clang/<full-version> -> /clang/<major> compatibility symlink, which isn't created on all Xcode installs. - HashTraits.h's VERIFYINGHASHER_CC_UPDATE_LOOP macro renames its loop-locals from _p/_n/_step_sz/_step/_rc to vh_-prefixed names. All current expansion sites are at block scope where the underscore names are permitted, but at namespace scope they're reserved; the prefix also avoids silent shadowing of caller-side identifiers. - CodeSignatureParser.h drops an unused <vector> include. The header's ParsedCodeDirectory and ParseCodeSignature declarations use std::span / std::string only.
1 parent 1f513f3 commit cb226d7

8 files changed

Lines changed: 97 additions & 24 deletions

File tree

Source/common/verifyinghasher/CodeSignatureParser.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ __END_DECLS
2626
#include <span>
2727
#include <string>
2828
#include <string_view>
29-
#include <vector>
3029

3130
namespace santa {
3231

Source/common/verifyinghasher/CodeSignatureParserTest.mm

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,23 @@
6969
#else
7070
cpu_type_t want = CPU_TYPE_X86_64;
7171
#endif
72+
bool matched = false;
7273
for (auto& a : archs) {
7374
cpu_type_t ct = static_cast<cpu_type_t>(OSSwapBigToHostInt32(a.cputype));
7475
if (ct == want) {
7576
slice_off = OSSwapBigToHostInt32(a.offset);
7677
slice_size = OSSwapBigToHostInt32(a.size);
78+
matched = true;
7779
break;
7880
}
7981
}
82+
// Fast-fail: without this, slice_off stays 0 and the load-command
83+
// walk below reads the fat header bytes as a mach_header_64 and
84+
// allocates std::vector<uint8_t> for an arbitrary mh.sizeofcmds.
85+
if (!matched) {
86+
::close(fd);
87+
return {};
88+
}
8089
}
8190
// Walk the slice's load commands to find LC_CODE_SIGNATURE.
8291
struct mach_header_64 mh{};

Source/common/verifyinghasher/CountingMemoryFileReader.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#define SANTA_COMMON_VERIFYINGHASHER_COUNTINGMEMORYFILEREADER_H
1717

1818
#include <algorithm>
19+
#include <cerrno>
1920
#include <cstdint>
2021
#include <cstring>
2122
#include <vector>
@@ -34,7 +35,10 @@ class CountingMemoryFileReader : public FileReader {
3435
explicit CountingMemoryFileReader(std::vector<uint8_t> data)
3536
: data_(std::move(data)), reads_(data_.size(), 0) {}
3637
ssize_t Pread(void* buf, size_t len, off_t off) override {
37-
if (off < 0) return -1;
38+
if (off < 0) {
39+
errno = EINVAL;
40+
return -1;
41+
}
3842
if (static_cast<size_t>(off) >= data_.size()) return 0;
3943
size_t n = std::min(len, data_.size() - static_cast<size_t>(off));
4044
std::memcpy(buf, data_.data() + off, n);

Source/common/verifyinghasher/HashTraits.h

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -48,19 +48,19 @@ namespace detail {
4848
// silently truncate, corrupting the digest. Wrap the underlying CC call in
4949
// a loop that splits inputs into UINT32_MAX-sized chunks. Returns 1 on
5050
// success, 0 on the first underlying failure (matching CC convention).
51-
#define VERIFYINGHASHER_CC_UPDATE_LOOP(cc_update_fn, ctx, data, len) \
52-
do { \
53-
const auto* _p = static_cast<const uint8_t*>(data); \
54-
size_t _n = (len); \
55-
while (_n > 0) { \
56-
const size_t _step_sz = (_n > UINT32_MAX) ? UINT32_MAX : _n; \
57-
const CC_LONG _step = static_cast<CC_LONG>(_step_sz); \
58-
const int _rc = cc_update_fn((ctx), _p, _step); \
59-
if (_rc != 1) return _rc; \
60-
_p += _step; \
61-
_n -= _step_sz; \
62-
} \
63-
return 1; \
51+
#define VERIFYINGHASHER_CC_UPDATE_LOOP(cc_update_fn, ctx, data, len) \
52+
do { \
53+
const auto* vh_p = static_cast<const uint8_t*>(data); \
54+
size_t vh_n = (len); \
55+
while (vh_n > 0) { \
56+
const size_t vh_step_sz = (vh_n > UINT32_MAX) ? UINT32_MAX : vh_n; \
57+
const CC_LONG vh_step = static_cast<CC_LONG>(vh_step_sz); \
58+
const int vh_rc = cc_update_fn((ctx), vh_p, vh_step); \
59+
if (vh_rc != 1) return vh_rc; \
60+
vh_p += vh_step; \
61+
vh_n -= vh_step_sz; \
62+
} \
63+
return 1; \
6464
} while (0)
6565

6666
struct Sha256Algo {

Source/common/verifyinghasher/MemoryFileReader.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#define SANTA_COMMON_VERIFYINGHASHER_MEMORYFILEREADER_H
1717

1818
#include <algorithm>
19+
#include <cerrno>
1920
#include <cstdint>
2021
#include <cstring>
2122
#include <vector>
@@ -33,9 +34,13 @@ class MemoryFileReader : public FileReader {
3334
ssize_t Pread(void* buf, size_t len, off_t off) override {
3435
if (fail_next_) {
3536
fail_next_ = false;
37+
errno = EIO;
38+
return -1;
39+
}
40+
if (off < 0) {
41+
errno = EINVAL;
3642
return -1;
3743
}
38-
if (off < 0) return -1;
3944
if (static_cast<size_t>(off) >= data_.size()) return 0;
4045
size_t available = data_.size() - static_cast<size_t>(off);
4146
size_t n = std::min(len, available);

Source/common/verifyinghasher/VerifyingHasherCore.mm

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,15 @@
216216
last_error_ = "pread failed in tail phase";
217217
return Status::kIoError;
218218
}
219-
if (n == 0) break;
219+
// n == 0 with cursor_ < total means the underlying source returned
220+
// EOF before we hit the size fstat reported at Run() entry (e.g.,
221+
// the file was truncated mid-verification). Treat as kIoError —
222+
// same as phase 3 — so the partial digest doesn't get finalized
223+
// and returned as if it covered the full file.
224+
if (n == 0) {
225+
last_error_ = "unexpected EOF in tail phase";
226+
return Status::kIoError;
227+
}
220228
Sha256Traits::Update(&full_ctx_, chunk_buf_.data(), static_cast<size_t>(n));
221229
cursor_ += static_cast<uint64_t>(n);
222230
}

Source/common/verifyinghasher/VerifyingHasherCoreTest.mm

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,32 @@
205205
return std::vector<uint8_t>(bytes.data() + sig_off, bytes.data() + sig_off + sig_size);
206206
}
207207

208+
// Simulates a file truncated between fstat() and a later pread(): Size()
209+
// reports a larger value than the data actually contains, so a read past
210+
// data_.size() returns 0 (EOF) while the verifier still believes there
211+
// are more bytes to consume.
212+
class TruncatedMemoryFileReader : public santa::FileReader {
213+
public:
214+
TruncatedMemoryFileReader(std::vector<uint8_t> data, off_t claimed_size)
215+
: data_(std::move(data)), claimed_size_(claimed_size) {}
216+
ssize_t Pread(void* buf, size_t len, off_t off) override {
217+
if (off < 0) {
218+
errno = EINVAL;
219+
return -1;
220+
}
221+
if (static_cast<size_t>(off) >= data_.size()) return 0;
222+
size_t available = data_.size() - static_cast<size_t>(off);
223+
size_t n = std::min(len, available);
224+
std::memcpy(buf, data_.data() + off, n);
225+
return static_cast<ssize_t>(n);
226+
}
227+
off_t Size() const override { return claimed_size_; }
228+
229+
private:
230+
std::vector<uint8_t> data_;
231+
off_t claimed_size_;
232+
};
233+
208234
} // namespace
209235

210236
@interface VerifyingHasherCoreTest : XCTestCase
@@ -282,6 +308,26 @@ - (void)testIoErrorPropagated {
282308
XCTAssertTrue(v.FullFileDigest().empty());
283309
}
284310

311+
// Phase 5 (tail) used to silently `break` when pread returned 0 with
312+
// cursor_ < total, finalizing the digest over only a prefix of the
313+
// file. The fix classifies that as kIoError, matching phases 1 and 3.
314+
// We simulate the trigger with a reader that claims a larger Size()
315+
// than its actual data — the verifier completes header / cs-blob /
316+
// signed-region phases normally, then the tail drain hits EOF early.
317+
- (void)testTailEofClassifiedAsIoError {
318+
auto bytes = Slurp("/usr/bin/yes");
319+
XCTAssertFalse(bytes.empty());
320+
const off_t real_size = static_cast<off_t>(bytes.size());
321+
TruncatedMemoryFileReader r(std::move(bytes), real_size + 1024 * 1024);
322+
VerifyingHasherCore v(r, kHostArch);
323+
auto s = v.Run();
324+
XCTAssertEqual(s, VerifyingHasherCore::Status::kIoError);
325+
XCTAssertTrue(v.FullFileDigest().empty());
326+
XCTAssertNotEqual(std::string(v.LastError()).find("tail"), std::string::npos,
327+
@"LastError should point at the tail phase: %s",
328+
std::string(v.LastError()).c_str());
329+
}
330+
285331
- (void)testSinglePassInvariant {
286332
auto bytes = Slurp("/usr/bin/yes");
287333
XCTAssertFalse(bytes.empty());

Testing/Fuzzing/install_libclang_fuzzer.sh

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,17 @@ TARBALL_URL="https://github.com/llvm/llvm-project/releases/download/llvmorg-${LL
4343
# Major version only (e.g. "22" from "22.1.4") — matches the lib path inside the tarball.
4444
TARBALL_CLANG_MAJOR="${LLVM_VERSION%%.*}"
4545

46-
# DST_PATH uses Apple Clang's version string (from `clang --version`) because
47-
# the Xcode toolchain directory layout follows Apple's numbering, not LLVM's.
48-
# Only the download URL needs the upstream LLVM version.
49-
CLANG_VERSION=$(clang --version | head -n 1 | cut -d' ' -f 4)
50-
if [[ -z "${CLANG_VERSION}" ]]; then
51-
echo "ERROR: failed to parse Apple Clang version from 'clang --version'" >&2
46+
# Ask clang directly for its resource directory. On Apple toolchains this
47+
# resolves to .../Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/<apple-version>,
48+
# so we avoid parsing `clang --version` — whose human-facing format Apple has
49+
# changed in the past — and we avoid hand-concatenating the Xcode prefix.
50+
# Only the download URL still needs the upstream LLVM version (used above).
51+
RESOURCE_DIR=$(clang -print-resource-dir)
52+
if [[ -z "${RESOURCE_DIR}" ]]; then
53+
echo "ERROR: 'clang -print-resource-dir' returned no output" >&2
5254
exit 1
5355
fi
54-
DST_PATH="$(xcode-select -p)/Toolchains/XcodeDefault.xctoolchain/usr/lib/clang/${CLANG_VERSION}/lib/darwin/libclang_rt.fuzzer_osx.a"
56+
DST_PATH="${RESOURCE_DIR}/lib/darwin/libclang_rt.fuzzer_osx.a"
5557

5658
if [ -f "${DST_PATH}" ]; then
5759
echo "libclang_rt.fuzzer_osx.a already present at ${DST_PATH}, nothing to do."

0 commit comments

Comments
 (0)