|
4 | 4 | use std::io::{ErrorKind, Read}; |
5 | 5 |
|
6 | 6 | use globwalk::fix_glob_pattern; |
7 | | -use hex::ToHex; |
8 | 7 | use ignore::WalkBuilder; |
9 | 8 | use sha1::{Digest, Sha1}; |
10 | 9 | use turbopath::{AbsoluteSystemPath, AnchoredSystemPath, IntoUnix}; |
11 | 10 | use wax::{Glob, Program, any}; |
12 | 11 |
|
13 | 12 | use crate::{Error, GitHashes}; |
14 | 13 |
|
/// Hex-encodes a 20-byte SHA-1 digest as 40 lowercase ASCII hex digits.
///
/// The encoded digits are returned by value in a fixed-size `[u8; 40]`, so
/// this function itself performs no heap allocation. Each input byte becomes
/// two output bytes (high nibble first), and every output byte is one of
/// `0-9` or `a-f`, so the returned buffer is always valid ASCII/UTF-8.
///
/// Callers that need an owned `String` still have to allocate when
/// converting the returned buffer.
#[inline]
fn hex_encode_sha1(digest: &[u8; 20]) -> [u8; 40] {
    const HEX: &[u8; 16] = b"0123456789abcdef";
    let mut buf = [0u8; 40];
    // Pair every digest byte with its two-byte output slot; 40 / 2 == 20, so
    // the zip consumes both sides exactly.
    for (pair, &byte) in buf.chunks_exact_mut(2).zip(digest.iter()) {
        pair[0] = HEX[usize::from(byte >> 4)];
        pair[1] = HEX[usize::from(byte & 0x0f)];
    }
    buf
}
| 26 | + |
15 | 27 | fn git_like_hash_file(path: &AbsoluteSystemPath) -> Result<String, Error> { |
16 | 28 | let mut hasher = Sha1::new(); |
17 | 29 | let mut f = path.open()?; |
18 | | - // Pre-allocate the buffer based on file metadata to avoid repeated |
19 | | - // reallocations during read_to_end. The +1 accounts for read_to_end's |
20 | | - // probe read that confirms EOF. |
21 | | - let estimated_size = f.metadata().map(|m| m.len() as usize + 1).unwrap_or(0); |
22 | | - let mut buffer = Vec::with_capacity(estimated_size); |
23 | | - // Note that read_to_end reads the target if f is a symlink. Currently, this can |
| 30 | + let metadata = f.metadata()?; |
| 31 | + let file_size = metadata.len(); |
| 32 | + |
| 33 | + // Write the git blob header: "blob <size>\0" |
| 34 | + // This must use the exact byte count that will follow. |
| 35 | + hasher.update(b"blob "); |
| 36 | + hasher.update(file_size.to_string().as_bytes()); |
| 37 | + hasher.update([b'\0']); |
| 38 | + |
| 39 | + // Stream file contents through the hasher using a fixed stack buffer |
| 40 | + // instead of reading the entire file into a heap-allocated Vec. This |
| 41 | + // reduces per-file memory from O(file_size) to O(1) (8 KiB). |
| 42 | + // |
| 43 | + // Note that read reads the target if f is a symlink. Currently, this can |
24 | 44 | // happen when we are hashing a specific set of files, which in turn only |
25 | 45 | // happens for handling dotEnv files. It is likely that in the future we |
26 | 46 | // will want to ensure that the target is better accounted for in the set of |
27 | 47 | // inputs to the task. Manual hashing, as well as global deps and other |
28 | 48 | // places that support globs all ignore symlinks. |
29 | | - let size = f.read_to_end(&mut buffer)?; |
30 | | - hasher.update("blob ".as_bytes()); |
31 | | - hasher.update(size.to_string().as_bytes()); |
32 | | - hasher.update([b'\0']); |
33 | | - hasher.update(buffer.as_slice()); |
34 | | - let result = hasher.finalize(); |
35 | | - Ok(result.encode_hex::<String>()) |
| 49 | + let mut buf = [0u8; 8192]; |
| 50 | + loop { |
| 51 | + let n = f.read(&mut buf)?; |
| 52 | + if n == 0 { |
| 53 | + break; |
| 54 | + } |
| 55 | + hasher.update(&buf[..n]); |
| 56 | + } |
| 57 | + |
| 58 | + let digest: [u8; 20] = hasher.finalize().into(); |
| 59 | + let hex_buf = hex_encode_sha1(&digest); |
| 60 | + // SAFETY: hex_buf contains only ASCII hex characters. |
| 61 | + Ok(unsafe { String::from_utf8_unchecked(hex_buf.to_vec()) }) |
36 | 62 | } |
37 | 63 |
|
38 | 64 | fn to_glob(input: &str) -> Result<Glob<'_>, Error> { |
|
0 commit comments