Skip to content

Commit b03a2ab

Browse files
authored
impl(storage): fine grained known checksum tracking (#2805)
With this change we track whether the CRC32C checksum, the MD5 hash, or both are set to known values. Once one of them is set to a known value, that value cannot be changed: the corresponding `with_known_*()` function is no longer available. It is also not possible to revert from known values to computed values: the `compute_*()` functions are no longer available either. As a side effect, the CRC32C checksum is always present, either known or computed; it cannot be removed from the upload. Renamed the (crate-private) types from `Precomputed` to `Known*`.
1 parent 0fe7a87 commit b03a2ab

File tree

4 files changed

+306
-102
lines changed

4 files changed

+306
-102
lines changed

src/storage/src/storage/checksum.rs

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@
1515
use crate::model::ObjectChecksums;
1616
use crate::storage::ChecksumMismatch;
1717

18+
pub fn update(known: &mut ObjectChecksums, computed: ObjectChecksums) {
19+
known.crc32c = known.crc32c.or(computed.crc32c);
20+
if known.md5_hash.is_empty() {
21+
known.md5_hash = computed.md5_hash;
22+
}
23+
}
24+
1825
/// Compare the received object checksums vs. the computed checksums.
1926
///
2027
/// If the `crc32c` field is `None`, or the `md5_hash` field is empty, they do
@@ -84,13 +91,40 @@ impl ChecksumEngine for Null {
8491
}
8592
}
8693

87-
/// Assumes the checksums are provided as part of the object metadata.
94+
/// Assumes the CRC32C checksum is known and included in the object metadata.
95+
#[derive(Clone, Debug)]
96+
pub struct KnownCrc32c;
97+
98+
impl sealed::ChecksumEngine for KnownCrc32c {}
99+
100+
impl ChecksumEngine for KnownCrc32c {
101+
fn update(&mut self, _offset: u64, _data: &bytes::Bytes) {}
102+
fn finalize(&self) -> ObjectChecksums {
103+
ObjectChecksums::new()
104+
}
105+
}
106+
107+
/// Assumes the MD5 hash is known and included in the object metadata.
108+
#[derive(Clone, Debug)]
109+
pub struct KnownMd5;
110+
111+
impl sealed::ChecksumEngine for KnownMd5 {}
112+
113+
impl ChecksumEngine for KnownMd5 {
114+
fn update(&mut self, _offset: u64, _data: &bytes::Bytes) {}
115+
fn finalize(&self) -> ObjectChecksums {
116+
ObjectChecksums::new()
117+
}
118+
}
119+
120+
/// Assumes both the CRC32C checksum and the MD5 hash are known and
121+
/// included in the object metadata.
88122
#[derive(Clone, Debug)]
89-
pub struct Precomputed;
123+
pub struct Known;
90124

91-
impl sealed::ChecksumEngine for Precomputed {}
125+
impl sealed::ChecksumEngine for Known {}
92126

93-
impl ChecksumEngine for Precomputed {
127+
impl ChecksumEngine for Known {
94128
fn update(&mut self, _offset: u64, _data: &bytes::Bytes) {}
95129
fn finalize(&self) -> ObjectChecksums {
96130
ObjectChecksums::new()
@@ -359,7 +393,7 @@ mod tests {
359393

360394
#[test]
361395
fn precomputed() {
362-
let mut engine = Precomputed;
396+
let mut engine = Known;
363397
engine.update(0, &data());
364398
assert_eq!(engine.finalize(), ObjectChecksums::new());
365399
}

src/storage/src/storage/perform_upload.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
use super::client::{StorageInner, apply_customer_supplied_encryption_headers};
1616
use crate::model::Object;
1717
use crate::retry_policy::ContinueOn308;
18-
use crate::storage::checksum::{ChecksumEngine, ChecksummedSource, Precomputed};
18+
use crate::storage::checksum::{ChecksumEngine, ChecksummedSource, Known};
1919
use crate::storage::client::enc;
2020
use crate::storage::client::info::X_GOOG_API_CLIENT_HEADER;
2121
use crate::storage::v1;

src/storage/src/storage/perform_upload/buffered.rs

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
// limitations under the License.
1414

1515
use super::{
16-
ChecksumEngine, ChecksummedSource, ContinueOn308, Error, IterSource, Object, PerformUpload,
17-
Precomputed, Result, ResumableUploadStatus, StreamingSource, X_GOOG_API_CLIENT_HEADER,
16+
ChecksumEngine, ChecksummedSource, ContinueOn308, Error, IterSource, Known, Object,
17+
PerformUpload, Result, ResumableUploadStatus, StreamingSource, X_GOOG_API_CLIENT_HEADER,
1818
apply_customer_supplied_encryption_headers,
1919
};
2020
use crate::storage::checksum::validate as validate_checksum;
@@ -142,20 +142,15 @@ where
142142
// Use the computed checksum, if any, and if the spec does not have a
143143
// checksum already.
144144
let computed = stream.final_checksum();
145-
let has = self
145+
let current = self
146146
.spec
147147
.resource
148148
.get_or_insert_default()
149149
.checksums
150150
.get_or_insert_default();
151-
if has.crc32c.is_none() {
152-
has.crc32c = computed.crc32c;
153-
}
154-
if has.md5_hash.is_empty() {
155-
has.md5_hash = computed.md5_hash;
156-
}
151+
crate::storage::checksum::update(current, computed);
157152
let upload = PerformUpload {
158-
payload: Arc::new(Mutex::new(ChecksummedSource::new(Precomputed, source))),
153+
payload: Arc::new(Mutex::new(ChecksummedSource::new(Known, source))),
159154
inner: self.inner,
160155
spec: self.spec,
161156
params: self.params,

0 commit comments

Comments
 (0)