Skip to content

Commit 2a6dac1

Browse files
committed
refactor(upload): apply review feedback from #145
Four small follow-ups from @dobby-coder's review: 1. Cheap pre-check before reading the chunk body. Mirrors the structural part of `classify_chunk_request` (normal-next OR replay-candidate) and rejects mismatches before consuming up to `chunk_size` bytes, restoring the prior security profile against UUIDs leaked to an attacker. 2. Drop `chunk_sha256` and `chunk_len` from `LastChunkRecord`. Body identity falls out of the rolling-token chain itself: recompute `compute_hash(prev_token, body)` and compare to `response_token`. Length divergence surfaces as a hash mismatch — one less invariant to keep in sync, smaller `FileState`, simpler Phase 2 schema. The length-specific test folds into the body-mismatch test. 3. Promote the duplicated "Cryptify Token header does not match" string to a `TOKEN_MISMATCH_MSG` const referenced by both `check_cryptify_token` (finalize path) and the chunk classifier so the wording can't drift silently between call sites. 4. One fewer `shasum.clone()` — the final responder takes the moved value while `state.cryptify_token` and `last_chunk.response_token` share a single clone. `sha256_of` is removed (sole caller is gone). Tests gain a `last_chunk_for(prev_token, offset, body)` helper that builds a `LastChunkRecord` whose `response_token` is the actual rolling hash, matching the production handler's invariant. Refs #145 review.
1 parent d7c0d54 commit 2a6dac1

2 files changed

Lines changed: 93 additions & 94 deletions

File tree

src/main.rs

Lines changed: 83 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -421,11 +421,14 @@ fn compute_hash(cryptify_token: &[u8], data: &[u8]) -> String {
421421
bytes_to_hex(&hash.finalize())
422422
}
423423

424+
/// Wire-level error message for a `CryptifyToken` mismatch. Reused by both
425+
/// `check_cryptify_token` (the finalize path) and the chunk classifier so the
426+
/// message can't drift silently between call sites.
427+
const TOKEN_MISMATCH_MSG: &str = "Cryptify Token header does not match";
428+
424429
fn check_cryptify_token(header: &str, expected: &str) -> Result<(), Error> {
425430
if header != expected {
426-
return Err(Error::BadRequest(Some(
427-
"Cryptify Token header does not match".to_owned(),
428-
)));
431+
return Err(Error::BadRequest(Some(TOKEN_MISMATCH_MSG.to_owned())));
429432
}
430433
Ok(())
431434
}
@@ -470,6 +473,25 @@ async fn upload_chunk(
470473
))));
471474
}
472475

476+
// Cheap pre-check before reading the body, so a leaked UUID can't be
477+
// used to force the server to buffer up to `chunk_size` bytes per
478+
// request just to be rejected. Mirrors the structural part of
479+
// `classify_chunk_request` — we only commit to reading the body when
480+
// the request looks like either a normal next chunk or a candidate
481+
// replay of the last committed chunk.
482+
let is_normal_next = state.uploaded == start && headers.cryptify_token == state.cryptify_token;
483+
let is_replay_candidate = state.last_chunk.as_ref().is_some_and(|last| {
484+
last.prev_uploaded == start && headers.cryptify_token == last.prev_token
485+
});
486+
if !is_normal_next && !is_replay_candidate {
487+
if state.uploaded != start {
488+
return Err(Error::BadRequest(Some(
489+
"Incorrect Content-Range header".to_owned(),
490+
)));
491+
}
492+
return Err(Error::BadRequest(Some(TOKEN_MISMATCH_MSG.to_owned())));
493+
}
494+
473495
let body = data
474496
.open((end - start).bytes())
475497
.into_bytes()
@@ -482,7 +504,7 @@ async fn upload_chunk(
482504

483505
// Three branches: normal next chunk, idempotent retry of the last
484506
// committed chunk, or rejection.
485-
match classify_chunk_request(&state, &headers.cryptify_token, start, end, &body) {
507+
match classify_chunk_request(&state, &headers.cryptify_token, start, &body) {
486508
ChunkClassification::NormalNext => {}
487509
ChunkClassification::ReplayLastChunk(token) => {
488510
drop(state);
@@ -540,16 +562,13 @@ async fn upload_chunk(
540562
.await
541563
.map_err(|_| Error::InternalServerError(Some("Could not write file".to_owned())))?;
542564

543-
let prev_token = headers.cryptify_token.clone();
544-
let chunk_sha256 = sha256_of(&body);
565+
let prev_token = headers.cryptify_token;
545566
let shasum = compute_hash(prev_token.as_bytes(), &body);
546567
state.cryptify_token = shasum.clone();
547568
state.uploaded += end - start;
548569
state.last_chunk = Some(LastChunkRecord {
549570
prev_token,
550571
prev_uploaded: start,
551-
chunk_len: end - start,
552-
chunk_sha256,
553572
response_token: shasum.clone(),
554573
});
555574

@@ -571,17 +590,17 @@ enum ChunkClassification {
571590
/// returns this token to the client without re-writing or double-counting.
572591
ReplayLastChunk(String),
573592
/// Reject the request with this error — the standard 400 you'd get
574-
/// before idempotent-retry support, plus stricter 400s when the request
575-
/// looks like a retry but the body or length diverges (almost certainly
576-
/// a client bug, never accept different bytes for the same offset).
593+
/// before idempotent-retry support, plus a stricter 400 when the
594+
/// request looks like a retry but the body bytes (or their length)
595+
/// diverge from the cached chunk. Never accept different bytes for
596+
/// the same offset.
577597
Reject(Error),
578598
}
579599

580600
fn classify_chunk_request(
581601
state: &FileState,
582602
request_token: &str,
583603
start: u64,
584-
end: u64,
585604
body: &[u8],
586605
) -> ChunkClassification {
587606
if state.uploaded == start && request_token == state.cryptify_token {
@@ -590,17 +609,18 @@ fn classify_chunk_request(
590609

591610
if let Some(last) = state.last_chunk.as_ref() {
592611
if request_token == last.prev_token && start == last.prev_uploaded {
593-
if end - start != last.chunk_len {
594-
return ChunkClassification::Reject(Error::BadRequest(Some(
595-
"Idempotent retry: chunk length differs from the original".to_owned(),
596-
)));
597-
}
598-
if sha256_of(body) != last.chunk_sha256 {
599-
return ChunkClassification::Reject(Error::BadRequest(Some(
600-
"Idempotent retry: body hash differs from the original chunk".to_owned(),
601-
)));
612+
// Recompute the rolling hash over the incoming body. Identity
613+
// is implicit in the rolling-token construction itself: if the
614+
// hash matches `response_token`, the body is byte-identical to
615+
// the original chunk (modulo a SHA-256 collision, which would
616+
// also break the rolling chain). Length divergence surfaces
617+
// here too.
618+
if compute_hash(last.prev_token.as_bytes(), body) == last.response_token {
619+
return ChunkClassification::ReplayLastChunk(last.response_token.clone());
602620
}
603-
return ChunkClassification::ReplayLastChunk(last.response_token.clone());
621+
return ChunkClassification::Reject(Error::BadRequest(Some(
622+
"Idempotent retry: body differs from the original chunk".to_owned(),
623+
)));
604624
}
605625
}
606626

@@ -610,17 +630,7 @@ fn classify_chunk_request(
610630
)));
611631
}
612632

613-
// Right offset but wrong token: the existing `check_cryptify_token`
614-
// wording, so error messages don't change for non-retry callers.
615-
ChunkClassification::Reject(Error::BadRequest(Some(
616-
"Cryptify Token header does not match".to_owned(),
617-
)))
618-
}
619-
620-
fn sha256_of(data: &[u8]) -> [u8; 32] {
621-
let mut hash = sha2::Sha256::new();
622-
hash.update(data);
623-
hash.finalize().into()
633+
ChunkClassification::Reject(Error::BadRequest(Some(TOKEN_MISMATCH_MSG.to_owned())))
624634
}
625635

626636
struct FinalizeHeaders {
@@ -1151,10 +1161,22 @@ mod tests {
11511161
s
11521162
}
11531163

1164+
/// Build a `LastChunkRecord` whose `response_token` correctly encodes
1165+
/// `prev_token + body`, the same construction the production handler
1166+
/// uses. Tests use this so the replay path's hash check passes on a
1167+
/// genuine retry and fails when the body is tampered with.
1168+
fn last_chunk_for(prev_token: &str, prev_uploaded: u64, body: &[u8]) -> LastChunkRecord {
1169+
LastChunkRecord {
1170+
prev_token: prev_token.to_owned(),
1171+
prev_uploaded,
1172+
response_token: compute_hash(prev_token.as_bytes(), body),
1173+
}
1174+
}
1175+
11541176
#[test]
11551177
fn classify_normal_next_chunk() {
11561178
let state = empty_filestate(100, "tok-current");
1157-
match classify_chunk_request(&state, "tok-current", 100, 200, b"chunk") {
1179+
match classify_chunk_request(&state, "tok-current", 100, b"chunk") {
11581180
ChunkClassification::NormalNext => {}
11591181
_ => panic!("expected NormalNext"),
11601182
}
@@ -1163,73 +1185,54 @@ mod tests {
11631185
#[test]
11641186
fn classify_replays_last_chunk_on_matching_retry() {
11651187
let body = b"hello world";
1166-
let last = LastChunkRecord {
1167-
prev_token: "tok-prev".into(),
1168-
prev_uploaded: 100,
1169-
chunk_len: body.len() as u64,
1170-
chunk_sha256: sha256_of(body),
1171-
response_token: "tok-after".into(),
1172-
};
1173-
let state = filestate_with_last_chunk(100 + body.len() as u64, "tok-after", last);
1174-
match classify_chunk_request(&state, "tok-prev", 100, 100 + body.len() as u64, body) {
1175-
ChunkClassification::ReplayLastChunk(t) => assert_eq!(t, "tok-after"),
1188+
let last = last_chunk_for("tok-prev", 100, body);
1189+
let response_token = last.response_token.clone();
1190+
let state = filestate_with_last_chunk(100 + body.len() as u64, &response_token, last);
1191+
match classify_chunk_request(&state, "tok-prev", 100, body) {
1192+
ChunkClassification::ReplayLastChunk(t) => assert_eq!(t, response_token),
11761193
_ => panic!("expected ReplayLastChunk"),
11771194
}
11781195
}
11791196

11801197
#[test]
11811198
fn classify_rejects_retry_with_different_body() {
11821199
let body = b"original";
1183-
let last = LastChunkRecord {
1184-
prev_token: "tok-prev".into(),
1185-
prev_uploaded: 100,
1186-
chunk_len: body.len() as u64,
1187-
chunk_sha256: sha256_of(body),
1188-
response_token: "tok-after".into(),
1189-
};
1190-
let state = filestate_with_last_chunk(100 + body.len() as u64, "tok-after", last);
1200+
let last = last_chunk_for("tok-prev", 100, body);
1201+
let response_token = last.response_token.clone();
1202+
let state = filestate_with_last_chunk(100 + body.len() as u64, &response_token, last);
11911203
let tampered = b"tampered";
1192-
let result = classify_chunk_request(
1193-
&state,
1194-
"tok-prev",
1195-
100,
1196-
100 + tampered.len() as u64,
1197-
tampered,
1198-
);
1204+
let result = classify_chunk_request(&state, "tok-prev", 100, tampered);
11991205
match result {
12001206
ChunkClassification::Reject(Error::BadRequest(Some(msg))) => {
1201-
assert!(msg.contains("body hash"), "got: {}", msg);
1207+
assert!(msg.contains("body differs"), "got: {}", msg);
12021208
}
1203-
_ => panic!("expected BadRequest about body hash"),
1209+
_ => panic!("expected BadRequest about body differs"),
12041210
}
12051211
}
12061212

12071213
#[test]
12081214
fn classify_rejects_retry_with_different_length() {
1215+
// Same prev_token + start, but a shorter body. The recomputed
1216+
// rolling hash won't match, so the body-differs path catches this
1217+
// case too — we no longer need a length-specific record.
12091218
let body = b"original";
1210-
let last = LastChunkRecord {
1211-
prev_token: "tok-prev".into(),
1212-
prev_uploaded: 100,
1213-
chunk_len: body.len() as u64,
1214-
chunk_sha256: sha256_of(body),
1215-
response_token: "tok-after".into(),
1216-
};
1217-
let state = filestate_with_last_chunk(100 + body.len() as u64, "tok-after", last);
1218-
// Same prev_token + start, but length differs from cached record.
1219-
let result = classify_chunk_request(&state, "tok-prev", 100, 100 + 5, b"short");
1219+
let last = last_chunk_for("tok-prev", 100, body);
1220+
let response_token = last.response_token.clone();
1221+
let state = filestate_with_last_chunk(100 + body.len() as u64, &response_token, last);
1222+
let result = classify_chunk_request(&state, "tok-prev", 100, b"short");
12201223
match result {
12211224
ChunkClassification::Reject(Error::BadRequest(Some(msg))) => {
1222-
assert!(msg.contains("chunk length"), "got: {}", msg);
1225+
assert!(msg.contains("body differs"), "got: {}", msg);
12231226
}
1224-
_ => panic!("expected BadRequest about chunk length"),
1227+
_ => panic!("expected BadRequest about body differs"),
12251228
}
12261229
}
12271230

12281231
#[test]
12291232
fn classify_rejects_offset_mismatch_with_no_replay() {
12301233
// No last_chunk recorded → offset mismatch is just the regular 400.
12311234
let state = empty_filestate(100, "tok-current");
1232-
let result = classify_chunk_request(&state, "tok-current", 50, 60, b"abc");
1235+
let result = classify_chunk_request(&state, "tok-current", 50, b"abc");
12331236
match result {
12341237
ChunkClassification::Reject(Error::BadRequest(Some(msg))) => {
12351238
assert_eq!(msg, "Incorrect Content-Range header");
@@ -1241,10 +1244,10 @@ mod tests {
12411244
#[test]
12421245
fn classify_rejects_token_mismatch_at_correct_offset() {
12431246
let state = empty_filestate(100, "tok-current");
1244-
let result = classify_chunk_request(&state, "tok-wrong", 100, 110, b"chunk");
1247+
let result = classify_chunk_request(&state, "tok-wrong", 100, b"chunk");
12451248
match result {
12461249
ChunkClassification::Reject(Error::BadRequest(Some(msg))) => {
1247-
assert_eq!(msg, "Cryptify Token header does not match");
1250+
assert_eq!(msg, TOKEN_MISMATCH_MSG);
12481251
}
12491252
_ => panic!("expected BadRequest about token mismatch"),
12501253
}
@@ -1255,15 +1258,10 @@ mod tests {
12551258
// Last chunk exists but the retry presents a *different* prev_token.
12561259
// Falls through to the regular offset-mismatch rejection.
12571260
let body = b"original";
1258-
let last = LastChunkRecord {
1259-
prev_token: "tok-prev".into(),
1260-
prev_uploaded: 100,
1261-
chunk_len: body.len() as u64,
1262-
chunk_sha256: sha256_of(body),
1263-
response_token: "tok-after".into(),
1264-
};
1265-
let state = filestate_with_last_chunk(100 + body.len() as u64, "tok-after", last);
1266-
let result = classify_chunk_request(&state, "tok-something-else", 100, 108, body);
1261+
let last = last_chunk_for("tok-prev", 100, body);
1262+
let response_token = last.response_token.clone();
1263+
let state = filestate_with_last_chunk(100 + body.len() as u64, &response_token, last);
1264+
let result = classify_chunk_request(&state, "tok-something-else", 100, body);
12671265
match result {
12681266
ChunkClassification::Reject(Error::BadRequest(Some(msg))) => {
12691267
assert_eq!(msg, "Incorrect Content-Range header");

src/store.rs

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,21 @@ pub struct FileState {
5151
/// handler detect a duplicate retry (when the client never saw the
5252
/// previous response): if the request's `CryptifyToken` matches
5353
/// `prev_token` and `Content-Range.start` matches `prev_uploaded`, and
54-
/// the body's SHA-256 matches `chunk_sha256`, the server replays
55-
/// `response_token` instead of advancing the rolling-token chain or
56-
/// double-writing the chunk. `None` until at least one chunk has been
57-
/// successfully committed.
54+
/// recomputing the rolling hash over the incoming body equals
55+
/// `response_token`, the server replays `response_token` instead of
56+
/// advancing the rolling-token chain or double-writing the chunk.
57+
/// `None` until at least one chunk has been successfully committed.
5858
pub last_chunk: Option<LastChunkRecord>,
5959
}
6060

6161
/// Replay record of the most recently committed chunk. See
6262
/// [`FileState::last_chunk`].
63+
///
64+
/// Body identity is checked by recomputing the rolling hash
65+
/// `sha256(prev_token || body)` and comparing against `response_token` —
66+
/// the same construction the rolling-token chain itself relies on, so no
67+
/// separate digest needs to be cached. Length differences also surface as
68+
/// a hash mismatch.
6369
#[derive(Clone, Debug)]
6470
pub struct LastChunkRecord {
6571
/// The `CryptifyToken` the client sent in the chunk PUT — i.e., the
@@ -69,11 +75,6 @@ pub struct LastChunkRecord {
6975
/// `state.uploaded` *before* this chunk was applied — equals the
7076
/// chunk's `Content-Range` start.
7177
pub prev_uploaded: u64,
72-
/// The chunk body length in bytes.
73-
pub chunk_len: u64,
74-
/// SHA-256 of the chunk body. Verified on retry to ensure the client
75-
/// is replaying the same bytes (not a colliding-offset different chunk).
76-
pub chunk_sha256: [u8; 32],
7778
/// The token the server returned in response to the original PUT —
7879
/// i.e., the value of `state.cryptify_token` after this chunk was
7980
/// applied. Replayed verbatim on a detected retry.

0 commit comments

Comments
 (0)