Skip to content

Commit cda2f59

Browse files
authored
Merge pull request #359 from hssyoo/full-object-checksum
Implement combine function for CRC32
2 parents 44a8fb2 + e410565 commit cda2f59

2 files changed

Lines changed: 155 additions & 0 deletions

File tree

s3transfer/checksums.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
"""
14+
NOTE: All classes and functions in this module are considered private and are
15+
subject to abrupt breaking changes. Please do not use them directly.
16+
"""
17+
18+
19+
def combine_crc32(crc1, crc2, len2):
    """Combine two CRC32 values.

    Computes the CRC32 of two concatenated byte sequences from their
    individual CRC32 values, without access to the underlying data.
    ``crc1`` is advanced over ``len2`` zero bytes using GF(2) matrix
    multiplication and the result is XORed with ``crc2``.

    Both CRC inputs are reduced to their low 32 bits before use, so a
    signed 32-bit representation of a CRC is accepted as well.

    :type crc1: int
    :param crc1: Current CRC32 integer value.

    :type crc2: int
    :param crc2: Second CRC32 integer value to combine.

    :type len2: int
    :param len2: Length of data that produced `crc2`.

    :rtype: int
    :returns: Combined CRC32 integer value. When ``len2`` is zero or
        negative, ``crc1`` (reduced to 32 bits) is returned and ``crc2``
        is ignored.
    """
    _GF2_DIM = 32
    _CRC32_POLY = 0xEDB88320
    _MASK_32BIT = 0xFFFFFFFF

    def _gf2_matrix_times(mat, vec):
        # Multiply a GF(2) matrix (one int per row) by a bit vector:
        # XOR together the rows selected by the set bits of ``vec``.
        res = 0
        idx = 0
        while vec != 0:
            if vec & 1:
                res ^= mat[idx]
            vec >>= 1
            idx += 1
        return res

    def _gf2_matrix_square(mat):
        # Multiply the matrix by itself, row by row.
        return [_gf2_matrix_times(mat, row) for row in mat]

    # Keep the inputs inside the 32 rows of the operator matrices; without
    # this a negative or oversized value walks past the last row.
    crc1 &= _MASK_32BIT
    crc2 &= _MASK_32BIT

    if len2 <= 0:
        return crc1

    # Operator that advances a CRC over a single zero bit.
    operator = [_CRC32_POLY] + [1 << bit for bit in range(_GF2_DIM - 1)]

    # Square twice: the operator now covers four zero bits. The first
    # squaring inside the loop brings it to eight bits, i.e. one zero byte.
    operator = _gf2_matrix_square(operator)
    operator = _gf2_matrix_square(operator)

    # Apply len2 zero bytes to crc1, doubling the operator's reach on every
    # pass and applying it whenever the matching bit of len2 is set.
    while len2:
        operator = _gf2_matrix_square(operator)
        if len2 & 1:
            crc1 = _gf2_matrix_times(operator, crc1)
        len2 >>= 1

    return (crc1 ^ crc2) & _MASK_32BIT
84+
85+
86+
# Lookup table from a checksum name to the function that combines two
# values of that checksum.
_CRC_CHECKSUM_TO_COMBINE_FUNCTION = dict(ChecksumCRC32=combine_crc32)

tests/unit/test_checksums.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
import random
14+
from binascii import crc32
15+
16+
from s3transfer.checksums import combine_crc32
17+
18+
19+
class TestCombineCrc32:
    """Checks ``combine_crc32`` against ``crc32`` computed over joined data."""

    def test_combine(self):
        """Combining two CRCs matches the CRC of the concatenated bytes."""
        for _ in range(100):
            head = random.randbytes(32)
            head_crc = crc32(head)
            tail = random.randbytes(32)
            tail_crc = crc32(tail)
            expected = crc32(head + tail)
            assert expected == combine_crc32(head_crc, tail_crc, len(tail))

    def test_combine_no_update(self):
        """Combining onto a starting value matches ``crc32`` seeded with it."""
        payload = random.randbytes(32)
        start = random.randint(1, 0x80000000)
        expected = crc32(payload, start)
        actual = combine_crc32(start, crc32(payload), len(payload))
        assert expected == actual

    def test_combine_many_parts(self):
        """Folding many part CRCs in order matches the CRC of the whole."""
        chunks = [f"Part{i}".encode() for i in range(1000)]
        expected = crc32(b"".join(chunks))

        first, *remaining = chunks
        running = crc32(first)
        for chunk in remaining:
            running = combine_crc32(running, crc32(chunk), len(chunk))

        assert running == expected

    def test_combine_associative_property(self):
        """Grouping as (a+b)+c or a+(b+c) yields the same combined CRC."""
        data_a, data_b, data_c = b"foo", b"bar", b"baz"
        expected = crc32(data_a + data_b + data_c)

        crc_a, crc_b, crc_c = crc32(data_a), crc32(data_b), crc32(data_c)

        # Left grouping: combine a with b first, then append c.
        left_grouped = combine_crc32(
            combine_crc32(crc_a, crc_b, len(data_b)), crc_c, len(data_c)
        )

        # Right grouping: combine b with c first, then prepend a.
        crc_bc = combine_crc32(crc_b, crc_c, len(data_c))
        right_grouped = combine_crc32(crc_a, crc_bc, len(data_b + data_c))

        assert expected == left_grouped == right_grouped

0 commit comments

Comments
 (0)