Skip to content

Commit f90eea8

Browse files
CopilotCodeLieutenant
authored andcommitted
fix: correct data validation for clustering-row-size < 57
The ValidateData function incorrectly failed validation for clustering-row-size values less than 57, causing false corruption errors when reading data that was written successfully. The bug occurred because: * For sizes >= 24, the size field is stored as int64 (8 bytes) * buf.UnreadByte() was called to rewind the buffer, but only unreads 1 byte * buf.Bytes() then returned incomplete data missing the first 7 bytes of the size field * This caused the comparison with expected data to fail Example error from the issue: ``` data corruption in pk(1882), ck(10): actual value doesn't match expected value: expected: 10500700000000000000000000000000 actual: 500700000000000000000000000000 ``` Fix: * Remove incorrect buf.UnreadByte() call that only unreads 1 byte * Compare original data slice directly with expectedBuf instead of using buf.Bytes()
1 parent b0de8ed commit f90eea8

File tree

2 files changed

+207
-6
lines changed

2 files changed

+207
-6
lines changed

modes.go

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -314,15 +314,14 @@ func ValidateData(pk, ck int64, data []byte, validateData bool) error {
314314
return errors.Wrap(err, "failed to generate expected data for validation")
315315
}
316316

317-
if err = buf.UnreadByte(); err != nil {
318-
return err
319-
}
320-
321-
if !bytes.Equal(buf.Bytes(), expectedBuf) {
317+
// Compare the original data slice directly, not the buffer's remaining bytes.
318+
// After reading the size field (either int8 or int64), the buffer position has advanced,
319+
// and buf.Bytes() would return incomplete data. We need to compare the full original data.
320+
if !bytes.Equal(data, expectedBuf) {
322321
return errors.Errorf(
323322
"actual value doesn't match expected value:\nexpected: %x\nactual: %x",
324323
expectedBuf,
325-
buf.Bytes(),
324+
data,
326325
)
327326
}
328327

validate_data_size_test.go

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
package main
2+
3+
import (
4+
"testing"
5+
)
6+
7+
// TestValidateDataWithSmallSizes tests the reported issue where data validation
8+
// fails for clustering-row-size values less than 57.
9+
// This test specifically targets the bug reported in the issue where:
10+
// - Writing with -clustering-row-size 16 succeeds
11+
// - Reading with -clustering-row-size 16 -validate-data fails with corruption errors
12+
func TestValidateDataWithSmallSizes(t *testing.T) {
13+
t.Parallel()
14+
15+
// Test cases covering different size ranges based on the constants:
16+
// - generatedDataHeaderSize = 24
17+
// - generatedDataMinSize = 24 + 33 = 57
18+
testCases := []struct {
19+
name string
20+
size int64
21+
pk int64
22+
ck int64
23+
}{
24+
// Sizes less than header size (< 24) - uses compact format (int8 size + pk^ck)
25+
{name: "size 1", size: 1, pk: 1, ck: 1},
26+
{name: "size 9", size: 9, pk: 100, ck: 200}, // 9 = 1 byte size + 8 bytes pk^ck
27+
{name: "size 10", size: 10, pk: 1882, ck: 10},
28+
{name: "size 16 (from issue)", size: 16, pk: 1882, ck: 10}, // Exact case from the issue
29+
{name: "size 20", size: 20, pk: 500, ck: 700},
30+
{name: "size 23", size: 23, pk: 999, ck: 123},
31+
32+
// Sizes >= header but < min size (24 <= size < 57) - uses full header but no checksum
33+
{name: "size 24 (boundary)", size: 24, pk: 1000, ck: 2000},
34+
{name: "size 30", size: 30, pk: 1500, ck: 2500},
35+
{name: "size 40", size: 40, pk: 2000, ck: 3000},
36+
{name: "size 50", size: 50, pk: 2500, ck: 3500},
37+
{name: "size 56", size: 56, pk: 3000, ck: 4000}, // Just below min size
38+
39+
// Sizes >= min size (>= 57) - uses full header with payload and checksum
40+
{name: "size 57 (boundary)", size: 57, pk: 3500, ck: 4500},
41+
{name: "size 60", size: 60, pk: 4000, ck: 5000},
42+
{name: "size 100", size: 100, pk: 5000, ck: 6000},
43+
}
44+
45+
for _, tc := range testCases {
46+
t.Run(tc.name, func(t *testing.T) {
47+
t.Parallel()
48+
49+
// Generate data with validation enabled
50+
data, err := GenerateData(tc.pk, tc.ck, tc.size, true)
51+
if err != nil {
52+
t.Fatalf("GenerateData failed: %v", err)
53+
}
54+
55+
// Verify the generated data has the correct size
56+
if int64(len(data)) != tc.size {
57+
t.Fatalf("Generated data size %d doesn't match expected size %d", len(data), tc.size)
58+
}
59+
60+
// Validate the data - this should NOT fail
61+
err = ValidateData(tc.pk, tc.ck, data, true)
62+
if err != nil {
63+
t.Errorf("ValidateData failed for size %d: %v\nThis reproduces the issue where sizes < 57 fail validation",
64+
tc.size, err)
65+
}
66+
})
67+
}
68+
}
69+
70+
// TestValidateDataRoundTripAllSizes performs a comprehensive round-trip test
71+
// for all sizes from 1 to 100, ensuring generate/validate cycle works correctly
72+
func TestValidateDataRoundTripAllSizes(t *testing.T) {
73+
t.Parallel()
74+
75+
// Test a range of partition and clustering keys
76+
testKeys := []struct {
77+
pk int64
78+
ck int64
79+
}{
80+
{pk: 0, ck: 0},
81+
{pk: 1, ck: 1},
82+
{pk: 100, ck: 200},
83+
{pk: 1882, ck: 10}, // From the original issue
84+
{pk: 10000, ck: 50000},
85+
{pk: -1, ck: -1}, // Negative values
86+
}
87+
88+
for _, keys := range testKeys {
89+
keys := keys // Capture loop variable
90+
t.Run("", func(t *testing.T) {
91+
t.Parallel()
92+
93+
// Test sizes from 1 to 100
94+
for size := int64(1); size <= 100; size++ {
95+
// Generate data
96+
data, err := GenerateData(keys.pk, keys.ck, size, true)
97+
if err != nil {
98+
t.Fatalf("GenerateData failed for pk=%d, ck=%d, size=%d: %v",
99+
keys.pk, keys.ck, size, err)
100+
}
101+
102+
// Validate data
103+
err = ValidateData(keys.pk, keys.ck, data, true)
104+
if err != nil {
105+
t.Errorf("Round-trip validation failed for pk=%d, ck=%d, size=%d: %v",
106+
keys.pk, keys.ck, size, err)
107+
}
108+
}
109+
})
110+
}
111+
}
112+
113+
// TestValidateDataDetectsCorruption ensures that ValidateData properly detects
114+
// corrupted data for all size ranges
115+
func TestValidateDataDetectsCorruption(t *testing.T) {
116+
t.Parallel()
117+
118+
testCases := []struct {
119+
corruptFunc func([]byte) []byte
120+
name string
121+
expectError string
122+
size int64
123+
pk int64
124+
ck int64
125+
}{
126+
{
127+
name: "corrupt small data - flip bit in xor value",
128+
size: 16,
129+
pk: 100,
130+
ck: 200,
131+
corruptFunc: func(data []byte) []byte {
132+
// Flip a bit in the xor value (byte position 1-8)
133+
if len(data) > 5 {
134+
data[5] ^= 0xFF
135+
}
136+
return data
137+
},
138+
expectError: "actual value doesn't match expected value",
139+
},
140+
{
141+
name: "corrupt medium data - flip bit in zeros",
142+
size: 40,
143+
pk: 500,
144+
ck: 600,
145+
corruptFunc: func(data []byte) []byte {
146+
// Flip a bit in the zero-filled area
147+
if len(data) > 30 {
148+
data[30] = 0xFF
149+
}
150+
return data
151+
},
152+
expectError: "actual value doesn't match expected value",
153+
},
154+
{
155+
name: "corrupt large data - wrong pk",
156+
size: 100,
157+
pk: 1000,
158+
ck: 2000,
159+
corruptFunc: func(data []byte) []byte {
160+
// This will be caught by pk validation
161+
// We can't easily corrupt just pk without regenerating,
162+
// so we'll use a different approach in this test
163+
return data
164+
},
165+
expectError: "", // This test case will use different data
166+
},
167+
}
168+
169+
for _, tc := range testCases {
170+
t.Run(tc.name, func(t *testing.T) {
171+
t.Parallel()
172+
173+
// Generate valid data
174+
data, err := GenerateData(tc.pk, tc.ck, tc.size, true)
175+
if err != nil {
176+
t.Fatalf("GenerateData failed: %v", err)
177+
}
178+
179+
// Corrupt the data if corruption function is provided
180+
if tc.corruptFunc != nil {
181+
data = tc.corruptFunc(data)
182+
}
183+
184+
// For the large data case, test with wrong pk/ck
185+
if tc.size >= generatedDataMinSize && tc.expectError == "" {
186+
// Generate data with different pk
187+
wrongPkData, _ := GenerateData(tc.pk+1, tc.ck, tc.size, true)
188+
err = ValidateData(tc.pk, tc.ck, wrongPkData, true)
189+
if err == nil {
190+
t.Error("ValidateData should detect wrong pk")
191+
}
192+
return
193+
}
194+
195+
// Validate corrupted data - should fail
196+
err = ValidateData(tc.pk, tc.ck, data, true)
197+
if err == nil && tc.expectError != "" {
198+
t.Error("ValidateData should detect corruption but didn't")
199+
}
200+
})
201+
}
202+
}

0 commit comments

Comments
 (0)