Skip to content

Commit 1123aeb

Browse files
author
Mahesh Pai
committed
Added testcases
1 parent 1b91666 commit 1123aeb

File tree

2 files changed

+190
-1
lines changed

2 files changed

+190
-1
lines changed

common/test/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ target_sources(common_test
7575
# now the integration test part
7676
add_executable(integration_test)
7777

78-
target_link_libraries(integration_test count cpc density fi hll kll req sampling theta tuple common_test_lib)
78+
target_link_libraries(integration_test count cpc density fi hll kll req sampling theta tuple quantiles common_test_lib)
7979

8080
set_target_properties(integration_test PROPERTIES
8181
CXX_STANDARD 11
@@ -90,4 +90,5 @@ add_test(
9090
target_sources(integration_test
9191
PRIVATE
9292
integration_test.cpp
93+
deserialize_hardening_test.cpp
9394
)
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include <catch2/catch.hpp>
21+
#include <sstream>
22+
#include <vector>
23+
24+
// Include all affected sketch types
25+
#include <quantiles_sketch.hpp>
26+
#include <kll_sketch.hpp>
27+
#include <req_sketch.hpp>
28+
29+
namespace datasketches {
30+
31+
/**
32+
* Test for fix of issue #477:
33+
* BUG: SIGABRT in deserialize(): dereferencing empty std::optional (libc++ verbose_abort)
34+
*
35+
* These tests exercise the actual deserialization code path that contained the bug.
36+
* With buggy code (&*tmp on empty optional) and hardening enabled, these will SIGABRT.
37+
* With fixed code (aligned_storage), these pass normally.
38+
*
39+
* IMPORTANT: These tests actually call deserialize() on multi-item sketches, which
40+
* exercises the buggy code path where min/max are deserialized.
41+
*/
42+
43+
TEST_CASE("quantiles_sketch: deserialize multi-item sketch", "[deserialize_hardening]") {
44+
// Create sketch with multiple items (so min/max are stored in serialization)
45+
quantiles_sketch<double> sketch(128);
46+
for (int i = 0; i < 1000; i++) {
47+
sketch.update(static_cast<double>(i));
48+
}
49+
50+
// Serialize
51+
auto bytes = sketch.serialize();
52+
53+
// Deserialize - WITH BUGGY CODE AND HARDENING, THIS WILL SIGABRT HERE
54+
// The bug is: sd.deserialize(is, &*tmp, 1) where tmp is empty optional
55+
auto sketch2 = quantiles_sketch<double>::deserialize(bytes.data(), bytes.size());
56+
57+
// Verify deserialization worked correctly
58+
REQUIRE(sketch2.get_n() == sketch.get_n());
59+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
60+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
61+
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
62+
}
63+
64+
TEST_CASE("quantiles_sketch: deserialize from stream", "[deserialize_hardening]") {
65+
quantiles_sketch<float> sketch(256);
66+
for (int i = 0; i < 2000; i++) {
67+
sketch.update(static_cast<float>(i) * 0.5f);
68+
}
69+
70+
// Serialize to stream
71+
std::stringstream ss;
72+
sketch.serialize(ss);
73+
74+
// Deserialize from stream - exercises the buggy code path
75+
auto sketch2 = quantiles_sketch<float>::deserialize(ss);
76+
77+
REQUIRE(sketch2.get_n() == sketch.get_n());
78+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
79+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
80+
}
81+
82+
TEST_CASE("kll_sketch: deserialize multi-item sketch", "[deserialize_hardening]") {
83+
kll_sketch<float> sketch(200);
84+
for (int i = 0; i < 1500; i++) {
85+
sketch.update(static_cast<float>(i));
86+
}
87+
88+
auto bytes = sketch.serialize();
89+
90+
// Deserialize - exercises buggy &*tmp code path
91+
auto sketch2 = kll_sketch<float>::deserialize(bytes.data(), bytes.size());
92+
93+
REQUIRE(sketch2.get_n() == sketch.get_n());
94+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
95+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
96+
}
97+
98+
TEST_CASE("kll_sketch: deserialize from stream", "[deserialize_hardening]") {
99+
kll_sketch<int> sketch(400);
100+
for (int i = 0; i < 3000; i++) {
101+
sketch.update(i);
102+
}
103+
104+
std::stringstream ss;
105+
sketch.serialize(ss);
106+
107+
// Deserialize from stream
108+
auto sketch2 = kll_sketch<int>::deserialize(ss);
109+
110+
REQUIRE(sketch2.get_n() == sketch.get_n());
111+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
112+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
113+
}
114+
115+
TEST_CASE("req_sketch: deserialize multi-level sketch", "[deserialize_hardening]") {
116+
// REQ sketch only has the bug when num_levels > 1
117+
// We need to add enough items to trigger multiple levels
118+
req_sketch<float> sketch(12);
119+
for (int i = 0; i < 10000; i++) {
120+
sketch.update(static_cast<float>(i));
121+
}
122+
123+
auto bytes = sketch.serialize();
124+
125+
// Deserialize - exercises buggy code path when num_levels > 1
126+
auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
127+
128+
REQUIRE(sketch2.get_n() == sketch.get_n());
129+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
130+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
131+
}
132+
133+
TEST_CASE("req_sketch: deserialize from stream", "[deserialize_hardening]") {
134+
req_sketch<double> sketch(20);
135+
for (int i = 0; i < 15000; i++) {
136+
sketch.update(static_cast<double>(i) * 0.1);
137+
}
138+
139+
std::stringstream ss;
140+
sketch.serialize(ss);
141+
142+
// Deserialize from stream
143+
auto sketch2 = req_sketch<double>::deserialize(ss);
144+
145+
REQUIRE(sketch2.get_n() == sketch.get_n());
146+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
147+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
148+
}
149+
150+
TEST_CASE("multiple sketch types: stress test", "[deserialize_hardening]") {
151+
SECTION("quantiles with various sizes") {
152+
for (int k : {64, 128, 256}) {
153+
quantiles_sketch<int> sketch(k);
154+
for (int i = 0; i < 5000; i++) {
155+
sketch.update(i);
156+
}
157+
auto bytes = sketch.serialize();
158+
auto sketch2 = quantiles_sketch<int>::deserialize(bytes.data(), bytes.size());
159+
REQUIRE(sketch2.get_n() == sketch.get_n());
160+
}
161+
}
162+
163+
SECTION("kll with various sizes") {
164+
for (int k : {100, 200, 400}) {
165+
kll_sketch<double> sketch(k);
166+
for (int i = 0; i < 4000; i++) {
167+
sketch.update(static_cast<double>(i) / 10.0);
168+
}
169+
auto bytes = sketch.serialize();
170+
auto sketch2 = kll_sketch<double>::deserialize(bytes.data(), bytes.size());
171+
REQUIRE(sketch2.get_n() == sketch.get_n());
172+
}
173+
}
174+
175+
SECTION("req with various sizes") {
176+
for (int k : {12, 20}) {
177+
req_sketch<float> sketch(k);
178+
for (int i = 0; i < 8000; i++) {
179+
sketch.update(static_cast<float>(i));
180+
}
181+
auto bytes = sketch.serialize();
182+
auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
183+
REQUIRE(sketch2.get_n() == sketch.get_n());
184+
}
185+
}
186+
}
187+
188+
} // namespace datasketches

0 commit comments

Comments
 (0)