Skip to content

Commit 4f069a3

Browse files
authored
Merge pull request #478 from MaheshGPai/mahesh_pr
BugFix: SIGABRT in quantiles_sketch::deserialize(): dereferencing emp…
2 parents 7548810 + f5fb9d9 commit 4f069a3

File tree

7 files changed

+376
-52
lines changed

7 files changed

+376
-52
lines changed

.github/workflows/hardening.yml

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
name: libc++ Hardening Tests
2+
3+
on:
4+
push:
5+
branches:
6+
- master
7+
pull_request:
8+
branches:
9+
- master
10+
workflow_dispatch:
11+
12+
env:
13+
BUILD_TYPE: Debug
14+
15+
jobs:
16+
hardening-test:
17+
name: C++17 with libc++ Hardening Mode
18+
runs-on: ubuntu-latest
19+
20+
steps:
21+
- name: Checkout
22+
uses: actions/checkout@v4
23+
with:
24+
submodules: true
25+
persist-credentials: false
26+
27+
- name: Install LLVM and libc++
28+
run: |
29+
# Install LLVM/Clang with libc++
30+
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
31+
sudo add-apt-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main"
32+
sudo apt-get update
33+
sudo apt-get install -y clang-18 libc++-18-dev libc++abi-18-dev
34+
35+
- name: Configure with C++17 and libc++ hardening
36+
env:
37+
CC: clang-18
38+
CXX: clang++-18
39+
run: |
40+
cmake -B build -S . \
41+
-DCMAKE_BUILD_TYPE=${{ env.BUILD_TYPE }} \
42+
-DCMAKE_CXX_STANDARD=17 \
43+
-DBUILD_TESTS=ON \
44+
-DCMAKE_CXX_FLAGS="-stdlib=libc++ -D_LIBCPP_HARDENING_MODE=_LIBCPP_HARDENING_MODE_DEBUG" \
45+
-DCMAKE_EXE_LINKER_FLAGS="-stdlib=libc++ -lc++abi"
46+
47+
- name: Build hardening tests
48+
run: cmake --build build --target hardening_test --config ${{ env.BUILD_TYPE }}
49+
50+
- name: Run hardening tests
51+
run: |
52+
cd build
53+
./common/test/hardening_test "[deserialize_hardening]"
54+
55+
- name: Report results
56+
if: always()
57+
run: |
58+
echo "✅ Tests passed with libc++ hardening enabled!"
59+
echo "This verifies the fix for issue #477 prevents SIGABRT."

common/test/CMakeLists.txt

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,15 @@ target_sources(common_test
7575
# now the integration test part
7676
add_executable(integration_test)
7777

78-
target_link_libraries(integration_test count cpc density fi hll kll req sampling theta tuple common_test_lib)
78+
target_link_libraries(integration_test count cpc density fi hll kll req sampling theta tuple quantiles common_test_lib)
7979

80+
# Use CMAKE_CXX_STANDARD if defined, otherwise C++11
81+
set(_integration_cxx_standard 11)
82+
if(DEFINED CMAKE_CXX_STANDARD)
83+
set(_integration_cxx_standard ${CMAKE_CXX_STANDARD})
84+
endif()
8085
set_target_properties(integration_test PROPERTIES
81-
CXX_STANDARD 11
86+
CXX_STANDARD ${_integration_cxx_standard}
8287
CXX_STANDARD_REQUIRED YES
8388
)
8489

@@ -91,3 +96,39 @@ target_sources(integration_test
9196
PRIVATE
9297
integration_test.cpp
9398
)
99+
100+
# Separate hardening test executable (header-only, no pre-compiled libs)
101+
# This ensures the sketch code is compiled with C++17 + hardening
102+
# Always build this target - it will use CMAKE_CXX_STANDARD if set (and >= 17), otherwise C++17
103+
104+
add_executable(hardening_test)
105+
target_link_libraries(hardening_test common common_test_lib)
106+
107+
# Include directories for header-only sketch implementations
108+
target_include_directories(hardening_test PRIVATE
109+
${CMAKE_SOURCE_DIR}/quantiles/include
110+
${CMAKE_SOURCE_DIR}/kll/include
111+
${CMAKE_SOURCE_DIR}/req/include
112+
${CMAKE_SOURCE_DIR}/common/include
113+
)
114+
115+
# Use C++17 minimum for hardening tests
116+
set(_hardening_cxx_standard 17)
117+
if(DEFINED CMAKE_CXX_STANDARD AND CMAKE_CXX_STANDARD GREATER_EQUAL 17)
118+
set(_hardening_cxx_standard ${CMAKE_CXX_STANDARD})
119+
endif()
120+
set_target_properties(hardening_test PROPERTIES
121+
CXX_STANDARD ${_hardening_cxx_standard}
122+
CXX_STANDARD_REQUIRED YES
123+
)
124+
message(STATUS "hardening_test will use C++${_hardening_cxx_standard}")
125+
126+
add_test(
127+
NAME hardening_test
128+
COMMAND hardening_test "[deserialize_hardening]"
129+
)
130+
131+
target_sources(hardening_test
132+
PRIVATE
133+
deserialize_hardening_test.cpp
134+
)
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include <catch2/catch.hpp>
21+
#include <sstream>
22+
#include <vector>
23+
24+
// Include all affected sketch types
25+
#include <quantiles_sketch.hpp>
26+
#include <kll_sketch.hpp>
27+
#include <req_sketch.hpp>
28+
29+
namespace datasketches {
30+
31+
/**
32+
* Test for fix of issue #477:
33+
* BUG: SIGABRT in deserialize(): dereferencing empty std::optional (libc++ verbose_abort)
34+
*
35+
* These tests exercise the actual deserialization code path that contained the bug.
36+
* With buggy code (&*tmp on empty optional) and hardening enabled, these will SIGABRT.
37+
* With fixed code (aligned_storage), these pass normally.
38+
*
39+
* IMPORTANT: These tests actually call deserialize() on multi-item sketches, which
40+
* exercises the buggy code path where min/max are deserialized.
41+
*/
42+
43+
TEST_CASE("quantiles_sketch: deserialize multi-item sketch", "[deserialize_hardening]") {
44+
// Create sketch with multiple items (so min/max are stored in serialization)
45+
quantiles_sketch<double> sketch(128);
46+
for (int i = 0; i < 1000; i++) {
47+
sketch.update(static_cast<double>(i));
48+
}
49+
50+
// Serialize
51+
auto bytes = sketch.serialize();
52+
53+
// Deserialize - WITH BUGGY CODE AND HARDENING, THIS WILL SIGABRT HERE
54+
// The bug is: sd.deserialize(is, &*tmp, 1) where tmp is empty optional
55+
auto sketch2 = quantiles_sketch<double>::deserialize(bytes.data(), bytes.size());
56+
57+
// Verify deserialization worked correctly
58+
REQUIRE(sketch2.get_n() == sketch.get_n());
59+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
60+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
61+
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
62+
}
63+
64+
TEST_CASE("quantiles_sketch: deserialize from stream", "[deserialize_hardening]") {
65+
quantiles_sketch<float> sketch(256);
66+
for (int i = 0; i < 2000; i++) {
67+
sketch.update(static_cast<float>(i) * 0.5f);
68+
}
69+
70+
// Serialize to stream
71+
std::stringstream ss;
72+
sketch.serialize(ss);
73+
74+
// Deserialize from stream - exercises the buggy code path
75+
auto sketch2 = quantiles_sketch<float>::deserialize(ss);
76+
77+
REQUIRE(sketch2.get_n() == sketch.get_n());
78+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
79+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
80+
}
81+
82+
TEST_CASE("kll_sketch: deserialize multi-item sketch", "[deserialize_hardening]") {
83+
kll_sketch<float> sketch(200);
84+
for (int i = 0; i < 1500; i++) {
85+
sketch.update(static_cast<float>(i));
86+
}
87+
88+
auto bytes = sketch.serialize();
89+
90+
// Deserialize - exercises buggy &*tmp code path
91+
auto sketch2 = kll_sketch<float>::deserialize(bytes.data(), bytes.size());
92+
93+
REQUIRE(sketch2.get_n() == sketch.get_n());
94+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
95+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
96+
}
97+
98+
TEST_CASE("kll_sketch: deserialize from stream", "[deserialize_hardening]") {
99+
kll_sketch<int> sketch(400);
100+
for (int i = 0; i < 3000; i++) {
101+
sketch.update(i);
102+
}
103+
104+
std::stringstream ss;
105+
sketch.serialize(ss);
106+
107+
// Deserialize from stream
108+
auto sketch2 = kll_sketch<int>::deserialize(ss);
109+
110+
REQUIRE(sketch2.get_n() == sketch.get_n());
111+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
112+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
113+
}
114+
115+
TEST_CASE("req_sketch: deserialize multi-level sketch", "[deserialize_hardening]") {
116+
// REQ sketch only has the bug when num_levels > 1
117+
// We need to add enough items to trigger multiple levels
118+
req_sketch<float> sketch(12);
119+
for (int i = 0; i < 10000; i++) {
120+
sketch.update(static_cast<float>(i));
121+
}
122+
123+
auto bytes = sketch.serialize();
124+
125+
// Deserialize - exercises buggy code path when num_levels > 1
126+
auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
127+
128+
REQUIRE(sketch2.get_n() == sketch.get_n());
129+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
130+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
131+
}
132+
133+
TEST_CASE("req_sketch: deserialize from stream", "[deserialize_hardening]") {
134+
req_sketch<double> sketch(20);
135+
for (int i = 0; i < 15000; i++) {
136+
sketch.update(static_cast<double>(i) * 0.1);
137+
}
138+
139+
std::stringstream ss;
140+
sketch.serialize(ss);
141+
142+
// Deserialize from stream
143+
auto sketch2 = req_sketch<double>::deserialize(ss);
144+
145+
REQUIRE(sketch2.get_n() == sketch.get_n());
146+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
147+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
148+
}
149+
150+
TEST_CASE("multiple sketch types: stress test", "[deserialize_hardening]") {
151+
SECTION("quantiles with various sizes") {
152+
for (int k : {64, 128, 256}) {
153+
quantiles_sketch<int> sketch(k);
154+
for (int i = 0; i < 5000; i++) {
155+
sketch.update(i);
156+
}
157+
auto bytes = sketch.serialize();
158+
auto sketch2 = quantiles_sketch<int>::deserialize(bytes.data(), bytes.size());
159+
REQUIRE(sketch2.get_n() == sketch.get_n());
160+
}
161+
}
162+
163+
SECTION("kll with various sizes") {
164+
for (int k : {100, 200, 400}) {
165+
kll_sketch<double> sketch(k);
166+
for (int i = 0; i < 4000; i++) {
167+
sketch.update(static_cast<double>(i) / 10.0);
168+
}
169+
auto bytes = sketch.serialize();
170+
auto sketch2 = kll_sketch<double>::deserialize(bytes.data(), bytes.size());
171+
REQUIRE(sketch2.get_n() == sketch.get_n());
172+
}
173+
}
174+
175+
SECTION("req with various sizes") {
176+
for (int k : {12, 20}) {
177+
req_sketch<float> sketch(k);
178+
for (int i = 0; i < 8000; i++) {
179+
sketch.update(static_cast<float>(i));
180+
}
181+
auto bytes = sketch.serialize();
182+
auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
183+
REQUIRE(sketch2.get_n() == sketch.get_n());
184+
}
185+
}
186+
}
187+
188+
} // namespace datasketches

kll/include/kll_sketch_impl.hpp

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <iomanip>
2525
#include <sstream>
2626
#include <stdexcept>
27+
#include <type_traits>
2728

2829
#include "conditional_forward.hpp"
2930
#include "count_zeros.hpp"
@@ -481,18 +482,22 @@ kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(std::istream& is, const Ser
481482
read(is, levels.data(), sizeof(levels[0]) * num_levels);
482483
}
483484
levels[num_levels] = capacity;
484-
optional<T> tmp; // space to deserialize min and max
485485
optional<T> min_item;
486486
optional<T> max_item;
487487
if (!is_single_item) {
488-
sd.deserialize(is, &*tmp, 1);
488+
// Space to deserialize min and max.
489+
// serde::deserialize expects allocated but not initialized storage.
490+
typename std::aligned_storage<sizeof(T), alignof(T)>::type tmp_storage;
491+
T* tmp = reinterpret_cast<T*>(&tmp_storage);
492+
493+
sd.deserialize(is, tmp, 1);
489494
// serde call did not throw, repackage and cleanup
490-
min_item.emplace(*tmp);
491-
(*tmp).~T();
492-
sd.deserialize(is, &*tmp, 1);
495+
min_item.emplace(std::move(*tmp));
496+
tmp->~T();
497+
sd.deserialize(is, tmp, 1);
493498
// serde call did not throw, repackage and cleanup
494-
max_item.emplace(*tmp);
495-
(*tmp).~T();
499+
max_item.emplace(std::move(*tmp));
500+
tmp->~T();
496501
}
497502
A alloc(allocator);
498503
auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
@@ -565,18 +570,22 @@ kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(const void* bytes, size_t s
565570
ptr += copy_from_mem(ptr, levels.data(), sizeof(levels[0]) * num_levels);
566571
}
567572
levels[num_levels] = capacity;
568-
optional<T> tmp; // space to deserialize min and max
569573
optional<T> min_item;
570574
optional<T> max_item;
571575
if (!is_single_item) {
572-
ptr += sd.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
576+
// Space to deserialize min and max.
577+
// serde::deserialize expects allocated but not initialized storage.
578+
typename std::aligned_storage<sizeof(T), alignof(T)>::type tmp_storage;
579+
T* tmp = reinterpret_cast<T*>(&tmp_storage);
580+
581+
ptr += sd.deserialize(ptr, end_ptr - ptr, tmp, 1);
573582
// serde call did not throw, repackage and cleanup
574-
min_item.emplace(*tmp);
575-
(*tmp).~T();
576-
ptr += sd.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
583+
min_item.emplace(std::move(*tmp));
584+
tmp->~T();
585+
ptr += sd.deserialize(ptr, end_ptr - ptr, tmp, 1);
577586
// serde call did not throw, repackage and cleanup
578-
max_item.emplace(*tmp);
579-
(*tmp).~T();
587+
max_item.emplace(std::move(*tmp));
588+
tmp->~T();
580589
}
581590
A alloc(allocator);
582591
auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };

0 commit comments

Comments
 (0)