Skip to content

Commit 6d2c43d

Browse files
authored
Add safe size checks for weights (#34483)
### Details:
- *Add safe size checks to avoid a buffer overflow when parsing a Paddle frontend weight file*

### Tickets:
- *CVS-182171*
1 parent f954d5e commit 6d2c43d

File tree

2 files changed

+212
-20
lines changed

2 files changed

+212
-20
lines changed

src/frontends/paddle/src/input_model.cpp

Lines changed: 68 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,14 @@
88
#if defined(__MINGW32__) || defined(__MINGW64__)
99
# include <filesystem>
1010
#endif
11+
#include <limits>
1112
#include <queue>
1213

1314
#include "decoder_proto.hpp"
1415
#include "framework.pb.h"
1516
#include "input_model.hpp"
1617
#include "openvino/core/log_util.hpp"
18+
#include "openvino/core/memory_util.hpp"
1719
#include "openvino/frontend/paddle/node_context.hpp"
1820
#include "openvino/opsets/opset7.hpp"
1921
#include "openvino/util/common_util.hpp"
@@ -160,8 +162,29 @@ void InputModel::InputModelImpl::load_places() {
160162

161163
namespace {
162164
/// Reads exactly `len` bytes from `is` into `data`.
///
/// @param is   Stream positioned at the first byte of the tensor payload.
/// @param data Destination buffer; must be able to hold at least `len` bytes.
/// @param len  Number of bytes to read.
/// @return true when `len` is zero or when exactly `len` bytes were extracted;
///         false when `data` is null, when `len` does not fit into std::streamsize,
///         or when the stream delivered fewer than `len` bytes.
bool read_tensor(std::istream& is, char* data, size_t len) {
    if (len == 0) {
        // Nothing to read: succeed without touching the stream or the buffer.
        return true;
    }
    if (data == nullptr) {
        // A non-empty read into a null buffer can never be valid.
        return false;
    }
    if (len > static_cast<size_t>(std::numeric_limits<std::streamsize>::max())) {
        // istream::read takes a signed count; a larger request is not representable.
        return false;
    }
    is.read(data, static_cast<std::streamsize>(len));
    return !is.fail() && static_cast<size_t>(is.gcount()) == len;
}
174+
175+
// Upper bound (64 MiB) for a size field read from a weight file before a buffer of that size is
// allocated; the weight loaders in this file check the dims-struct length and the TensorDesc size
// against it.
constexpr size_t kMaxTensorDescSize = 64 * 1024 * 1024;
176+
177+
template <typename DimsT>
178+
ov::Shape make_shape_checked(const DimsT& dims) {
179+
ov::Shape shape;
180+
shape.reserve(dims.size());
181+
for (const auto& dim : dims) {
182+
FRONT_END_GENERAL_CHECK(dim >= 0, "Negative dimension in Paddle weight tensor.");
183+
FRONT_END_GENERAL_CHECK(static_cast<unsigned long long>(dim) <= std::numeric_limits<size_t>::max(),
184+
"Dimension is too large for size_t in Paddle weight tensor.");
185+
shape.push_back(static_cast<size_t>(dim));
186+
}
187+
return shape;
165188
}
166189

167190
template <typename T>
@@ -288,10 +311,11 @@ void InputModel::InputModelImpl::load_consts(const std::basic_string<T>& folder_
288311

289312
FRONT_END_GENERAL_CHECK(var_desc.type().type() == ::paddle::framework::proto::VarType::LOD_TENSOR);
290313
const auto& tensor = var_desc.type().lod_tensor().tensor();
291-
Shape shape(tensor.dims().cbegin(), tensor.dims().cend());
314+
Shape shape = make_shape_checked(tensor.dims());
292315
const auto& type = get_ov_type(tensor.data_type());
293-
const auto& data_length = shape_size(shape) * type.size();
294-
std::vector<uint8_t> tensor_data(data_length);
316+
auto data_length = ov::util::get_memory_size_safe(type, shape);
317+
FRONT_END_GENERAL_CHECK(data_length, "Weight tensor size overflow for constant ", name, ".");
318+
std::vector<uint8_t> tensor_data(*data_length);
295319

296320
bool read_succeed = false;
297321
if (!folder_with_weights.empty()) {
@@ -304,21 +328,28 @@ void InputModel::InputModelImpl::load_consts(const std::basic_string<T>& folder_
304328
FRONT_END_GENERAL_CHECK(is && is.is_open(), "Cannot open file for constant value.");
305329
const size_t header_size = 16;
306330
std::vector<char> header(header_size);
307-
is.read(&header[0], header_size);
331+
FRONT_END_GENERAL_CHECK(is.read(&header[0], header_size), "Failed to read constant header for ", name, ".");
308332

309333
uint32_t dims_len = 0;
310-
is.read(reinterpret_cast<char*>(&dims_len), 4);
334+
FRONT_END_GENERAL_CHECK(is.read(reinterpret_cast<char*>(&dims_len), sizeof(dims_len)),
335+
"Failed to read dims length for ",
336+
name,
337+
".");
338+
FRONT_END_GENERAL_CHECK(dims_len <= kMaxTensorDescSize, "Dims struct size too large for ", name, ".");
311339
std::vector<char> dims_struct(dims_len);
312-
is.read(&dims_struct[0], dims_len);
313-
read_succeed = read_tensor(is, reinterpret_cast<char*>(&tensor_data[0]), data_length);
340+
FRONT_END_GENERAL_CHECK(is.read(dims_struct.data(), dims_len),
341+
"Failed to read dims struct for ",
342+
name,
343+
".");
344+
read_succeed = read_tensor(is, reinterpret_cast<char*>(tensor_data.data()), *data_length);
314345
} else {
315346
FRONT_END_GENERAL_CHECK(false, "Folder with weights must be provided.");
316347
}
317348
FRONT_END_GENERAL_CHECK(read_succeed,
318349
"File containing constant with name ",
319350
name,
320351
" wasn't successfully read.");
321-
auto const_node = opset7::Constant::create(type, shape, &tensor_data[0]);
352+
auto const_node = opset7::Constant::create(type, shape, tensor_data.data());
322353
const_node->set_friendly_name(name);
323354
m_tensor_values[name] = const_node;
324355
}
@@ -353,30 +384,47 @@ void InputModel::InputModelImpl::load_consts(std::istream* weight_stream) {
353384
{
354385
const size_t header_size = 16;
355386
std::vector<char> header(header_size);
356-
weight_stream->read(&header[0], header_size);
387+
FRONT_END_GENERAL_CHECK(weight_stream->read(&header[0], header_size),
388+
"Failed to read weight header for ",
389+
name,
390+
".");
357391
}
358392

359393
int32_t size;
360-
weight_stream->read(reinterpret_cast<char*>(&size), sizeof(size));
394+
FRONT_END_GENERAL_CHECK(weight_stream->read(reinterpret_cast<char*>(&size), sizeof(size)),
395+
"Failed to read TensorDesc size for ",
396+
name,
397+
".");
398+
FRONT_END_GENERAL_CHECK(size > 0 && static_cast<size_t>(size) <= kMaxTensorDescSize,
399+
"TensorDesc size is invalid for ",
400+
name,
401+
".");
361402

362-
std::unique_ptr<char[]> buf(new char[size]);
363-
weight_stream->read(reinterpret_cast<char*>(buf.get()), size);
403+
std::vector<char> buf(static_cast<size_t>(size));
404+
FRONT_END_GENERAL_CHECK(weight_stream->read(buf.data(), size),
405+
"Failed to read TensorDesc data for ",
406+
name,
407+
".");
364408

365409
std::unique_ptr<::paddle::framework::proto::VarType_TensorDesc> tensor_desc(
366410
new ::paddle::framework::proto::VarType_TensorDesc());
367-
tensor_desc->ParseFromArray(buf.get(), size);
368-
Shape shape(tensor_desc->dims().cbegin(), tensor_desc->dims().cend());
411+
FRONT_END_GENERAL_CHECK(tensor_desc->ParseFromArray(buf.data(), size),
412+
"Failed to parse TensorDesc for ",
413+
name,
414+
".");
415+
Shape shape = make_shape_checked(tensor_desc->dims());
369416
const auto& type = get_ov_type(tensor_desc->data_type());
370-
const auto& data_length = shape_size(shape) * type.size();
371-
std::vector<uint8_t> tensor_data(data_length);
417+
auto data_length = ov::util::get_memory_size_safe(type, shape);
418+
FRONT_END_GENERAL_CHECK(data_length, "Weight tensor size overflow for constant ", name, ".");
419+
std::vector<uint8_t> tensor_data(*data_length);
372420

373-
bool read_succeed = read_tensor(*weight_stream, reinterpret_cast<char*>(&tensor_data[0]), data_length);
421+
bool read_succeed = read_tensor(*weight_stream, reinterpret_cast<char*>(tensor_data.data()), *data_length);
374422
FRONT_END_GENERAL_CHECK(read_succeed,
375423
"File containing constant with name ",
376424
name,
377425
" wasn't successfully read.");
378426

379-
auto const_node = opset7::Constant::create(type, shape, &tensor_data[0]);
427+
auto const_node = opset7::Constant::create(type, shape, tensor_data.data());
380428
const_node->set_friendly_name(name);
381429
m_tensor_values[name] = const_node;
382430
}

src/frontends/paddle/tests/read_paddle_model_test.cpp

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,67 @@
55
#include <gtest/gtest.h>
66

77
#include <fstream>
8+
#include <limits>
89
#include <openvino/util/file_util.hpp>
910
#include <set>
11+
#include <sstream>
1012
#include <string>
13+
#include <vector>
1114

1215
#include "common_test_utils/ov_test_utils.hpp"
1316
#include "common_test_utils/unicode_utils.hpp"
1417
#include "frontend/shared/include/utils.hpp"
18+
#include "openvino/frontend/manager.hpp"
1519
#include "openvino/openvino.hpp"
1620
#include "openvino/opsets/opset1.hpp"
1721
#include "openvino/opsets/opset8.hpp"
1822
#include "openvino/pass/serialize.hpp"
1923

24+
namespace {
// Number of zero bytes written ahead of the TensorDesc size field; the stream-based weight loader
// reads and discards a header of this size before reading the size field.
constexpr std::string::size_type kWeightHeaderSize = 16;

/// Appends `value` to `out` as a base-128 varint: 7 payload bits per byte, least-significant group
/// first, with the high bit set on every byte except the last.
void append_varint(std::string& out, uint64_t value) {
    for (; value > 0x7F; value >>= 7) {
        out.push_back(static_cast<char>((value & 0x7F) | 0x80));
    }
    out.push_back(static_cast<char>(value));
}

/// Appends a varint-encoded field key, i.e. `(field_number << 3) | wire_type`.
void append_key(std::string& out, uint32_t field_number, uint8_t wire_type) {
    append_varint(out, (static_cast<uint64_t>(field_number) << 3) | wire_type);
}

/// Serializes a minimal tensor description by hand: field 1 carries `data_type`, and every entry of
/// `dims` is emitted as a separate field 2, all with wire type 0 (varint).
/// NOTE(review): presumably mirrors VarType.TensorDesc (data_type = 1, dims = 2) - confirm against
/// framework.proto.
/// Negative values are converted to uint64_t first, so they are written in their two's-complement
/// 64-bit form.
std::string make_tensor_desc_bytes(const std::vector<int64_t>& dims, int32_t data_type) {
    std::string out;
    append_key(out, 1, 0);
    append_varint(out, static_cast<uint64_t>(data_type));
    for (const auto& dim : dims) {
        append_key(out, 2, 0);
        append_varint(out, static_cast<uint64_t>(dim));
    }
    return out;
}

/// Builds the leading part of a weights entry: a zero-filled header followed by `desc_size` stored
/// as a raw int32 in host byte order. `payload_size` is only a capacity hint for bytes the caller
/// appends afterwards.
std::string make_weights_prefix(int32_t desc_size, std::string::size_type payload_size = 0) {
    std::string out;
    out.reserve(kWeightHeaderSize + sizeof(desc_size) + payload_size);
    out.append(kWeightHeaderSize, '\0');
    out.append(reinterpret_cast<const char*>(&desc_size), sizeof(desc_size));
    return out;
}

/// Returns a weights stream consisting of header, size field and the given serialized descriptor.
/// No tensor payload follows the descriptor.
std::string make_weights_with_tensor_desc(const std::string& desc_bytes) {
    std::string out = make_weights_prefix(static_cast<int32_t>(desc_bytes.size()), desc_bytes.size());
    out.append(desc_bytes);
    return out;
}

/// Returns a weights stream whose size field is one byte above the loader's limit and which
/// contains no descriptor bytes at all.
std::string make_invalid_weights_with_bad_desc_size() {
    // Must stay in sync with kMaxTensorDescSize in input_model.cpp.
    constexpr int32_t kMaxTensorDescSize = 64 * 1024 * 1024;
    return make_weights_prefix(kMaxTensorDescSize + 1);
}
}  // namespace
68+
2069
TEST(Paddle_Reader_Tests, LoadModelMemoryToCore) {
2170
auto model = FrontEndTestUtils::make_model_path(std::string(TEST_PADDLE_MODELS_DIRNAME) +
2271
"conv2d_relu/conv2d_relu" + std::string(TEST_PADDLE_MODEL_EXT));
@@ -132,6 +181,101 @@ TEST(Paddle_Reader_Tests, ImportBasicModelToCore) {
132181
ASSERT_TRUE(res.valid) << res.message;
133182
}
134183

184+
TEST(Paddle_Reader_Tests, LoadModelWithInvalidTensorDescSize) {
185+
auto model_path = FrontEndTestUtils::make_model_path(
186+
std::string(TEST_PADDLE_MODELS_DIRNAME) + "conv2d_relu/conv2d_relu" + std::string(TEST_PADDLE_MODEL_EXT));
187+
188+
std::ifstream model_ifs(model_path, std::ios::in | std::ios::binary);
189+
ASSERT_TRUE(model_ifs.is_open()) << "Cannot open model file: " << model_path;
190+
191+
const auto weights_bytes = make_invalid_weights_with_bad_desc_size();
192+
std::istringstream weights_is(weights_bytes, std::ios::in | std::ios::binary);
193+
194+
auto fem = ov::frontend::FrontEndManager();
195+
std::istream* model_stream = &model_ifs;
196+
std::istream* weights_stream = &weights_is;
197+
auto fe = fem.load_by_model(model_stream, weights_stream);
198+
ASSERT_NE(fe, nullptr);
199+
200+
model_ifs.clear();
201+
model_ifs.seekg(0, std::ios::beg);
202+
weights_is.clear();
203+
weights_is.seekg(0, std::ios::beg);
204+
205+
try {
206+
fe->load(model_stream, weights_stream);
207+
FAIL() << "Expected load to fail due to invalid TensorDesc size";
208+
} catch (const std::exception& ex) {
209+
const std::string msg = ex.what();
210+
ASSERT_NE(msg.find("TensorDesc size is invalid"), std::string::npos) << msg;
211+
}
212+
}
213+
214+
TEST(Paddle_Reader_Tests, LoadModelWithNegativeDimInTensorDesc) {
215+
auto model_path = FrontEndTestUtils::make_model_path(
216+
std::string(TEST_PADDLE_MODELS_DIRNAME) + "conv2d_relu/conv2d_relu" + std::string(TEST_PADDLE_MODEL_EXT));
217+
218+
std::ifstream model_ifs(model_path, std::ios::in | std::ios::binary);
219+
ASSERT_TRUE(model_ifs.is_open()) << "Cannot open model file: " << model_path;
220+
221+
const auto desc_bytes = make_tensor_desc_bytes({-1}, 5);
222+
const auto weights_bytes = make_weights_with_tensor_desc(desc_bytes);
223+
std::istringstream weights_is(weights_bytes, std::ios::in | std::ios::binary);
224+
225+
auto fem = ov::frontend::FrontEndManager();
226+
std::istream* model_stream = &model_ifs;
227+
std::istream* weights_stream = &weights_is;
228+
auto fe = fem.load_by_model(model_stream, weights_stream);
229+
ASSERT_NE(fe, nullptr);
230+
231+
model_ifs.clear();
232+
model_ifs.seekg(0, std::ios::beg);
233+
weights_is.clear();
234+
weights_is.seekg(0, std::ios::beg);
235+
236+
try {
237+
fe->load(model_stream, weights_stream);
238+
FAIL() << "Expected load to fail due to negative dimension";
239+
} catch (const std::exception& ex) {
240+
const std::string msg = ex.what();
241+
ASSERT_NE(msg.find("Negative dimension in Paddle weight tensor"), std::string::npos) << msg;
242+
}
243+
}
244+
245+
TEST(Paddle_Reader_Tests, LoadModelWithOverflowingTensorSize) {
246+
auto model_path = FrontEndTestUtils::make_model_path(
247+
std::string(TEST_PADDLE_MODELS_DIRNAME) + "conv2d_relu/conv2d_relu" + std::string(TEST_PADDLE_MODEL_EXT));
248+
249+
std::ifstream model_ifs(model_path, std::ios::in | std::ios::binary);
250+
ASSERT_TRUE(model_ifs.is_open()) << "Cannot open model file: " << model_path;
251+
252+
const auto desc_bytes = make_tensor_desc_bytes({std::numeric_limits<int64_t>::max(), 2}, 5);
253+
const auto weights_bytes = make_weights_with_tensor_desc(desc_bytes);
254+
std::istringstream weights_is(weights_bytes, std::ios::in | std::ios::binary);
255+
256+
auto fem = ov::frontend::FrontEndManager();
257+
std::istream* model_stream = &model_ifs;
258+
std::istream* weights_stream = &weights_is;
259+
auto fe = fem.load_by_model(model_stream, weights_stream);
260+
ASSERT_NE(fe, nullptr);
261+
262+
model_ifs.clear();
263+
model_ifs.seekg(0, std::ios::beg);
264+
weights_is.clear();
265+
weights_is.seekg(0, std::ios::beg);
266+
267+
try {
268+
fe->load(model_stream, weights_stream);
269+
FAIL() << "Expected load to fail due to overflowing tensor size";
270+
} catch (const std::exception& ex) {
271+
const std::string msg = ex.what();
272+
const bool has_weight_overflow = msg.find("Weight tensor size overflow for constant") != std::string::npos;
273+
const bool has_dim_overflow =
274+
msg.find("Dimension is too large for size_t in Paddle weight tensor.") != std::string::npos;
275+
ASSERT_TRUE(has_weight_overflow || has_dim_overflow) << msg;
276+
}
277+
}
278+
135279
#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32)
136280
TEST(Paddle_Reader_Tests, ImportBasicModelToCoreWstring) {
137281
std::string win_dir_path{TEST_PADDLE_MODELS_DIRNAME "relu/relu" + std::string(TEST_PADDLE_MODEL_EXT)};

0 commit comments

Comments
 (0)