Skip to content

Commit f54c140

Browse files
committed
new pointer encoding
This changes the encoding of pointers in the flattening solver as follows: * Pointers with constant zero offset are encoded by numbering the object (0 is the NULL object). * Pointers that might have an offset are numbered, and encoded/decoded as needed. The numbering points into a table, which records an n-bit object and n-bit offset for an n-bit pointer. The key benefit of the encoding above are that a) offset arithmetic no longer overflows before the given width of the pointer is reached, b) the full width of the pointer (usually 32 or 64 bits) can be used to encode the object number, thereby eliminating the requirement to configure the number of 'object bits'.
1 parent bf40378 commit f54c140

File tree

3 files changed

+181
-99
lines changed

3 files changed

+181
-99
lines changed

src/solvers/flattening/bv_pointers.cpp

+169-90
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ Author: Daniel Kroening, [email protected]
1111
#include <util/arith_tools.h>
1212
#include <util/byte_operators.h>
1313
#include <util/c_types.h>
14-
#include <util/config.h>
1514
#include <util/exception_utils.h>
1615
#include <util/expr_util.h>
1716
#include <util/namespace.h>
@@ -71,53 +70,93 @@ bv_pointerst::endianness_map(const typet &type, bool little_endian) const
7170
return bv_endianness_mapt{type, little_endian, ns, bv_width};
7271
}
7372

74-
std::size_t bv_pointerst::get_object_width(const pointer_typet &) const
75-
{
76-
// not actually type-dependent for now
77-
return config.bv_encoding.object_bits;
78-
}
79-
80-
std::size_t bv_pointerst::get_offset_width(const pointer_typet &type) const
73+
bvt bv_pointerst::object_literals(const bvt &bv, const pointer_typet &type)
8174
{
82-
const std::size_t pointer_width = type.get_width();
83-
const std::size_t object_width = get_object_width(type);
84-
PRECONDITION(pointer_width >= object_width);
85-
return pointer_width - object_width;
86-
}
75+
const auto width = type.get_width();
76+
PRECONDITION(width == bv.size());
8777

88-
std::size_t bv_pointerst::get_address_width(const pointer_typet &type) const
89-
{
90-
return type.get_width();
91-
}
78+
const auto result = prop.new_variables(width);
79+
bvt match_found;
9280

93-
bvt bv_pointerst::object_literals(const bvt &bv, const pointer_typet &type)
94-
const
95-
{
96-
const std::size_t offset_width = get_offset_width(type);
97-
const std::size_t object_width = get_object_width(type);
98-
PRECONDITION(bv.size() >= offset_width + object_width);
81+
for(std::size_t i = 0; i < numbered_pointers.size(); i++)
82+
{
83+
auto cond = bv_utils.equal(
84+
bv,
85+
bv_utilst::concatenate(
86+
bv_utilst::build_constant(i, width - 1), {const_literal(true)}));
87+
match_found.push_back(cond);
88+
bv_utils.cond_implies_equal(
89+
cond,
90+
bv_utilst::zero_extension(numbered_pointers[i].first, width),
91+
result);
92+
}
9993

100-
return bvt(
101-
bv.begin() + offset_width, bv.begin() + offset_width + object_width);
94+
auto within_bounds = prop.lor(match_found);
95+
96+
// The top bit distinguishes 'object only' vs. 'table encoded'.
97+
// When outside of the table, return an invalid object.
98+
return bv_utils.select(
99+
bv.back(),
100+
bv_utils.select(
101+
within_bounds,
102+
result,
103+
bv_utilst::build_constant(pointer_logic.get_invalid_object(), width)),
104+
bv);
102105
}
103106

104107
bvt bv_pointerst::offset_literals(const bvt &bv, const pointer_typet &type)
105-
const
106108
{
107-
const std::size_t offset_width = get_offset_width(type);
108-
PRECONDITION(bv.size() >= offset_width);
109+
const auto width = type.get_width();
110+
PRECONDITION(width == bv.size());
111+
112+
const auto result = prop.new_variables(width);
113+
114+
for(std::size_t i = 0; i < numbered_pointers.size(); i++)
115+
{
116+
auto cond = bv_utils.equal(
117+
bv,
118+
bv_utilst::concatenate(
119+
bv_utilst::build_constant(i, width - 1), {const_literal(true)}));
120+
bv_utils.cond_implies_equal(
121+
cond,
122+
bv_utilst::sign_extension(numbered_pointers[i].second, width),
123+
result);
124+
}
109125

110-
return bvt(bv.begin(), bv.begin() + offset_width);
126+
// the top bit distinguishes 'object only' vs. 'table encoded'
127+
return bv_utils.select(bv.back(), result, bv_utilst::zeros(width));
111128
}
112129

113-
bvt bv_pointerst::object_offset_encoding(const bvt &object, const bvt &offset)
130+
bvt bv_pointerst::object_offset_encoding(
131+
const bvt &object_bv,
132+
const bvt &offset_bv,
133+
const pointer_typet &type)
114134
{
115-
bvt result;
116-
result.reserve(offset.size() + object.size());
117-
result.insert(result.end(), offset.begin(), offset.end());
118-
result.insert(result.end(), object.begin(), object.end());
119-
120-
return result;
135+
const auto width = type.get_width();
136+
PRECONDITION(object_bv.size() == width);
137+
PRECONDITION(offset_bv.size() == width);
138+
139+
// is the offset zero?
140+
if(std::find_if_not(offset_bv.begin(), offset_bv.end(), [](literalt l) {
141+
return l == const_literal(false);
142+
}) != offset_bv.end())
143+
{
144+
// offset is not zero, add to the pointer table
145+
auto number = numbered_pointers.size();
146+
numbered_pointers.emplace_back(object_bv, offset_bv);
147+
148+
// Encode the table index.
149+
// Also set top bit to distinguish from object-only pointers.
150+
return bv_utilst::concatenate(
151+
bv_utilst::build_constant(number, width - 1), {const_literal(true)});
152+
}
153+
else
154+
{
155+
// Offset is zero, just zero-extend object number.
156+
// Top bit is zero to indicate object-only pointer.
157+
return bv_utilst::concatenate(
158+
bv_utilst::zero_extension(object_bv, width - 1), {const_literal(false)});
159+
}
121160
}
122161

123162
literalt bv_pointerst::convert_rest(const exprt &expr)
@@ -141,17 +180,7 @@ literalt bv_pointerst::convert_rest(const exprt &expr)
141180
bvt invalid_bv = object_literals(
142181
encode(pointer_logic.get_invalid_object(), type), type);
143182

144-
const std::size_t object_bits = get_object_width(type);
145-
146-
bvt equal_invalid_bv;
147-
equal_invalid_bv.reserve(object_bits);
148-
149-
for(std::size_t i=0; i<object_bits; i++)
150-
{
151-
equal_invalid_bv.push_back(prop.lequal(object_bv[i], invalid_bv[i]));
152-
}
153-
154-
return prop.land(equal_invalid_bv);
183+
return bv_utils.equal(object_bv, invalid_bv);
155184
}
156185
}
157186
}
@@ -168,6 +197,32 @@ literalt bv_pointerst::convert_rest(const exprt &expr)
168197
return l;
169198
}
170199
}
200+
else if(expr.id() == ID_equal || expr.id() == ID_notequal)
201+
{
202+
if(
203+
operands.size() == 2 && operands[0].type().id() == ID_pointer &&
204+
operands[1].type().id() == ID_pointer)
205+
{
206+
// bit-wise comparison doesn't work because of numbered pointers
207+
const bvt &bv0 = convert_bv(operands[0]);
208+
const bvt &bv1 = convert_bv(operands[1]);
209+
210+
const pointer_typet &type0 = to_pointer_type(operands[0].type());
211+
bvt object_bv0 = object_literals(bv0, type0);
212+
bvt offset_bv0 = offset_literals(bv0, type0);
213+
214+
const pointer_typet &type1 = to_pointer_type(operands[1].type());
215+
bvt object_bv1 = object_literals(bv1, type1);
216+
bvt offset_bv1 = offset_literals(bv1, type1);
217+
218+
// object and offset need to match
219+
auto equal = prop.land(
220+
bv_utils.equal(object_bv0, object_bv1),
221+
bv_utils.equal(offset_bv0, offset_bv1));
222+
223+
return expr.id() == ID_equal ? equal : !equal;
224+
}
225+
}
171226
else if(expr.id()==ID_lt || expr.id()==ID_le ||
172227
expr.id()==ID_gt || expr.id()==ID_ge)
173228
{
@@ -463,8 +518,7 @@ bvt bv_pointerst::convert_pointer_type(const exprt &expr)
463518
count == 1,
464519
"there should be exactly one pointer-type operand in a pointer-type sum");
465520

466-
const std::size_t offset_bits = get_offset_width(type);
467-
bvt sum = bv_utils.build_constant(0, offset_bits);
521+
bvt sum = bv_utils.build_constant(0, bits);
468522

469523
forall_operands(it, plus_expr)
470524
{
@@ -484,7 +538,8 @@ bvt bv_pointerst::convert_pointer_type(const exprt &expr)
484538
bvt op=convert_bv(*it);
485539
CHECK_RETURN(!op.empty());
486540

487-
op = bv_utils.extension(op, offset_bits, rep);
541+
op = bv_utils.extension(op, bits, rep);
542+
CHECK_RETURN(op.size() == sum.size());
488543

489544
sum=bv_utils.add(sum, op);
490545
}
@@ -794,50 +849,66 @@ exprt bv_pointerst::bv_get_rec(
794849
bvt value_bv(bv.begin() + offset, bv.begin() + offset + bits);
795850

796851
std::string value = bits_to_string(prop, value_bv);
797-
std::string value_addr = bits_to_string(prop, object_literals(value_bv, pt));
798-
std::string value_offset =
799-
bits_to_string(prop, offset_literals(value_bv, pt));
800-
801852
// we treat these like bit-vector constants, but with
802853
// some additional annotation
803-
804854
const irep_idt bvrep = make_bvrep(bits, [&value](std::size_t i) {
805855
return value[value.size() - 1 - i] == '1';
806856
});
807857

808858
constant_exprt result(bvrep, type);
809859

810860
pointer_logict::pointert pointer;
811-
pointer.object =
812-
numeric_cast_v<std::size_t>(binary2integer(value_addr, false));
813-
pointer.offset=binary2integer(value_offset, true);
861+
862+
// Top bit set?
863+
if(value.front() == '1')
864+
{
865+
// It's a pointer into the table. Turn bits into number, but
866+
// first strip top bit.
867+
auto index = binary2integer(value.substr(1, std::string::npos), false);
868+
869+
if(index >= 0 && index < numbered_pointers.size())
870+
{
871+
// copy from table
872+
const auto &entry = numbered_pointers[numeric_cast_v<std::size_t>(index)];
873+
pointer.object = binary2integer(bits_to_string(prop, entry.first), false);
874+
pointer.offset = binary2integer(bits_to_string(prop, entry.second), true);
875+
}
876+
else
877+
{
878+
// out of bounds, we'll make it an 'invalid pointer'
879+
pointer.object = pointer_logic.get_invalid_object();
880+
pointer.offset = 0;
881+
}
882+
}
883+
else
884+
{
885+
// It's an object only, offset is zero.
886+
pointer.object = binary2integer(value, false);
887+
pointer.offset = 0;
888+
}
814889

815890
return annotated_pointer_constant_exprt{
816891
bvrep, pointer_logic.pointer_expr(pointer, pt)};
817892
}
818893

819-
bvt bv_pointerst::encode(std::size_t addr, const pointer_typet &type) const
894+
bvt bv_pointerst::encode(const mp_integer &object, const pointer_typet &type)
895+
const
820896
{
821-
const std::size_t offset_bits = get_offset_width(type);
822-
const std::size_t object_bits = get_object_width(type);
823-
824-
bvt zero_offset(offset_bits, const_literal(false));
897+
const auto width = type.get_width();
825898

826-
// set variable part
827-
bvt object;
828-
object.reserve(object_bits);
829-
for(std::size_t i=0; i<object_bits; i++)
830-
object.push_back(const_literal((addr & (std::size_t(1) << i)) != 0));
899+
auto object_bv = bv_utilst::build_constant(object, width - 1);
831900

832-
return object_offset_encoding(object, zero_offset);
901+
// Offset is zero, just zero-extend object number.
902+
// Top bit is zero to indicate object-only pointer.
903+
return bv_utilst::concatenate(object_bv, {const_literal(false)});
833904
}
834905

835906
bvt bv_pointerst::offset_arithmetic(
836907
const pointer_typet &type,
837908
const bvt &bv,
838909
const mp_integer &x)
839910
{
840-
const std::size_t offset_bits = get_offset_width(type);
911+
const std::size_t offset_bits = type.get_width();
841912

842913
return offset_arithmetic(
843914
type, bv, 1, bv_utils.build_constant(x, offset_bits));
@@ -855,52 +926,60 @@ bvt bv_pointerst::offset_arithmetic(
855926
index.type().id()==ID_signedbv?bv_utilst::representationt::SIGNED:
856927
bv_utilst::representationt::UNSIGNED;
857928

858-
const std::size_t offset_bits = get_offset_width(type);
859-
bv_index=bv_utils.extension(bv_index, offset_bits, rep);
929+
PRECONDITION(bv.size() == type.get_width());
930+
931+
bv_index = bv_utils.extension(bv_index, bv.size(), rep);
860932

861933
return offset_arithmetic(type, bv, factor, bv_index);
862934
}
863935

864936
bvt bv_pointerst::offset_arithmetic(
865937
const pointer_typet &type,
866-
const bvt &bv,
938+
const bvt &pointer_bv,
867939
const mp_integer &factor,
868-
const bvt &index)
940+
const bvt &index_bv)
869941
{
870-
bvt bv_index;
942+
PRECONDITION(pointer_bv.size() == type.get_width());
943+
PRECONDITION(index_bv.size() == type.get_width());
944+
945+
bvt index_scaled_bv;
871946

872947
if(factor==1)
873-
bv_index=index;
948+
{
949+
index_scaled_bv = index_bv;
950+
}
874951
else
875952
{
876-
bvt bv_factor=bv_utils.build_constant(factor, index.size());
877-
bv_index = bv_utils.signed_multiplier(index, bv_factor);
953+
bvt factor_bv = bv_utils.build_constant(factor, pointer_bv.size());
954+
index_scaled_bv = bv_utils.signed_multiplier(
955+
bv_utils.sign_extension(index_bv, pointer_bv.size()), factor_bv);
878956
}
879957

880-
const std::size_t offset_bits = get_offset_width(type);
881-
bv_index = bv_utils.sign_extension(bv_index, offset_bits);
958+
bvt offset_bv = offset_literals(pointer_bv, type);
959+
CHECK_RETURN(offset_bv.size() == pointer_bv.size());
882960

883-
bvt offset_bv = offset_literals(bv, type);
961+
DATA_INVARIANT(
962+
offset_bv.size() == index_scaled_bv.size(),
963+
"pointer offset bitvector width");
884964

885-
bvt bv_tmp = bv_utils.add(offset_bv, bv_index);
965+
bvt sum = bv_utils.add(offset_bv, index_scaled_bv);
886966

887-
return object_offset_encoding(object_literals(bv, type), bv_tmp);
967+
return object_offset_encoding(object_literals(pointer_bv, type), sum, type);
888968
}
889969

890970
bvt bv_pointerst::add_addr(const exprt &expr)
891971
{
892-
std::size_t a=pointer_logic.add_object(expr);
972+
const auto a = pointer_logic.add_object(expr);
893973

894974
const pointer_typet type = pointer_type(expr.type());
895-
const std::size_t object_bits = get_object_width(type);
896-
const std::size_t max_objects=std::size_t(1)<<object_bits;
975+
const mp_integer object_bits = type.get_width();
976+
const mp_integer max_objects = mp_integer(1) << object_bits;
897977

898978
if(a==max_objects)
899979
throw analysis_exceptiont(
900-
"too many addressed objects: maximum number of objects is set to 2^n=" +
901-
std::to_string(max_objects) + " (with n=" + std::to_string(object_bits) +
902-
"); " +
903-
"use the `--object-bits n` option to increase the maximum number");
980+
"too many addressed objects: maximum number of objects is 2^n=" +
981+
integer2string(max_objects) + " (with n=" + integer2string(object_bits) +
982+
")");
904983

905984
return encode(a, type);
906985
}

0 commit comments

Comments
 (0)